@@ -70,6 +70,8 @@ const char_t *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput";
 const char_t *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode";
 const char_t *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange";
 const char_t *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr";
+const char_t *const OPTION_EXEC_GRAPH_EXEC_TIMEOUT = "ge.exec.graphExecTimeout";
+const char_t *const OPTION_EXEC_MODEL_EXEC_TIMEOUT = "ge.exec.modelExecTimeout";
 // Option key: memory init
 const char_t *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize";
@@ -145,7 +145,7 @@ extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank);
 extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream);
 /**
- * @brief AllGather operator.
+ * @brief Send operator.
  *
  * @param sendBuff A pointer identifying the input data address of the operator.
  * @param count An integer(u64) identifying the number of the send data.
@@ -158,7 +158,7 @@ extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream);
 extern HcclResult HcclSend(void *sendBuf, uint64_t count, HcclDataType dataType, uint32_t destRank, HcclComm comm,
                            aclrtStream stream);
 /**
- * @brief AllGather operator.
+ * @brief Recv operator.
  *
  * @param recvBuff A pointer identifying the output data address of the operator.
  * @param count An integer(u64) identifying the number of the receive data.
@@ -171,6 +171,30 @@ extern HcclResult HcclSend(void *sendBuf, uint64_t count, HcclDataType dataType,
 extern HcclResult HcclRecv(void *recvBuf, uint64_t count, HcclDataType dataType, uint32_t srcRank, HcclComm comm,
                            aclrtStream stream);
+/**
+ * @brief AlltoAllV operator.
+ *
+ * @param sendBuf A pointer identifying the input data address of the operator.
+ * @param sendCounts Integer array, where entry i specifies the number of elements to send to rank i.
+ * @param sdispls Integer array, where entry i specifies the displacement (offset from sendBuf, in units of sendType)
+ * from which to send data to rank i.
+ * @param sendType Datatype of send buffer elements, must be one of the following types: int8, int32, int64, uint64,
+ * float16, float32.
+ * @param recvBuf A pointer identifying the output data address of the operator.
+ * @param recvCounts Integer array, where entry j specifies the number of elements to receive from rank j.
+ * @param rdispls Integer array, where entry j specifies the displacement (offset from recvBuf, in units of recvType) to
+ * which data from rank j should be written.
+ * @param recvType Datatype of receive buffer elements, must be one of the following types: int8, int32, int64, uint64,
+ * float16, float32.
+ * @param comm A pointer identifying the communication resource.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+extern HcclResult HcclAlltoAllV(const void *sendBuf, const void *sendCounts, const void *sdispls, HcclDataType sendType,
+                                const void *recvBuf, const void *recvCounts, const void *rdispls, HcclDataType recvType,
+                                HcclComm comm, aclrtStream stream);
 /**
  * @brief Destroy HCCL comm
  *
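The new HcclAlltoAllV follows MPI_Alltoallv semantics: per-peer counts plus per-peer displacements in element units. A minimal calling sketch, assuming `comm` and `stream` were created elsewhere (e.g. via HcclCommInitRootInfo / aclrtCreateStream), that the device buffers were allocated with aclrtMalloc, and that the `HCCL_DATA_TYPE_FP32` enumerator name from hccl_types.h applies:

```cpp
#include <cstdint>
#include <vector>
#include "hccl/hccl.h"

// Each rank exchanges 1024 float32 elements with every peer; both buffers
// must therefore hold rankSize * 1024 floats on the device.
HcclResult AllToAllVExample(HcclComm comm, aclrtStream stream,
                            void *sendBuf, void *recvBuf, uint32_t rankSize) {
  std::vector<uint64_t> sendCounts(rankSize, 1024);  // entry i: elements sent to rank i
  std::vector<uint64_t> recvCounts(rankSize, 1024);  // entry j: elements received from rank j
  std::vector<uint64_t> sdispls(rankSize);
  std::vector<uint64_t> rdispls(rankSize);
  for (uint32_t r = 0U; r < rankSize; ++r) {
    sdispls[r] = static_cast<uint64_t>(r) * 1024U;  // offsets in units of sendType, per the doc
    rdispls[r] = static_cast<uint64_t>(r) * 1024U;
  }
  return HcclAlltoAllV(sendBuf, sendCounts.data(), sdispls.data(), HCCL_DATA_TYPE_FP32,
                       recvBuf, recvCounts.data(), rdispls.data(), HCCL_DATA_TYPE_FP32,
                       comm, stream);
}
```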
@@ -88,7 +88,7 @@
   if ((expr) != ge::GRAPH_SUCCESS) { \
     REPORT_CALL_ERROR("E19999", "Operator graph failed"); \
     GELOGE(ge::FAILED, __VA_ARGS__); \
-    return (FAILED); \
+    return (ge::FAILED); \
   } \
 } while (false)
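The qualification matters because the macro body expands at the call site: an unqualified `FAILED` only resolves when the caller happens to be inside `namespace ge` (or picks up a different `FAILED` from another scope). A self-contained illustration of the lookup problem, using simplified stand-in status codes rather than GE's real ones:

```cpp
// Simplified stand-in for ge::SUCCESS / ge::FAILED, for illustration only.
namespace ge { enum Status { SUCCESS = 0, FAILED = 1 }; }

namespace app {  // a caller that is NOT inside namespace ge
int Check(int expr) {
  if (expr != 0) {
    // return (FAILED);   // old macro body: does not compile here, because
                          // unqualified FAILED is not visible in namespace app
    return (ge::FAILED);  // new macro body: well-formed from any namespace
  }
  return ge::SUCCESS;
}
}  // namespace app
```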
@@ -309,6 +309,7 @@ struct Options {
   int32_t physical_device_id;
   std::string profiling_mode;
   std::string profiling_options;
+  int32_t graphExecTimeout;
 };
 // Profiling info of task
@@ -45,14 +45,6 @@ class GE_FUNC_VISIBILITY OpTypeContainer {
  private:
   std::set<std::string> op_type_list_;
 };
-class GE_FUNC_VISIBILITY OpTypeRegistrar {
- public:
-  explicit OpTypeRegistrar(const std::string &op_type) noexcept {
-    OpTypeContainer::Instance()->Register(op_type);
-  }
-  ~OpTypeRegistrar() {}
-};
 }  // namespace ge
 #define REGISTER_OPTYPE_DECLARE(var_name, str_name) \
@@ -60,7 +52,7 @@ class GE_FUNC_VISIBILITY OpTypeRegistrar {
 #define REGISTER_OPTYPE_DEFINE(var_name, str_name) \
   const char_t *var_name = str_name;               \
-  const ge::OpTypeRegistrar g_##var_name##_reg(str_name);
+  const bool g_##var_name##_reg = (static_cast<void>(OpTypeContainer::Instance()->Register(str_name)), true);
 #define IS_OPTYPE_EXISTING(str_name) (ge::OpTypeContainer::Instance()->IsExisting(str_name))
 #endif  // INC_FRAMEWORK_COMMON_OP_TYPES_H_
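The registrar class is gone; registration now rides on the dynamic initializer of a namespace-scope `bool`, using a comma expression so that the `void` return of `Register` can still yield a value. A self-contained sketch of the pattern (the container below is a simplified stand-in for `ge::OpTypeContainer`):

```cpp
#include <iostream>
#include <set>
#include <string>

// Simplified stand-in for ge::OpTypeContainer, just to show the pattern.
class OpTypeContainer {
 public:
  static OpTypeContainer *Instance() {
    static OpTypeContainer inst;
    return &inst;
  }
  void Register(const std::string &op_type) { op_type_list_.insert(op_type); }
  bool IsExisting(const std::string &op_type) const { return op_type_list_.count(op_type) > 0UL; }

 private:
  std::set<std::string> op_type_list_;
};

// The new macro shape: a namespace-scope bool whose initializer performs the
// registration, replacing the dedicated OpTypeRegistrar class.
#define REGISTER_OPTYPE_DEFINE(var_name, str_name) \
  const char *var_name = str_name;                 \
  const bool g_##var_name##_reg = (static_cast<void>(OpTypeContainer::Instance()->Register(str_name)), true);

REGISTER_OPTYPE_DEFINE(CONCAT, "Concat")

int main() {
  std::cout << OpTypeContainer::Instance()->IsExisting("Concat") << '\n';  // prints 1
}
```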
@@ -164,9 +164,8 @@ class ProfilingContext {
   int64_t RegisterString(const std::string &str);
   int64_t RegisterStringHash(const uint64_t hash_id, const std::string &str);
-  void UpdateElementHashId(const MsprofReporterCallback reporter_callback);
-  static Status QueryHashId(const MsprofReporterCallback reporter_callback, const std::string &src_str,
-                            uint64_t &hash_id);
+  void UpdateElementHashId();
+  static Status QueryHashId(const std::string &src_str, uint64_t &hash_id);
   size_t GetRegisterStringNum() const {
     return strings_to_index_.size();
   }
@@ -0,0 +1,38 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021-2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef INC_FRAMEWORK_COMMON_RUNTIME_TENSOR_DESC_H_
+#define INC_FRAMEWORK_COMMON_RUNTIME_TENSOR_DESC_H_
+
+#include <cstdint>
+
+namespace ge {
+constexpr int64_t kMaxDimSize = 32;
+
+#pragma pack(push, 1)
+struct RuntimeTensorDesc {
+  uint64_t data_addr;
+  int64_t data_offset_size;
+  int64_t dtype;
+  int64_t shape[kMaxDimSize + 1];           // shape: Dim_Num|DIM0|DIM1|...|DIM31
+  int64_t original_shape[kMaxDimSize + 1];  // original_shape: Dim_Num|DIM0|DIM1|...|DIM31
+  int64_t format;
+  int64_t sub_format;
+  uint8_t reserved[456];  // padding to 1024 bytes
+};
+#pragma pack(pop)
+}  // namespace ge
+
+#endif  // INC_FRAMEWORK_COMMON_RUNTIME_TENSOR_DESC_H_
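With `#pragma pack(1)` the field sizes sum exactly to the documented 1024 bytes: 8 + 8 + 8 + 33*8 + 33*8 + 8 + 8 + 456 = 1024. A compile-time sanity check; the include path is an assumption derived from the header guard:

```cpp
// Assumed header location, based on INC_FRAMEWORK_COMMON_RUNTIME_TENSOR_DESC_H_.
#include "framework/common/runtime_tensor_desc.h"

// 5 scalar int64/uint64 fields (40 B) + two 33-element int64 arrays (528 B)
// + 456 B of reserved padding = 1024 B, thanks to the pack(1) directive.
static_assert(sizeof(ge::RuntimeTensorDesc) == 1024, "RuntimeTensorDesc must be padded to 1024 bytes");
```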
@@ -88,6 +88,7 @@ REGISTER_OPTYPE_DECLARE(DROPOUTDOMASKV3, "DropOutDoMaskV3");
 REGISTER_OPTYPE_DECLARE(DROPOUTDOMASKV3D, "DropOutDoMaskV3D");
 REGISTER_OPTYPE_DECLARE(SOFTMAXV2WITHDROPOUTDOMASKV3D, "SoftmaxV2WithDropOutDoMaskV3D");
 REGISTER_OPTYPE_DECLARE(DROPOUTGENMASK, "DropOutGenMask");
+REGISTER_OPTYPE_DECLARE(AXPYWITHSOFTMAXANDDROPOUTDOMASK, "AxpyWithSoftmaxAndDropOutDoMask");
 REGISTER_OPTYPE_DECLARE(CONCAT, "Concat");
 REGISTER_OPTYPE_DECLARE(ROIPOOLING, "ROIPooling");
 REGISTER_OPTYPE_DECLARE(PROPOSAL, "Proposal");
@@ -118,13 +118,13 @@
 } while (false)
 // Check if the parameter is null. If yes, return PARAM_INVALID and record the error
-#define GE_CHECK_NOTNULL(val)                                                    \
-  do {                                                                           \
-    if ((val) == nullptr) {                                                      \
-      REPORT_INNER_ERROR("E19999", "Param:%s is nullptr, check invalid", #val);  \
-      GELOGE(ge::FAILED, "[Check][Param:%s]null is invalid.", #val);             \
-      return ge::PARAM_INVALID;                                                  \
-    }                                                                            \
+#define GE_CHECK_NOTNULL(val, ...)                                                          \
+  do {                                                                                      \
+    if ((val) == nullptr) {                                                                 \
+      REPORT_INNER_ERROR("E19999", "Param:" #val " is nullptr, check invalid" __VA_ARGS__); \
+      GELOGE(ge::FAILED, "[Check][Param:" #val "]null is invalid" __VA_ARGS__);             \
+      return ge::PARAM_INVALID;                                                             \
+    }                                                                                       \
 } while (false)
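Because `__VA_ARGS__` is pasted directly after a string literal, any extra arguments must begin with a string literal themselves; they may then carry printf-style format arguments for both logging calls. A hedged usage sketch with a hypothetical caller (the function and its parameters are illustrative, not from the patch):

```cpp
ge::Status LoadModel(const void *model, const char *path) {
  GE_CHECK_NOTNULL(model);                              // unchanged call sites still compile
  GE_CHECK_NOTNULL(model, ", while loading %s", path);  // new: appends context to both log lines
  return ge::SUCCESS;
}
```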
 // Check if the parameter is null. If yes, just return and record the error
@@ -52,7 +52,7 @@ class GE_FUNC_VISIBILITY ModelParser {
   * @return SUCCESS
   * @return Others failed
   */
-  virtual domi::Status Parse(const char *file, ge::Graph &graph) = 0;
+  virtual Status Parse(const char *file, ge::Graph &graph) = 0;
  /**
   * @ingroup domi_omg
@@ -64,7 +64,7 @@ class GE_FUNC_VISIBILITY ModelParser {
   * @return FAILED
   * @author
   */
-  virtual domi::Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0;
+  virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0;
  /**
   * @ingroup domi_omg
@@ -76,7 +76,7 @@ class GE_FUNC_VISIBILITY ModelParser {
   * @return FAILED
   * @author
   */
-  virtual domi::Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0;
+  virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0;
  /**
   * @ingroup domi_omg
@@ -86,7 +86,7 @@ class GE_FUNC_VISIBILITY ModelParser {
   * @return SUCCESS
   * @return Others failed
   */
-  virtual domi::Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0;
+  virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0;
  /**
   * @ingroup domi_omg
@@ -97,8 +97,8 @@ class GE_FUNC_VISIBILITY ModelParser {
   * @return SUCCESS
   * @return Others failed
   */
-  virtual domi::Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback,
-                                              ge::ComputeGraphPtr &graph) = 0;
+  virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback,
+                                        ge::ComputeGraphPtr &graph) = 0;
  /**
   * @ingroup domi_omg
   * @brief Convert model files to JSON format
@@ -107,10 +107,10 @@ class GE_FUNC_VISIBILITY ModelParser {
   * @return SUCCESS
   * @return Others failed
   */
-  virtual domi::Status ToJson(const char *model_file, const char *json_file) {
+  virtual Status ToJson(const char *model_file, const char *json_file) {
     (void)model_file;
     (void)json_file;
-    return domi::SUCCESS;
+    return SUCCESS;
   }
  /*
@@ -121,7 +121,7 @@ class GE_FUNC_VISIBILITY ModelParser {
   */
  virtual ge::DataType ConvertToGeDataType(const uint32_t type) = 0;
-  virtual domi::Status ParseAllGraph(const google::protobuf::Message *root_proto, ge::ComputeGraphPtr &root_graph) = 0;
+  virtual Status ParseAllGraph(const google::protobuf::Message *root_proto, ge::ComputeGraphPtr &root_graph) = 0;
  /**
   * @ingroup domi_omg
@@ -131,7 +131,7 @@ class GE_FUNC_VISIBILITY ModelParser {
   * @return SUCCESS
   * @return Others failed
   */
-  virtual domi::Status ParseProto(const std::string &serialized_proto, ge::ComputeGraphPtr &graph) {
+  virtual Status ParseProto(const std::string &serialized_proto, ge::ComputeGraphPtr &graph) {
     (void)serialized_proto;
     (void)graph;
     return UNSUPPORTED;
@@ -146,13 +146,24 @@ class GE_FUNC_VISIBILITY ModelParser {
   * @return SUCCESS
   * @return Others failed
   */
-  virtual domi::Status ParseProtoWithSubgraph(const std::string &serialized_proto, GetGraphCallbackV2 callback,
-                                              ge::ComputeGraphPtr &graph) {
+  virtual Status ParseProtoWithSubgraph(const std::string &serialized_proto, GetGraphCallbackV2 callback,
+                                        ge::ComputeGraphPtr &graph) {
     (void)serialized_proto;
     (void)callback;
     (void)graph;
     return UNSUPPORTED;
   }
+
+  virtual bool HasError() {
+    return false;
+  }
+
+  virtual Status Save(const std::string &file) {
+    (void)file;
+    return SUCCESS;
+  }
+
+  virtual void Clear() {}
 };
 }  // namespace domi
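The three new virtuals ship with default bodies, so existing parsers keep compiling; a subclass opts in only where it needs the hooks. A hypothetical subclass sketch (the class name and state are illustrative; the pure-virtual `Parse`/`ParseFromMemory`/... overrides are elided):

```cpp
class MyParser : public domi::ModelParser {
 public:
  bool HasError() override { return has_error_; }
  domi::Status Save(const std::string &file) override {
    (void)file;  // persist parser state to `file` here
    return domi::SUCCESS;
  }
  void Clear() override { has_error_ = false; }
  // ... overrides of the pure-virtual Parse/ParseFromMemory/ParseProto/... go here ...

 private:
  bool has_error_ = false;
};
```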
@@ -23,6 +23,7 @@
 #include <string>
 #include "framework/omg/omg_inner_types.h"
 #include "framework/omg/parser/parser_types.h"
+#include "external/register/register.h"
 namespace domi {
 class WeightsParser;
@@ -131,6 +132,12 @@ class GE_FUNC_VISIBILITY WeightsParserRegisterar {
     return std::shared_ptr<WeightsParser>(ptr); \
   } \
   WeightsParserRegisterar g_##type##_Weights_Parser_Creator(type, Creator_##type##_Weights_Parser)
-};  // namespace domi
+
+class GE_FUNC_VISIBILITY OpRegTbeParserFactory {
+ public:
+  static OpRegTbeParserFactory *Instance();
+  void Finalize(const domi::OpRegistrationData &reg_data);
+};
+}  // namespace domi
 #endif  // INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_
@@ -67,6 +67,17 @@ class GE_FUNC_VISIBILITY WeightsParser {
   * @author
   */
  virtual Status ParseFromMemory(const char *input, uint32_t lengt, ge::ComputeGraphPtr &graph) = 0;
+
+  virtual bool HasError() {
+    return false;
+  }
+
+  virtual Status Save(const std::string &file) {
+    (void)file;
+    return SUCCESS;
+  }
+
+  virtual void Clear() {}
 };
 }  // namespace domi
@@ -27,6 +27,15 @@
 #include "framework/pne/pne_model.h"
 namespace ge {
+class ProcessNodeEngineImpl {
+ public:
+  virtual Status OptimizeGraph(const std::vector<GeTensor> &inputs, ComputeGraphPtr &compute_graph) = 0;
+  virtual Status BuildGraph(ComputeGraphPtr &compute_graph, PneModelPtr &model) = 0;
+};
+
+using ProcessNodeEngineImplPtr = std::shared_ptr<ProcessNodeEngineImpl>;
+
 class ProcessNodeEngine {
  public:
  ProcessNodeEngine() = default;
@@ -45,8 +54,11 @@ class ProcessNodeEngine {
   virtual const std::string &GetEngineName(const ge::NodePtr &node_ptr = nullptr) const = 0;
+  virtual void SetImpl(ProcessNodeEngineImplPtr impl) = 0;
+
  protected:
   std::string engine_id_;
+  ProcessNodeEngineImplPtr impl_ = nullptr;
 };
 using ProcessNodeEnginePtr = std::shared_ptr<ProcessNodeEngine>;
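With this change an engine delegates the two compile phases (optimize, then build) to a pluggable impl installed through `SetImpl`. A hypothetical impl sketch; the class name, the no-op bodies, and the include path are assumptions, while the two signatures come from the declaration above:

```cpp
#include <memory>
#include <vector>
#include "framework/pne/process_node_engine.h"  // assumed header path

class MyEngineImpl : public ge::ProcessNodeEngineImpl {
 public:
  ge::Status OptimizeGraph(const std::vector<ge::GeTensor> &inputs,
                           ge::ComputeGraphPtr &compute_graph) override {
    (void)inputs;
    (void)compute_graph;  // run engine-specific graph passes here
    return ge::SUCCESS;
  }
  ge::Status BuildGraph(ge::ComputeGraphPtr &compute_graph, ge::PneModelPtr &model) override {
    (void)compute_graph;
    (void)model;  // lower the optimized graph into a PNE model here
    return ge::SUCCESS;
  }
};

// Installation on some engine instance:
//   engine->SetImpl(std::make_shared<MyEngineImpl>());
```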
@@ -0,0 +1,27 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef AIR_CXX_INC_FRAMEWORK_RUNTIME_GERT_API_H_
+#define AIR_CXX_INC_FRAMEWORK_RUNTIME_GERT_API_H_
+#include "model_v2_executor.h"
+#include "common/ge_types.h"
+
+namespace gert {
+std::unique_ptr<ModelV2Executor> LoadExecutorFromFile(const char *file_path, ge::graphStatus &error_code);
+std::unique_ptr<ModelV2Executor> LoadExecutorFromModelData(const ge::ModelData &model_data,
+                                                           ge::graphStatus &error_code);
+}  // namespace gert
+
+#endif  // AIR_CXX_INC_FRAMEWORK_RUNTIME_GERT_API_H_
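A loading sketch for the new entry points. The include path and the model file name are assumptions (the guard suggests inc/framework/runtime/); the out-parameter carries the failure reason when the returned pointer is null:

```cpp
#include "framework/runtime/gert_api.h"  // assumed header path

int main() {
  ge::graphStatus ret = ge::GRAPH_SUCCESS;
  auto executor = gert::LoadExecutorFromFile("./model.om", ret);  // hypothetical file
  if ((executor == nullptr) || (ret != ge::GRAPH_SUCCESS)) {
    return -1;  // loading failed; `ret` holds the error code
  }
  // executor->Load() / Execute(...) as declared in model_v2_executor.h
  return 0;
}
```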
@@ -0,0 +1,94 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef AIR_CXX_INC_FRAMEWORK_RUNTIME_MODEL_DESC_H_
+#define AIR_CXX_INC_FRAMEWORK_RUNTIME_MODEL_DESC_H_
+#include "common/ge_types.h"
+#include "exe_graph/runtime/shape.h"
+#include "exe_graph/runtime/continuous_vector.h"
+#include "exe_graph/runtime/storage_format.h"
+#include "exe_graph/runtime/storage_shape.h"
+
+namespace gert {
+class ShapeRange {
+ public:
+  const Shape &GetMin() const;
+  const Shape &GetMax() const;
+  Shape &MutableMin();
+  Shape &MutableMax();
+
+ private:
+  Shape min_;
+  Shape max_;
+};
+
+class ModelIoDesc {
+ public:
+  const char *GetName() const;
+  int32_t GetDataType() const;
+  ge::Format GetStorageFormat() const;
+  ge::Format GetOriginFormat() const;
+  int64_t GetSize() const;
+  const Shape &GetStorageShape() const;
+  const Shape &GetOriginShape() const;
+  const ShapeRange &GetOriginShapeRange() const;
+  const ShapeRange &GetStorageShapeRange() const;
+
+  void SetName(const char *name);
+  void SetDataType(int32_t data_type);
+  void SetStorageFormat(ge::Format format);
+  void SetOriginFormat(ge::Format format);
+  Shape &MutableStorageShape();
+  Shape &MutableOriginShape();
+  ShapeRange &MutableOriginShapeRange();
+  ShapeRange &MutableStorageShapeRange();
+
+ private:
+  const char *name_;
+  int32_t data_type_;
+  StorageFormat format_;
+  StorageShape shape_;
+  ShapeRange storage_shape_range_;
+  ShapeRange origin_shape_range_;
+};
+
+class ModelDesc {
+ public:
+  static size_t CalcSize(size_t input_num, size_t output_num);
+  const ModelIoDesc *GetInputDesc(size_t index) const;
+  const ModelIoDesc *GetAllInputsDesc(size_t &input_num) const;
+
+  const ModelIoDesc *GetOutputDesc(size_t index) const;
+  const ModelIoDesc *GetAllOutputsDesc(size_t &output_num) const;
+
+  ModelIoDesc *MutableInputDesc(size_t index);
+  ModelIoDesc *MutableOutputDesc(size_t index);
+  ModelIoDesc *AllMutableIoDesc(size_t &input_num, size_t &output_num);
+  void SetInputNum(size_t input_num);
+  void SetOutputNum(size_t output_num);
+
+  ge::graphStatus GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) const;
+  ge::graphStatus GetUserDesignateShapeOrder(std::vector<std::string> &user_designate_shape_order) const;
+  ge::graphStatus GetModelAttrs(std::vector<std::string> &attrs) const;
+
+ private:
+  size_t input_num_;
+  size_t output_num_;
+  ContinuousVector model_io_descs_;
+};
+}  // namespace gert
+
+#endif  // AIR_CXX_INC_FRAMEWORK_RUNTIME_MODEL_DESC_H_
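A reading sketch over the IO descriptors, assuming (as the paired size out-parameter suggests) that `GetAllInputsDesc` returns a pointer to `input_num` contiguous descriptors:

```cpp
#include <cstdio>
#include "model_desc.h"  // relative include, matching the headers above

void PrintInputs(const gert::ModelDesc &desc) {
  size_t input_num = 0U;
  const gert::ModelIoDesc *inputs = desc.GetAllInputsDesc(input_num);
  for (size_t i = 0U; i < input_num; ++i) {
    const gert::ModelIoDesc &io = inputs[i];
    std::printf("input %zu: name=%s, dtype=%d, size=%lld\n", i, io.GetName(), io.GetDataType(),
                static_cast<long long>(io.GetSize()));
  }
}
```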
@@ -0,0 +1,142 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef AIR_CXX_RUNTIME_V2_CORE_MODEL_V_2_EXECUTOR_H_
+#define AIR_CXX_RUNTIME_V2_CORE_MODEL_V_2_EXECUTOR_H_
+#include <memory>
+#include "graph/compute_graph.h"
+#include "graph/ge_error_codes.h"
+#include "model_desc.h"
+#include "runtime/stream.h"
+#include "exe_graph/runtime/tensor.h"
+
+namespace gert {
+enum SubExeGraphType { kInitExeGraph, kMainExeGraph, kDeInitExeGraph, kSubExeGraphTypeEnd };
+static constexpr char *kSubExeGraphTypeStrs[kSubExeGraphTypeEnd] = {(char *)"Init", (char *)"Main", (char *)"DeInit"};
+inline const char *GetSubExeGraphTypeStr(SubExeGraphType type) {
+  return kSubExeGraphTypeStrs[type];
+}
+
+class ResourceGuard {
+ public:
+  void *ResetExecutionData(std::unique_ptr<uint8_t[]> execution_data);
+  void ResetAnyValue(std::unique_ptr<uint8_t[]> any_values, size_t count);
+  void PushNode(void *node);
+  void PushWatcher(void *watcher);
+  void *ResetNodesArray(std::unique_ptr<uint8_t[]> nodes_array);
+  void *ResetStartNodesArray(std::unique_ptr<uint8_t[]> start_nodes_array);
+  void *ResetNodesIndgreeArray(std::unique_ptr<uint8_t[]> nodes_indgree_array);
+  void *ResetNodesWaitIndgreeArray(std::unique_ptr<uint8_t[]> nodes_indgree_array);
+  void *ResetInputsArray(std::unique_ptr<uint8_t[]> inputs_array);
+  void *ResetOutputsArray(std::unique_ptr<uint8_t[]> outputs_array);
+  void *ResetWatchersArray(std::unique_ptr<uint8_t[]> watchers_array);
+  void *ResetReadyQueue(void *ready_queue);
+  void *ResetBuffer(std::unique_ptr<uint8_t[]> buffer);
+  void *ResetComputeNodeInfo(std::unique_ptr<uint8_t[]> compute_node_info);
+  void *ResetKernelExtendInfo(std::unique_ptr<uint8_t[]> kernel_extend_info);
+  void *ResetModelDesc(std::unique_ptr<uint8_t[]> model_desc);
+  ~ResourceGuard();
+
+ private:
+  std::unique_ptr<uint8_t[]> execution_data_holder_;
+  size_t any_values_num_;
+  std::unique_ptr<uint8_t[]> any_values_guard_;
+  std::vector<std::unique_ptr<void, decltype(&free)>> nodes_guarder_;
+  std::vector<std::unique_ptr<void, decltype(&free)>> watchers_guarder_;
+  std::unique_ptr<uint8_t[]> continuous_buffer_guarder_;
+  std::unique_ptr<uint8_t[]> buffer_guarder_;
+  std::unique_ptr<uint8_t[]> compute_node_info_guarder_;
+  std::unique_ptr<uint8_t[]> kernel_extend_info_guarder_;
+  std::unique_ptr<uint8_t[]> model_desc_guarder_;
+  std::unique_ptr<uint8_t[]> nodes_array_guarder_;
+  std::unique_ptr<uint8_t[]> start_nodes_array_guarder_;
+  std::unique_ptr<uint8_t[]> nodes_indgree_array_guarder_;
+  std::unique_ptr<uint8_t[]> nodes_wait_indgree_array_guarder_;
+  std::unique_ptr<uint8_t[]> inputs_array_guarder_;
+  std::unique_ptr<uint8_t[]> outputs_array_guarder_;
+  std::unique_ptr<uint8_t[]> watchers_array_guarder_;
+  std::unique_ptr<void, decltype(&free)> ready_queue_guarder_{nullptr, nullptr};
+};
+
+struct ModelExecuteArg {
+  rtStream_t stream;
+};
+static_assert(std::is_standard_layout<ModelExecuteArg>::value, "The class ModelExecuteArg must be a POD");
+
+class ExeGraphExecutor {
+ public:
+  // todo: release the AnyValue resources on unload
+  ge::graphStatus Load() {
+    return ge::GRAPH_SUCCESS;
+  }
+  ge::graphStatus UnLoad() {
+    return ge::GRAPH_SUCCESS;
+  }
+
+  /**
+   * Sets the inputs/outputs for graph execution. Note that the caller must
+   * guarantee by themselves that the inputs/outputs are fully refreshed!
+   */
+  ge::graphStatus SpecifyInputs(void **inputs, size_t start, size_t num);
+  ge::graphStatus SpecifyOutputs(void **outputs, size_t num);
+  ge::graphStatus Execute();
+
+  const void *GetExecutionData() const {
+    return execution_data_;
+  }
+  ResourceGuard &GetResourceGuard();
+  void *SetExecutionData(std::unique_ptr<uint8_t[]> execution_data);
+
+ private:
+  friend class ModelV2ExecutorTestHelper;
+  void *execution_data_;
+  ResourceGuard resource_guard_;
+};
+
+class ModelV2Executor {
+ public:
+  static std::unique_ptr<ModelV2Executor> Create(const ge::ComputeGraphPtr &root_graph);
+
+  ge::graphStatus Load();
+  ge::graphStatus Execute(const ModelExecuteArg &arg, Tensor **inputs, size_t input_num, Tensor **outputs,
+                          size_t output_num);
+  ge::graphStatus ExecuteSync(Tensor **inputs, size_t input_num, Tensor **outputs, size_t output_num);
+  ge::graphStatus UnLoad();
+
+  const ModelDesc &GetModelDesc() const;
+  void SetModelDesc(ModelDesc *model_desc);
+  ModelV2Executor(const ModelV2Executor &) = delete;
+  ModelV2Executor(ModelV2Executor &&) = delete;
+  ModelV2Executor &operator=(const ModelV2Executor &) = delete;
+  ModelV2Executor &operator=(ModelV2Executor &&) = delete;
+
+ private:
+  friend class ModelV2ExecutorBuilder;
+  friend class ModelV2ExecutorTestHelper;
+  ModelV2Executor() = default;
+
+ private:
+  std::array<ExeGraphExecutor, kSubExeGraphTypeEnd> graphs_;
+  ResourceGuard resource_guard_;
+  ModelDesc *model_desc_ = nullptr;
+  rtStream_t default_stream_ = nullptr;
+};
+}  // namespace gert
+
+#endif  // AIR_CXX_RUNTIME_V2_CORE_MODEL_V_2_EXECUTOR_H_
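An end-to-end lifecycle sketch built only from the declarations above; tensor preparation is elided because it depends on the model's IO layout, and the helper name is hypothetical:

```cpp
ge::graphStatus RunOnce(gert::ModelV2Executor &executor, rtStream_t stream,
                        gert::Tensor **inputs, size_t input_num,
                        gert::Tensor **outputs, size_t output_num) {
  ge::graphStatus ret = executor.Load();
  if (ret != ge::GRAPH_SUCCESS) {
    return ret;
  }
  gert::ModelExecuteArg arg{stream};  // standard-layout by the static_assert above
  ret = executor.Execute(arg, inputs, input_num, outputs, output_num);
  (void)executor.UnLoad();
  return ret;
}
```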
@@ -1 +1 @@
-Subproject commit f3e9df35da67ff00a22a09ec5b369bbc4bac9e74
+Subproject commit 002617852e22767bd864db3c01595630e23f5496
@@ -211,6 +211,8 @@ typedef struct {
 #define HCCL_REQUEST_NULL NULL
+#define HCCL_TAG_ANY (1 << 30)
+
 #ifdef __cplusplus
 }
 #endif  // __cplusplus
@@ -42,6 +42,7 @@
 #include "list_ops.h"
 #include "logging_ops.h"
 #include "lookup_ops.h"
+#include "map_ops.h"
 #include "math_ops.h"
 #include "matrix_calculation_ops.h"
 #include "nn_batch_norm_ops.h"
@@ -79,4 +80,5 @@
 #include "warp_perspective_ops.h"
 #include "vector_search.h"
 #include "deep_md.h"
+#include "encoding_ops.h"
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_ALL_OPS_H_
@@ -1550,6 +1550,39 @@ REG_OP(EnsureShape)
                           DT_FLOAT,DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
     .REQUIRED_ATTR(shape, ListInt)
     .OP_END_FACTORY_REG(EnsureShape)
+
+/**
+* @brief Finds the first unique element from every consecutive group of equivalent elements.
+* @par Inputs:
+* x: An ND tensor.
+* @par Attributes:
+* @li return_idx: An optional bool. Whether to also return the indices. The default value is False.
+* @li return_counts: An optional bool. Whether to also return the counts for each element. The default is False.
+* @li axis: An optional int. The axis to apply unique along. The default is 1000, which means None (the input is flattened).
+* @par Outputs:
+* @li y: The unique elements of "x".
+* @li idx: The index in "y" of each value of "x".
+* @li count: The counts of each value of "y".
+* @attention Constraints:
+* UniqueConsecutive runs on the Ascend AI CPU, which delivers poor performance.
+* @par Third-party framework compatibility
+* Compatible with the PyTorch operator UniqueConsecutive.
+*/
+REG_OP(UniqueConsecutive)
+    .INPUT(x, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
+    .OUTPUT(idx, TensorType::IndexNumberType())
+    .OUTPUT(count, TensorType::IndexNumberType())
+    .ATTR(return_idx, Bool, false)
+    .ATTR(return_counts, Bool, false)
+    .ATTR(axis, Int, 1000)
+    .OP_END_FACTORY_REG(UniqueConsecutive)
 }  // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_
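For reference, the intended semantics (mirroring torch.unique_consecutive, which the compatibility note cites) can be pinned down with a small host-side reference implementation over a flattened input:

```cpp
#include <cstdint>
#include <vector>

// Reference semantics on a flattened input, e.g.
// x = [1,1,2,2,3,1,1,2] -> y = [1,2,3,1,2], idx = [0,0,1,1,2,3,3,4], count = [2,2,1,2,1].
void UniqueConsecutiveRef(const std::vector<int64_t> &x, std::vector<int64_t> &y,
                          std::vector<int64_t> &idx, std::vector<int64_t> &count) {
  y.clear();
  idx.clear();
  count.clear();
  for (size_t i = 0U; i < x.size(); ++i) {
    if ((i == 0U) || (x[i] != x[i - 1U])) {  // start of a new run of equal elements
      y.push_back(x[i]);
      count.push_back(0);
    }
    idx.push_back(static_cast<int64_t>(y.size()) - 1);  // run index of x[i]
    ++count.back();                                     // run length
  }
}
```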
@@ -1,53 +0,0 @@
-/**
- * Copyright 2019 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*!
- * \file case_condition_ops.h
- * \brief
- */
-#ifndef OPS_BUILT_IN_OP_PROTO_INC_CASE_CONDITION_OPS_H_
-#define OPS_BUILT_IN_OP_PROTO_INC_CASE_CONDITION_OPS_H_
-#include "graph/operator_reg.h"
-namespace ge {
-/**
-*@brief x[0] is i, x[1] is j and x[2] is k when algorithm is LU,
-        y = 0 when i >= k && j < k,
-        y = 1 when i == k && j == k,
-        y = 2 when i > k && j == k,
-        y = 3 when i == k && j > k,
-        y = 4 when i > k && j > k,
-        default y = 5
-        use for lu decomposition
-*@par Inputs:
-*x: A Tensor of type int32/int64/uint64. \n
-*@par Attributes:
-*algorithm: A string, only support LU now
-*@par Outputs:
-*y: A Tensor of type int32
-*/
-REG_OP(CaseCondition)
-    .INPUT(x, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
-    .OUTPUT(y, TensorType({DT_INT32}))
-    .ATTR(algorithm, String, "LU")
-    .OP_END_FACTORY_REG(CaseCondition)
-}  // namespace ge
-#endif  // OPS_BUILT_IN_OP_PROTO_INC_CASE_CONDITION_OPS_H_
@@ -1,48 +0,0 @@
-/**
- * Copyright 2019 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*!
- * \file coordinates_1d_to_2d_ops.h
- * \brief
- */
-#ifndef OPS_BUILT_IN_OP_PROTO_INC_COORDINATES_1D_TO_2D_OPS_H_
-#define OPS_BUILT_IN_OP_PROTO_INC_COORDINATES_1D_TO_2D_OPS_H_
-#include "graph/operator_reg.h"
-namespace ge {
-/**
-*@brief Convert one-dimensional coordinates to two-dimensional coordinates.
-*@par Inputs:
-*@li x: A Tensor of type int32/int64/uint64. One-dimensional coordinates.
-*@li shape: A Tensor of type int32/int64/uint64. 4D tensor [N,C,H,W].
-*@par Outputs:
-*@li row: row of two-dimensional
-*@li col: col of two-dimensional
-*@li n: col number of two-dimensional
-*/
-REG_OP(Coordinates1DTo2D)
-    .INPUT(x, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
-    .INPUT(shape, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
-    .OUTPUT(row, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
-    .OUTPUT(col, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
-    .OUTPUT(n, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
-    .OP_END_FACTORY_REG(Coordinates1DTo2D)
-}  // namespace ge
-#endif  // OPS_BUILT_IN_OP_PROTO_INC_COORDINATES_1D_TO_2D_OPS_H_
@@ -1620,8 +1620,8 @@ REG_OP(Greater)
 * Compatible with the TensorFlow operator zeros_like.
 */
 REG_OP(ZerosLike)
-    .INPUT(x, TensorType::BasicType())
-    .OUTPUT(y, TensorType::BasicType())
+    .INPUT(x, TensorType({BasicType(), DT_VARIANT}))
+    .OUTPUT(y, TensorType({BasicType(), DT_VARIANT}))
     .OP_END_FACTORY_REG(ZerosLike)
 /**
@@ -0,0 +1,49 @@
+/**
+ * Copyright (C) Huawei Technologies Co., Ltd 2022-2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*!
+ * \file encoding_ops.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_ENCODING_OPS_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_ENCODING_OPS_H_
+
+#include "graph/operator_reg.h"
+#include "graph/operator.h"
+
+namespace ge {
+/**
+ * @brief An op to decode indices for LDPC code. \n
+ * @par Inputs:
+ * @li valid_num: An int32 tensor that indicates the index limit for each line.
+ * @li matrix_info: An int32 2D-tensor storing the block indices info of the connection H matrix. \n
+ * @par Outputs:
+ * indices: An int32 2D-tensor storing the concrete indices values.
+ *
+ * @par Restrictions:
+ * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+ */
+REG_OP(LDPCDecode)
+    .INPUT(valid_num, TensorType({DT_INT32}))
+    .INPUT(matrix_info, TensorType({DT_INT32}))
+    .OUTPUT(indices, TensorType({DT_INT32}))
+    .OP_END_FACTORY_REG(LDPCDecode)
+}  // namespace ge
+
+#endif  // OPS_BUILT_IN_OP_PROTO_INC_ENCODING_OPS_H_
@@ -124,6 +124,30 @@ REG_OP(If)
     .GRAPH(else_branch)
     .OP_END_FACTORY_REG(If)
+
+/**
+*@brief Select one of the subgraphs to pass the input tensors and return the output tensors . \n
+*@par Inputs:
+*@li branch_index: An int32 scalar which determines the selected subgraph.
+*@li input: The input tensors, which will be passed to the subgraph. It's a dynamic input. \n
+*@par Graphs:
+*branches: A list of subgraphs, each of which takes 'input' and returns a list of tensors,
+* whose types are the same as what every other subgraph returns . \n
+*@par Outputs:
+*output: The output tensors returned by one of the branches . It's a dynamic output. \n
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator Case.
+*/
+REG_OP(StatelessCase)
+    .INPUT(branch_index, DT_INT32)
+    .DYNAMIC_INPUT(input, TensorType::ALL())
+    .DYNAMIC_OUTPUT(output, TensorType::ALL())
+    .DYNAMIC_GRAPH(branches)
+    .OP_END_FACTORY_REG(StatelessCase)
 /**
 *@brief Select one of the subgraphs to pass the input tensors and return the output tensors . \n
@@ -1,63 +0,0 @@
-/**
- * Copyright 2019 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*!
- * \file index_to_addr_ops.h
- * \brief
- */
-#ifndef OPS_BUILT_IN_OP_PROTO_INC_INDEX_TO_ADDR_OPS_H_
-#define OPS_BUILT_IN_OP_PROTO_INC_INDEX_TO_ADDR_OPS_H_
-#include "graph/operator_reg.h"
-namespace ge {
-/**
-*@brief get block tensor according to base addr tensor, for hccl remote read to use.
-*@par Inputs:
-*@li base_addr: A Tensor of type int64/uint64. \n
-*@li row:A Tensor of type int64/uint64. \n
-*@li col: A Tensor of type int64/uint64.
-*@par Outputs:
-*addr_table: list of [rank id, host addr, device addr, read size]
-*@par Attributes:
-*@li ori_shape: An required list int. Shape of base tensor.
-*@li block_size: An required list int. Shape of split block tensor.
-*@li ori_storage_mode: An optional string from: '"Matrix", "UT"'. Defaults to
-"Matrix". Currently only support Matrix storage
-*@li block_storage_mode: An optional string from: '"Matrix", "UT"'. Defaults to
-"Matrix". Currently only support Matrix storage
-*@li rank_id: An optional int of rank id. Defaults is 0
-*@li dtype: An optional Type of base tensor. Defaults is DT_FLOAT
-*/
-REG_OP(IndexToAddr)
-    .INPUT(base_addr, TensorType({DT_INT64, DT_UINT64}))
-    .INPUT(x, TensorType({DT_INT64, DT_UINT64}))
-    .OUTPUT(addrs_table, TensorType({DT_INT64, DT_UINT64}))
-    .REQUIRED_ATTR(ori_shape, ListInt)
-    .REQUIRED_ATTR(block_size, ListInt)
-    .ATTR(ori_storage_mode, String, "Matrix")
-    .ATTR(block_storage_mode, String, "Matrix")
-    .ATTR(rank_id, Int, 0)
-    .ATTR(dtype, Type, DT_FLOAT)
-    .OP_END_FACTORY_REG(IndexToAddr)
-}  // namespace ge
-#endif  // OPS_BUILT_IN_OP_PROTO_INC_INDEX_TO_ADDR_OPS_H_
@@ -0,0 +1,152 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*!
+ * \file map_ops.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_MAP_OPS_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_MAP_OPS_H_
+
+#include "graph/operator_reg.h"
+
+namespace ge {
+/**
+ * @brief Returns whether the given key exists in the map. \n
+ * @par Inputs:
+ * @li input_handle: A scalar Tensor of type variant. The original map.
+ * @li key: The key to check. Supports int32, int64, string. \n
+ * @par Outputs:
+ * has_key: A scalar Tensor of type bool. Whether the key is already in the map or not. \n
+ * @par Third-party framework compatibility.
+ * Compatible with the TensorFlow TensorMapHasKey operator.
+ */
+REG_OP(TensorMapHasKey)
+    .INPUT(input_handle, TensorType({DT_VARIANT}))
+    .INPUT(key, TensorType({DT_INT32, DT_INT64, DT_STRING}))
+    .OUTPUT(has_key, TensorType({DT_BOOL}))
+    .OP_END_FACTORY_REG(TensorMapHasKey)
+
+/**
+ * @brief Returns a tensor map with the item from the given key erased. \n
+ * @par Inputs:
+ * @li input_handle: A scalar Tensor of type variant. The original map.
+ * @li key: The key of the value to be erased. Supports int32, int64, string. \n
+ * @par Outputs:
+ * output_handle: A scalar Tensor of type variant. The map with the value from the given key removed. \n
+ * @par Third-party framework compatibility.
+ * Compatible with the TensorFlow TensorMapErase operator.
+ */
+REG_OP(TensorMapErase)
+    .INPUT(input_handle, TensorType({DT_VARIANT}))
+    .INPUT(key, TensorType({DT_INT32, DT_INT64, DT_STRING}))
+    .OUTPUT(output_handle, TensorType({DT_VARIANT}))
+    .OP_END_FACTORY_REG(TensorMapErase)
+
+/**
+ * @brief Returns a map that is the 'input_handle' with the given key-value pair inserted. \n
+ * @par Inputs:
+ * @li input_handle: The original map. Must be type: DT_VARIANT.
+ * @li key: A Tensor, the key to be inserted. Must be one of the following types: int32, int64, string.
+ * @li value: A Tensor, the value to be inserted. Must be one of the BasicType types. \n
+ * @par Outputs:
+ * output_handle: The map with the key and value inserted. Must be type: DT_VARIANT. \n
+ */
+REG_OP(TensorMapInsert)
+    .INPUT(input_handle, TensorType({DT_VARIANT}))
+    .INPUT(key, TensorType({DT_INT32, DT_INT64, DT_STRING}))
+    .INPUT(value, BasicType)
+    .OUTPUT(output_handle, TensorType({DT_VARIANT}))
+    .OP_END_FACTORY_REG(TensorMapInsert)
+
+/**
+ * @brief Returns the value from a given key in a tensor map. \n
+ * @par Inputs:
+ * @li input_handle: The input map. Must be type: DT_VARIANT.
+ * @li key: A Tensor, the key to be looked up. Must be one of the following types: int32, int64, string. \n
+ * @par Attributes:
+ * value_dtype: An int representing the type of the value. \n
+ * @par Outputs:
+ * value: A Tensor, the value found for the given key.
+ */
+REG_OP(TensorMapLookup)
+    .INPUT(input_handle, TensorType({DT_VARIANT}))
+    .INPUT(key, TensorType({DT_INT32, DT_INT64, DT_STRING}))
+    .OUTPUT(value, BasicType)
+    .REQUIRED_ATTR(value_dtype, Type)
+    .OP_END_FACTORY_REG(TensorMapLookup)
+
+/**
+ * @brief Returns the TensorMap size. \n
+ *
+ * @par Inputs:
+ * input_handle: A Tensor. Must be one of the following types: variant. \n
+ *
+ * @par Outputs:
+ * size: A Tensor. Must be one of the following types: int32. \n
+ */
+REG_OP(TensorMapSize)
+    .INPUT(input_handle, TensorType({DT_VARIANT}))
+    .OUTPUT(size, TensorType({DT_INT32}))
+    .OP_END_FACTORY_REG(TensorMapSize)
+
+/**
+ * @brief Returns the stacked keys of the TensorMap. \n
+ *
+ * @par Inputs:
+ * input_handle: A Tensor. Must be one of the following types: variant. \n
+ *
+ * @par Outputs:
+ * keys: A Tensor. Must be one of the following types: int32, int64, string. \n
+ *
+ * @par Attributes:
+ * key_dtype: A required attribute. The dtype of the keys.
+ */
+REG_OP(TensorMapStackKeys)
+    .INPUT(input_handle, TensorType({DT_VARIANT}))
+    .OUTPUT(keys, TensorType({DT_INT32, DT_INT64, DT_STRING}))
+    .REQUIRED_ATTR(key_dtype, Type)
+    .OP_END_FACTORY_REG(TensorMapStackKeys)
+
+/**
+ * @brief Creates and returns an empty tensor map. \n
+ * @par Outputs:
+ * handle: An empty tensor map. \n
+ * @par Third-party framework compatibility.
+ * Compatible with the TensorFlow EmptyTensorMap operator.
+ */
+REG_OP(EmptyTensorMap)
+    .OUTPUT(handle, TensorType({DT_VARIANT}))
+    .OP_END_FACTORY_REG(EmptyTensorMap)
+}  // namespace ge
+
+#endif  // OPS_BUILT_IN_OP_PROTO_INC_MAP_OPS_H_
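Taken together, the map ops mirror TensorFlow's TensorMap resource flow: an EmptyTensorMap handle threads through inserts and lookups as a DT_VARIANT value. A hedged wiring sketch; the generated set_input_*/set_attr_* accessor names are assumed from the usual REG_OP conventions, and `key_const`/`value_const` are Const operators built elsewhere:

```cpp
ge::op::EmptyTensorMap empty_map("empty_map");

ge::op::TensorMapInsert insert("insert");
insert.set_input_input_handle(empty_map, "handle");  // DT_VARIANT map handle
insert.set_input_key(key_const);                     // int32/int64/string key
insert.set_input_value(value_const);                 // any BasicType tensor

ge::op::TensorMapLookup lookup("lookup");
lookup.set_input_input_handle(insert, "output_handle");
lookup.set_input_key(key_const);
lookup.set_attr_value_dtype(ge::DT_FLOAT);  // must match the inserted value's dtype
```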
| @@ -24,6 +24,82 @@ | |||||
| #include "graph/operator_reg.h" | #include "graph/operator_reg.h" | ||||
| namespace ge { | namespace ge { | ||||
| /** | |||||
| * @brief Backprop W of AttentionLnQKV + ReduceSumD \n | |||||
| * @par Inputs: | |||||
| * Four inputs, including: | |||||
| * @li x: A Tensor. Must be one of the following types: float16. | |||||
| * @li query_dx: A Tensor. Must be one of the following types: float16. | |||||
| * @li key_dw: A Tensor. Must be one of the following types: float16. | |||||
| * @li value_dw: A Tensor. Must be one of the following types: float16. | |||||
| * @par Attributes: | |||||
| * @li trans_a: A optional attribute, the type is bool. Defaults to True. | |||||
| * @li trans_b: A optional attribute, the type is bool. Defaults to False. \n | |||||
| * @par Outputs: | |||||
| * Six outputs, including: | |||||
| * @li dw_query: A Tensor. Must be one of the following types: float16. | |||||
| * @li dw_key: A Tensor. Must be one of the following types: float16. | |||||
| * @li dw_value: A Tensor. Must be one of the following types: float16. | |||||
| * @li dbias_query: A Tensor. Must be one of the following types: float16. | |||||
| * @li dbias_key: A Tensor. Must be one of the following types: float16. | |||||
| * @li dbias_value: A Tensor. Must be one of the following types: float16. \n | |||||
| * @par Restrictions: | |||||
| * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n | |||||
| */ | |||||
| REG_OP(AttentionQKVGradW) | |||||
| .INPUT(x, TensorType({DT_FLOAT16})) | |||||
| .INPUT(query_dx, TensorType({DT_FLOAT16})) | |||||
| .INPUT(key_dw, TensorType({DT_FLOAT16})) | |||||
| .INPUT(value_dw, TensorType({DT_FLOAT16})) | |||||
| .OUTPUT(dw_query, TensorType({DT_FLOAT16})) | |||||
| .OUTPUT(dw_key, TensorType({DT_FLOAT16})) | |||||
| .OUTPUT(dw_value, TensorType({DT_FLOAT16})) | |||||
| .OUTPUT(dbias_query, TensorType({DT_FLOAT16})) | |||||
| .OUTPUT(dbias_key, TensorType({DT_FLOAT16})) | |||||
| .OUTPUT(dbias_value, TensorType({DT_FLOAT16})) | |||||
| .ATTR(trans_a, Bool, true) | |||||
| .ATTR(trans_b, Bool, false) | |||||
| .OP_END_FACTORY_REG(AttentionQKVGradW) | |||||
| /** | |||||
| * @brief Backprop X of AttentionLnQKV + AddN \n | |||||
| * @par Inputs: | |||||
| * Seven inputs, including: | |||||
| * @li ln_dx: A Tensor. Must be one of the following types: float16. | |||||
| * @li query_dx: A Tensor. Must be one of the following types: float16. | |||||
| * @li key_dw: A Tensor. Must be one of the following types: float16. | |||||
| * @li value_dw: A Tensor. Must be one of the following types: float16. | |||||
| * @li kernel_query: A Tensor. Must be one of the following types: float16. | |||||
| * @li kernel_key: A Tensor. Must be one of the following types: float16. | |||||
| * @li kernel_value: A Tensor. Must be one of the following types: float16. \n | |||||
| * @par Attributes: | |||||
| * @li trans_a: A optional attribute, the type is bool. Defaults to False. | |||||
| * @li trans_b: A optional attribute, the type is bool. Defaults to True. \n | |||||
| * @par Outputs: | |||||
| * One outputs, including: | |||||
| * @li dx: A Tensor. Must be one of the following types: float16. \n | |||||
| * @par Restrictions: | |||||
| * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n | |||||
| */ | |||||
| REG_OP(AttentionQKVGradX) | |||||
| .INPUT(ln_dx, TensorType({DT_FLOAT16})) | |||||
| .INPUT(query_dx, TensorType({DT_FLOAT16})) | |||||
| .INPUT(key_dw, TensorType({DT_FLOAT16})) | |||||
| .INPUT(value_dw, TensorType({DT_FLOAT16})) | |||||
| .INPUT(kernel_query, TensorType({DT_FLOAT16})) | |||||
| .INPUT(kernel_key, TensorType({DT_FLOAT16})) | |||||
| .INPUT(kernel_value, TensorType({DT_FLOAT16})) | |||||
| .OUTPUT(dx, TensorType({DT_FLOAT16})) | |||||
| .ATTR(trans_a, Bool, false) | |||||
| .ATTR(trans_b, Bool, true) | |||||
| .OP_END_FACTORY_REG(AttentionQKVGradX) | |||||
| /** | /** | ||||
| * @brief | * @brief | ||||
| / (MatMul -> ConfusionTransposeD). | / (MatMul -> ConfusionTransposeD). | ||||
| @@ -54,6 +130,9 @@ namespace ge { | |||||
| * @li value_output: A Tensor. Must be one of the following types: float16. | * @li value_output: A Tensor. Must be one of the following types: float16. | ||||
| * @li mean: A Tensor. Must be one of the following types: float16. | * @li mean: A Tensor. Must be one of the following types: float16. | ||||
| * @li variance: A Tensor. Must be one of the following types: float16. \n | * @li variance: A Tensor. Must be one of the following types: float16. \n | ||||
| * @par Restrictions: | |||||
| * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n | |||||
| */ | */ | ||||
| REG_OP(AttentionLnQKV) | REG_OP(AttentionLnQKV) | ||||
| .INPUT(x, TensorType({DT_FLOAT16})) | .INPUT(x, TensorType({DT_FLOAT16})) | ||||
| @@ -2276,6 +2276,46 @@ REG_OP(BalanceRois) | |||||
| .OUTPUT(balance_rois, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(balance_rois, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .OUTPUT(index, TensorType({DT_INT32})) | .OUTPUT(index, TensorType({DT_INT32})) | ||||
| .OP_END_FACTORY_REG(BalanceRois) | .OP_END_FACTORY_REG(BalanceRois) | ||||
| /** | |||||
| * @brief First calculate the minimum closure area of the two boxes, IoU, | |||||
| * The CIoU is obtained by combining the center distance and width to height ratio and IoU. \n | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with | |||||
| * shape (4, N). "N" indicates the number of bounding boxes, and the value | |||||
| * "4" refers to [x1, y1, x2, y2] or [x, y, w, h]. | |||||
| * @li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32 | |||||
| * with shape (4, M). "M" indicates the number of ground truth boxes, and | |||||
| * the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n | |||||
| * @par Attributes: | |||||
| * @li trans: An optional bool, true for 'xywh', false for 'xyxy'. | |||||
| * @li is_cross: An optional bool, controls whether the output shape is [N, M] or [1, N]. | |||||
| * @li mode: An optional string specifying the computation mode, with the value range of [iou, iof]. | |||||
| * @li atan_sub_flag: An optional bool, controls whether to output atan_sub. \n | |||||
| * @par Outputs: | |||||
| * Two outputs, including: | |||||
| * @li overlap: A 2D Tensor of type float16 or float32 with shape [N, M] or [1, N], | |||||
| * specifying the IoU or IoF ratio. | |||||
| * @li atan_sub: A 2D Tensor of type float16 or float32 with shape [N, M] or [1, N], | |||||
| * specifying the arctangent aspect-ratio difference used by the CIoU gradient. \n | |||||
| * @attention Constraints: | |||||
| * "is_cross" only supports false, "atan_sub_flag" only supports true. | |||||
| */ | |||||
| REG_OP(CIoU) | |||||
| .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(atan_sub, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(trans, Bool, false) | |||||
| .ATTR(is_cross, Bool, true) | |||||
| .ATTR(mode, String, "iou") | |||||
| .ATTR(atan_sub_flag, Bool, false) | |||||
| .OP_END_FACTORY_REG(CIoU) | |||||
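// A scalar sketch of the computation the brief above describes, for one
// (bbox, gtbox) pair in 'xyxy' layout. The fused kernel may differ; the
// aspect-ratio weighting below follows the common CIoU formulation, and
// atan_sub is the arctangent difference the atan_sub output refers to.
#include <algorithm>
#include <cmath>

float CIoUPair(const float b[4], const float g[4]) {
  const float kPi = 3.14159265358979f;
  const float eps = 1e-9f;
  // IoU from the intersection and the two box areas.
  float iw = std::max(0.0f, std::min(b[2], g[2]) - std::max(b[0], g[0]));
  float ih = std::max(0.0f, std::min(b[3], g[3]) - std::max(b[1], g[1]));
  float inter = iw * ih;
  float area_b = (b[2] - b[0]) * (b[3] - b[1]);
  float area_g = (g[2] - g[0]) * (g[3] - g[1]);
  float iou = inter / (area_b + area_g - inter + eps);
  // Squared center distance over the diagonal of the minimum enclosing box.
  float cw = std::max(b[2], g[2]) - std::min(b[0], g[0]);
  float ch = std::max(b[3], g[3]) - std::min(b[1], g[1]);
  float dx = (b[0] + b[2] - g[0] - g[2]) * 0.5f;
  float dy = (b[1] + b[3] - g[1] - g[3]) * 0.5f;
  float rho2 = dx * dx + dy * dy;
  float c2 = cw * cw + ch * ch + eps;
  // Width-to-height consistency penalty.
  float atan_sub = std::atan((g[2] - g[0]) / (g[3] - g[1] + eps)) -
                   std::atan((b[2] - b[0]) / (b[3] - b[1] + eps));
  float v = 4.0f / (kPi * kPi) * atan_sub * atan_sub;
  float alpha = v / (1.0f - iou + v + eps);
  return iou - rho2 / c2 - alpha * v;
}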
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ | ||||
| @@ -458,22 +458,22 @@ REG_OP(Softsign) | |||||
| .OP_END_FACTORY_REG(Softsign) | .OP_END_FACTORY_REG(Softsign) | ||||
| /** | /** | ||||
| * @brief Computes softsignGrad: y_grad / (1 + abs(x)) ** 2 . | |||||
| * @brief Computes softsignGrad: gradients / (1 + abs(features)) ** 2 . | |||||
| * | * | ||||
| * @par Inputs: | * @par Inputs: | ||||
| * Two inputs, including: | * Two inputs, including: | ||||
| * @li y_grad: A Tensor.Must be one of the following types:float16, float32, | |||||
| * @li x: A Tensor of the same type and shape as "gradients". | |||||
| * @li gradients: A Tensor. Must be one of the following types: float16, float32. | |||||
| * @li features: A Tensor of the same type and shape as "gradients". | |||||
| * @par x_grad: | |||||
| * output:A Tensor. Has the same type as "y_grad". | |||||
| * @par Outputs: | |||||
| * output: A Tensor. Has the same type as "gradients". | |||||
| * @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
| * Compatible with the TensorFlow operator SoftsignGrad. | * Compatible with the TensorFlow operator SoftsignGrad. | ||||
| */ | */ | ||||
| REG_OP(SoftsignGrad) | REG_OP(SoftsignGrad) | ||||
| .INPUT(y_grad, TensorType::FloatingDataType()) | |||||
| .INPUT(x, TensorType::FloatingDataType()) | |||||
| .OUTPUT(x_grad, TensorType::FloatingDataType()) | |||||
| .INPUT(gradients, TensorType::FloatingDataType()) | |||||
| .INPUT(features, TensorType::FloatingDataType()) | |||||
| .OUTPUT(output, TensorType::FloatingDataType()) | |||||
| .OP_END_FACTORY_REG(SoftsignGrad) | .OP_END_FACTORY_REG(SoftsignGrad) | ||||
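// An elementwise sketch of the formula in the renamed brief above:
// output = gradients / (1 + |features|)^2.
#include <cmath>
#include <vector>

std::vector<float> SoftsignGradRef(const std::vector<float> &gradients,
                                   const std::vector<float> &features) {
  std::vector<float> output(gradients.size());
  for (size_t i = 0; i < gradients.size(); ++i) {
    float d = 1.0f + std::fabs(features[i]);
    output[i] = gradients[i] / (d * d);
  }
  return output;
}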
| /** | /** | ||||
| @@ -500,23 +500,23 @@ REG_OP(Selu) | |||||
| .OP_END_FACTORY_REG(Selu) | .OP_END_FACTORY_REG(Selu) | ||||
| /** | /** | ||||
| * @brief Computes SeluGrad backprops: y_grad * (y + scale * alpha) | |||||
| * if y < 0, scale * y_grad otherwise . | |||||
| * @brief Computes SeluGrad backprops: gradients * (outputs + scale * alpha) | |||||
| * if outputs < 0, scale * gradients otherwise. | |||||
| * @par Inputs: | * @par Inputs: | ||||
| * Two inputs, including: | * Two inputs, including: | ||||
| * @li y_grad: A Tensor of type RealNumberType . | |||||
| * @li y: A Tensor of type RealNumberType . | |||||
| * @li gradients: A Tensor of type RealNumberType. | |||||
| * @li outputs: A Tensor of type RealNumberType. | |||||
| * @par Outputs: | * @par Outputs: | ||||
| * x_grad: A Tensor. Must have the same type as "y_grad" . | |||||
| * y: A Tensor. Must have the same type as "gradients". | |||||
| * @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
| * Compatible with the TensorFlow operator SeluGrad. | * Compatible with the TensorFlow operator SeluGrad. | ||||
| */ | */ | ||||
| REG_OP(SeluGrad) | REG_OP(SeluGrad) | ||||
| .INPUT(y_grad, TensorType::RealNumberType()) | |||||
| .INPUT(y, TensorType::RealNumberType()) | |||||
| .OUTPUT(x_grad, TensorType::RealNumberType()) | |||||
| .INPUT(gradients, TensorType::RealNumberType()) | |||||
| .INPUT(outputs, TensorType::RealNumberType()) | |||||
| .OUTPUT(y, TensorType::RealNumberType()) | |||||
| .OP_END_FACTORY_REG(SeluGrad) | .OP_END_FACTORY_REG(SeluGrad) | ||||
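// An elementwise sketch of the SeluGrad rule in the renamed brief above,
// using the canonical SELU constants for scale and alpha.
#include <vector>

std::vector<float> SeluGradRef(const std::vector<float> &gradients,
                               const std::vector<float> &outputs) {
  const float scale = 1.0507009873554805f;
  const float alpha = 1.6732632423543772f;
  std::vector<float> y(gradients.size());
  for (size_t i = 0; i < gradients.size(); ++i) {
    y[i] = outputs[i] < 0.0f ? gradients[i] * (outputs[i] + scale * alpha)
                             : scale * gradients[i];
  }
  return y;
}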
| /** | /** | ||||
| @@ -2612,6 +2612,34 @@ REG_OP(DynSeqOuter) | |||||
| .INPUT(seq_len2, TensorType({DT_INT32})) | .INPUT(seq_len2, TensorType({DT_INT32})) | ||||
| .OUTPUT(y, TensorType::BasicType()) | .OUTPUT(y, TensorType::BasicType()) | ||||
| .OP_END_FACTORY_REG(DynSeqOuter) | .OP_END_FACTORY_REG(DynSeqOuter) | ||||
| /** | |||||
| * @brief Returns sliced data based on max nmsed_num. \n | |||||
| * @par Inputs: | |||||
| * Four inputs, including: | |||||
| * @li input_nmsed_boxes: A Tensor. Must be one of the following types: float16. | |||||
| * @li input_nmsed_score: A Tensor. Must be one of the following types: float16. | |||||
| * @li input_nmsed_class: A Tensor. Must be one of the following types: float16. | |||||
| * @li input_nmsed_num: A Tensor. Must be one of the following types: int32. \n | |||||
| * @par Outputs: | |||||
| * Three outputs, including: | |||||
| * @li output_nmsed_boxes: A Tensor. Must be one of the following types: float. | |||||
| * @li output_nmsed_score: A Tensor. Must be one of the following types: float. | |||||
| * @li output_nmsed_class: A Tensor. Must be one of the following types: float. \n | |||||
| * @par Restrictions: | |||||
| * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | |||||
| REG_OP(NonMaxSuppressionBucketize) | |||||
| .INPUT(input_nmsed_boxes, TensorType({DT_FLOAT16})) | |||||
| .INPUT(input_nmsed_score, TensorType({DT_FLOAT16})) | |||||
| .INPUT(input_nmsed_class, TensorType({DT_FLOAT16})) | |||||
| .INPUT(input_nmsed_num, TensorType({DT_INT32})) | |||||
| .OUTPUT(output_nmsed_boxes, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(output_nmsed_score, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(output_nmsed_class, TensorType({DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(NonMaxSuppressionBucketize) | |||||
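// A host-side sketch of the slicing the brief describes: keep only the
// first max(input_nmsed_num) entries per batch. The [batch, num, 4] box
// layout here is an assumption for illustration.
#include <algorithm>
#include <cstdint>
#include <vector>

std::vector<float> SliceNmsedBoxes(const std::vector<float> &boxes,
                                   const std::vector<int32_t> &nmsed_num,
                                   int64_t batch, int64_t num_per_batch) {
  int64_t keep = *std::max_element(nmsed_num.begin(), nmsed_num.end());
  std::vector<float> out;  // float output, matching the widened output dtype
  out.reserve(static_cast<size_t>(batch * keep * 4));
  for (int64_t b = 0; b < batch; ++b) {
    const float *src = boxes.data() + b * num_per_batch * 4;
    out.insert(out.end(), src, src + keep * 4);
  }
  return out;
}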
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ | ||||
| @@ -1,50 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| /*! | |||||
| * \file slice_write_ops.h | |||||
| * \brief | |||||
| */ | |||||
| #ifndef OPS_BUILT_IN_OP_PROTO_INC_SLICE_WRITE_OPS_H_ | |||||
| #define OPS_BUILT_IN_OP_PROTO_INC_SLICE_WRITE_OPS_H_ | |||||
| #include "graph/operator_reg.h" | |||||
| namespace ge { | |||||
| /** | |||||
| *@brief write tensor value to tensor x. | |||||
| *@par Inputs: | |||||
| *x: A Tensor of type float16/float/double/int32/int64. \n | |||||
| *begin:A Tensor of type int32/int64. \n | |||||
| *value: A Tensor of type float16/float/double/int32/int64. | |||||
| *@par Outputs: | |||||
| *x: same tensor with input x | |||||
| */ | |||||
| REG_OP(SliceWrite) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \ | |||||
| DT_INT32, DT_INT64})) | |||||
| .INPUT(begin, TensorType({DT_INT32, DT_INT64})) | |||||
| .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \ | |||||
| DT_INT32, DT_INT64})) | |||||
| .OUTPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \ | |||||
| DT_INT32, DT_INT64})) | |||||
| .OP_END_FACTORY_REG(SliceWrite) | |||||
| } // namespace ge | |||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_SLICE_WRITE_OPS_H_ | |||||
| @@ -951,7 +951,7 @@ REG_OP(SerializeSparse) | |||||
| DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \ | DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \ | ||||
| DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) | DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) | ||||
| .INPUT(shape, TensorType({DT_INT64})) | .INPUT(shape, TensorType({DT_INT64})) | ||||
| .OUTPUT(serialized_sparse, TensorType({DT_STRING})) | |||||
| .OUTPUT(serialized_sparse, TensorType({DT_STRING, DT_VARIANT})) | |||||
| .ATTR(out_type, Type, DT_STRING) | .ATTR(out_type, Type, DT_STRING) | ||||
| .OP_END_FACTORY_REG(SerializeSparse) | .OP_END_FACTORY_REG(SerializeSparse) | ||||
| @@ -979,7 +979,7 @@ REG_OP(SerializeManySparse) | |||||
| DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \ | DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \ | ||||
| DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) | DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) | ||||
| .INPUT(shape, TensorType({DT_INT64})) | .INPUT(shape, TensorType({DT_INT64})) | ||||
| .OUTPUT(serialized_sparse, TensorType({DT_STRING})) | |||||
| .OUTPUT(serialized_sparse, TensorType({DT_STRING, DT_VARIANT})) | |||||
| .ATTR(out_type, Type, DT_STRING) | .ATTR(out_type, Type, DT_STRING) | ||||
| .OP_END_FACTORY_REG(SerializeManySparse) | .OP_END_FACTORY_REG(SerializeManySparse) | ||||
| @@ -1002,7 +1002,7 @@ REG_OP(SerializeManySparse) | |||||
| * Compatible with the TensorFlow operator DeserializeSparse. | * Compatible with the TensorFlow operator DeserializeSparse. | ||||
| */ | */ | ||||
| REG_OP(DeserializeSparse) | REG_OP(DeserializeSparse) | ||||
| .INPUT(serialized_sparse, TensorType({DT_STRING})) | |||||
| .INPUT(serialized_sparse, TensorType({DT_STRING, DT_VARIANT})) | |||||
| .OUTPUT(indices, TensorType({DT_INT64})) | .OUTPUT(indices, TensorType({DT_INT64})) | ||||
| .OUTPUT(values, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, \ | .OUTPUT(values, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, \ | ||||
| DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \ | DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \ | ||||
| @@ -154,6 +154,98 @@ REG_OP(CalcBucketsLimitAndOffset) | |||||
| .OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64})) | .OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64})) | ||||
| .REQUIRED_ATTR(total_limit, Int) | .REQUIRED_ATTR(total_limit, Int) | ||||
| .OP_END_FACTORY_REG(CalcBucketsLimitAndOffset) | .OP_END_FACTORY_REG(CalcBucketsLimitAndOffset) | ||||
| /** | |||||
| *@brief Gets a block tensor according to the base address tensor, for HCCL remote read to use. | |||||
| *@par Inputs: | |||||
| *@li base_addr: A Tensor of type int64/uint64. \n | |||||
| *@li x: A Tensor of type int64/uint64 holding the row and col indices of the block. | |||||
| *@par Outputs: | |||||
| *addrs_table: A list of [rank id, host addr, device addr, read size]. | |||||
| *@par Attributes: | |||||
| *@li ori_shape: A required list of ints. Shape of the base tensor. | |||||
| *@li block_size: A required list of ints. Shape of the split block tensor. | |||||
| *@li ori_storage_mode: An optional string from: '"Matrix", "UT"'. Defaults to | |||||
| "Matrix". Currently only Matrix storage is supported. | |||||
| *@li block_storage_mode: An optional string from: '"Matrix", "UT"'. Defaults to | |||||
| "Matrix". Currently only Matrix storage is supported. | |||||
| *@li rank_id: An optional int specifying the rank id. Defaults to 0. | |||||
| *@li dtype: An optional type of the base tensor. Defaults to DT_FLOAT. | |||||
| */ | |||||
| REG_OP(IndexToAddr) | |||||
| .INPUT(base_addr, TensorType({DT_INT64, DT_UINT64})) | |||||
| .INPUT(x, TensorType({DT_INT64, DT_UINT64})) | |||||
| .OUTPUT(addrs_table, TensorType({DT_INT64, DT_UINT64})) | |||||
| .REQUIRED_ATTR(ori_shape, ListInt) | |||||
| .REQUIRED_ATTR(block_size, ListInt) | |||||
| .ATTR(ori_storage_mode, String, "Matrix") | |||||
| .ATTR(block_storage_mode, String, "Matrix") | |||||
| .ATTR(rank_id, Int, 0) | |||||
| .ATTR(dtype, Type, DT_FLOAT) | |||||
| .OP_END_FACTORY_REG(IndexToAddr) | |||||
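// A sketch of the address arithmetic the attributes imply, assuming
// row-major "Matrix" storage; the AddrRow layout mirrors the documented
// [rank id, host addr, device addr, read size] table row, and the host
// address is left zero because it is not derivable from these inputs.
#include <cstdint>

struct AddrRow {
  int64_t rank_id;
  int64_t host_addr;
  int64_t device_addr;
  int64_t read_size;
};

AddrRow BlockAddr(int64_t base_addr, int64_t row, int64_t col,
                  int64_t ori_cols, int64_t block_rows, int64_t block_cols,
                  int64_t elem_size, int64_t rank_id) {
  // First element of block (row, col) inside the row-major base tensor.
  int64_t offset = (row * block_rows * ori_cols + col * block_cols) * elem_size;
  // One contiguous run per block row; a full block spans block_rows runs.
  return {rank_id, 0, base_addr + offset, block_cols * elem_size};
}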
| /** | |||||
| *@brief Converts one-dimensional coordinates to two-dimensional coordinates. | |||||
| *@par Inputs: | |||||
| *@li x: A Tensor of type int32/int64/uint64. One-dimensional coordinates. | |||||
| *@li shape: A Tensor of type int32/int64/uint64. 4D tensor [N,C,H,W]. | |||||
| *@par Outputs: | |||||
| *@li row: The row index of the two-dimensional coordinates. | |||||
| *@li col: The column index of the two-dimensional coordinates. | |||||
| *@li n: The number of columns of the two-dimensional coordinates. | |||||
| */ | |||||
| REG_OP(Coordinates1DTo2D) | |||||
| .INPUT(x, TensorType({DT_INT32, DT_INT64, DT_UINT64})) | |||||
| .INPUT(shape, TensorType({DT_INT32, DT_INT64, DT_UINT64})) | |||||
| .OUTPUT(row, TensorType({DT_INT32, DT_INT64, DT_UINT64})) | |||||
| .OUTPUT(col, TensorType({DT_INT32, DT_INT64, DT_UINT64})) | |||||
| .OUTPUT(n, TensorType({DT_INT32, DT_INT64, DT_UINT64})) | |||||
| .OP_END_FACTORY_REG(Coordinates1DTo2D) | |||||
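// A scalar sketch of the conversion, assuming the column count n comes
// from the last dimension (W) of the 4D [N,C,H,W] shape input.
#include <cstdint>

void Coords1DTo2D(int64_t x, const int64_t shape[4],
                  int64_t &row, int64_t &col, int64_t &n) {
  n = shape[3];   // number of columns in the 2D view
  row = x / n;
  col = x % n;
}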
| /** | |||||
| *@brief When the algorithm is LU, x[0] is i, x[1] is j and x[2] is k: | |||||
| y = 0 when i >= k && j < k, | |||||
| y = 1 when i == k && j == k, | |||||
| y = 2 when i > k && j == k, | |||||
| y = 3 when i == k && j > k, | |||||
| y = 4 when i > k && j > k, | |||||
| y = 5 otherwise. | |||||
| Used for LU decomposition. | |||||
| *@par Inputs: | |||||
| *x: A Tensor of type int32/int64/uint64. \n | |||||
| *@par Attributes: | |||||
| *algorithm: A string. Only "LU" is supported now. | |||||
| *@par Outputs: | |||||
| *y: A Tensor of type int32 | |||||
| */ | |||||
| REG_OP(CaseCondition) | |||||
| .INPUT(x, TensorType({DT_INT32, DT_INT64, DT_UINT64})) | |||||
| .OUTPUT(y, TensorType({DT_INT32})) | |||||
| .ATTR(algorithm, String, "LU") | |||||
| .OP_END_FACTORY_REG(CaseCondition) | |||||
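// A direct transcription of the LU case table documented above.
#include <cstdint>

int32_t LuCaseCondition(int64_t i, int64_t j, int64_t k) {
  if (i >= k && j < k)  return 0;
  if (i == k && j == k) return 1;
  if (i >  k && j == k) return 2;
  if (i == k && j >  k) return 3;
  if (i >  k && j >  k) return 4;
  return 5;
}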
| /** | |||||
| *@brief Writes the value tensor into tensor x at the offset given by begin. | |||||
| *@par Inputs: | |||||
| *x: A Tensor of type float16/float/double/int32/int64. \n | |||||
| *begin: A Tensor of type int32/int64. \n | |||||
| *value: A Tensor of type float16/float/double/int32/int64. | |||||
| *@par Outputs: | |||||
| *x: The same tensor as input "x". | |||||
| */ | |||||
| REG_OP(SliceWrite) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \ | |||||
| DT_INT32, DT_INT64})) | |||||
| .INPUT(begin, TensorType({DT_INT32, DT_INT64})) | |||||
| .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \ | |||||
| DT_INT32, DT_INT64})) | |||||
| .OUTPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \ | |||||
| DT_INT32, DT_INT64})) | |||||
| .OP_END_FACTORY_REG(SliceWrite) | |||||
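// A 1-D sketch of the in-place semantics documented above: value is
// copied into x starting at index begin, and x doubles as the output.
#include <cstdint>
#include <vector>

void SliceWrite1D(std::vector<float> &x, int64_t begin,
                  const std::vector<float> &value) {
  for (size_t i = 0; i < value.size(); ++i) {
    x[static_cast<size_t>(begin) + i] = value[i];
  }
}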
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_ | ||||
| @@ -263,6 +263,18 @@ typedef struct tagrtStreamLabelGotoTask_t { | |||||
| uint8_t reserved[36]; | uint8_t reserved[36]; | ||||
| } rtStreamLabelGotoTask_t; | } rtStreamLabelGotoTask_t; | ||||
| typedef struct tagrtNpuGetFloatStatusTask_t { | |||||
| uint64_t outputAddr; | |||||
| uint64_t outputSize; | |||||
| uint32_t checkMode; | |||||
| uint8_t reserved[20]; | |||||
| } rtNpuGetFloatStatusTask_t; | |||||
| typedef struct tagrtNpuClearFloatStatusTask_t { | |||||
| uint32_t checkMode; | |||||
| uint8_t reserved[36]; | |||||
| } rtNpuClearFloatStatusTask_t; | |||||
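// Both new task records are padded with trailing `reserved` bytes so they
// stay 40 bytes wide (8 + 8 + 4 + 20 and 4 + 36), matching the other members
// of the rtTaskInfo_t union below; a C++ compile-time check of that layout
// invariant might look like this sketch.
static_assert(sizeof(rtNpuGetFloatStatusTask_t) == 40,
              "float-status task must keep the union member width");
static_assert(sizeof(rtNpuClearFloatStatusTask_t) == 40,
              "float-status task must keep the union member width");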
| typedef struct tagTaskInfo { | typedef struct tagTaskInfo { | ||||
| uint32_t type; | uint32_t type; | ||||
| uint32_t streamID; | uint32_t streamID; | ||||
| @@ -288,6 +300,8 @@ typedef struct tagTaskInfo { | |||||
| rtStreamSwitchNTaskInfo_t streamSwitchNTask; | rtStreamSwitchNTaskInfo_t streamSwitchNTask; | ||||
| rtStreamLabelSwitchByIndexTask_t streamLabelSwitchIndexTask; | rtStreamLabelSwitchByIndexTask_t streamLabelSwitchIndexTask; | ||||
| rtStreamLabelGotoTask_t streamLabelGotoTask; | rtStreamLabelGotoTask_t streamLabelGotoTask; | ||||
| rtNpuGetFloatStatusTask_t npuGetFloatStatusTask; | |||||
| rtNpuClearFloatStatusTask_t npuClearFloatStatusTask; | |||||
| uint32_t reserved[10]; | uint32_t reserved[10]; | ||||
| } u; | } u; | ||||
| } rtTaskInfo_t; | } rtTaskInfo_t; | ||||