| @@ -91,6 +91,7 @@ else() | |||||
| find_library(register libregister.so ${ASCEND_RUNTIME_DIR}) | find_library(register libregister.so ${ASCEND_RUNTIME_DIR}) | ||||
| find_library(resource libresource.so ${ASCEND_RUNTIME_DIR}) | find_library(resource libresource.so ${ASCEND_RUNTIME_DIR}) | ||||
| find_library(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) | find_library(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) | ||||
| find_library(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) | |||||
| endif() | endif() | ||||
| # add compile flags | # add compile flags | ||||
| @@ -25,22 +25,11 @@ | |||||
| namespace ge { | namespace ge { | ||||
| enum ProfDataTypeConfig { | enum ProfDataTypeConfig { | ||||
| kProfAcl = 0x0001, | |||||
| kProfTaskTime = 0x0002, | kProfTaskTime = 0x0002, | ||||
| kProfAiCoreMetrics = 0x0004, | kProfAiCoreMetrics = 0x0004, | ||||
| kProfAicpuTrace = 0x0008, | kProfAicpuTrace = 0x0008, | ||||
| kProfModelExecute = 0x0010, | |||||
| kProfRuntimeApi = 0x0020, | |||||
| kProfRuntimeTrace = 0x0040, | |||||
| kProfScheduleTimeline = 0x0080, | |||||
| kProfScheduleTrace = 0x0100, | |||||
| kProfAiVectorCoreMetrics = 0x0200, | |||||
| kProfSubtaskTime = 0x0400, | |||||
| kProfTrainingTrace = 0x0800, | kProfTrainingTrace = 0x0800, | ||||
| kProfHcclTrace = 0x1000, | |||||
| kProfDataProcess = 0x2000, | |||||
| kProfTaskTrace = 0x3842, | |||||
| kProfModelLoad = 0x8000000000000000 | |||||
| kProfHcclTrace = 0x1000 | |||||
| }; | }; | ||||
| enum ProfilingAicoreMetrics { | enum ProfilingAicoreMetrics { | ||||
| @@ -49,20 +38,64 @@ enum ProfilingAicoreMetrics { | |||||
| kAicoreSynchronization = 2, | kAicoreSynchronization = 2, | ||||
| kAicoreMemory = 3, | kAicoreMemory = 3, | ||||
| kAicoreInternalMemory = 4, | kAicoreInternalMemory = 4, | ||||
| kAicoreStall = 5, | |||||
| kAicoreMetricsAll = 255 // only for op_trace | |||||
| kAicoreStall = 5 | |||||
| }; | }; | ||||
| typedef struct ProfAicoreEvents ProfAicoreEvents; | typedef struct ProfAicoreEvents ProfAicoreEvents; | ||||
| typedef struct aclgrphProfConfig aclgrphProfConfig; | typedef struct aclgrphProfConfig aclgrphProfConfig; | ||||
| /// | |||||
| /// @ingroup AscendCL | |||||
| /// @brief Initialize the profiling and set profiling configuration path | |||||
| /// @param [in] profiler_path: configuration path of profiling | |||||
| /// @param [in] length: length of configuration path | |||||
| /// @return Status result of function | |||||
| /// | |||||
| Status aclgrphProfInit(const char *profiler_path, uint32_t length); | Status aclgrphProfInit(const char *profiler_path, uint32_t length); | ||||
| /// | |||||
| /// @ingroup AscendCL | |||||
| /// @brief Finalize profiling | |||||
| /// @return Status result of function | |||||
| /// | |||||
| Status aclgrphProfFinalize(); | Status aclgrphProfFinalize(); | ||||
| /// | |||||
| /// @ingroup AscendCL | |||||
| /// @brief Create data of type aclgrphProfConfig | |||||
| /// @param [in] deviceid_list: device id list | |||||
| /// @param [in] device_nums: device numbers | |||||
| /// @param [in] aicore_metrics: type of aicore metrics | |||||
| /// @param [in] aicore_events: pointer to aicore events; reserved for future use, only NULL is supported now | |||||
| /// @param [in] data_type_config: modules need profiling | |||||
| /// @return pointer to the created aclgrphProfConfig | |||||
| /// | |||||
| aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t device_nums, | aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t device_nums, | ||||
| ProfilingAicoreMetrics aicore_metrics, ProfAicoreEvents *aicore_events, | ProfilingAicoreMetrics aicore_metrics, ProfAicoreEvents *aicore_events, | ||||
| uint64_t data_type_config); | uint64_t data_type_config); | ||||
| /// | |||||
| /// @ingroup AscendCL | |||||
| /// @brief Destroy data of type aclgrphProfConfig | |||||
| /// @param [in] profiler_config: config of profiling | |||||
| /// @return Status result of function | |||||
| /// | |||||
| Status aclgrphProfDestroyConfig(aclgrphProfConfig *profiler_config); | Status aclgrphProfDestroyConfig(aclgrphProfConfig *profiler_config); | ||||
| /// | |||||
| /// @ingroup AscendCL | |||||
| /// @brief Start profiling of the modules which are configured by the profiler config | |||||
| /// @param [in] profiler_config: config of profiling | |||||
| /// @return Status result of function | |||||
| /// | |||||
| Status aclgrphProfStart(aclgrphProfConfig *profiler_config); | Status aclgrphProfStart(aclgrphProfConfig *profiler_config); | ||||
| /// | |||||
| /// @ingroup AscendCL | |||||
| /// @brief Stop profiling of the modules which are configured by the profiler config | |||||
| /// @param [in] profiler_config: config of profiling | |||||
| /// @return Status result of function | |||||
| /// | |||||
| Status aclgrphProfStop(aclgrphProfConfig *profiler_config); | Status aclgrphProfStop(aclgrphProfConfig *profiler_config); | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -48,6 +48,8 @@ enum OpEngineType { | |||||
| ENGINE_AIVECTOR = 4 // not support | ENGINE_AIVECTOR = 4 // not support | ||||
| }; | }; | ||||
| enum InputAippType { DATA_WITHOUT_AIPP = 0, DATA_WITH_STATIC_AIPP, DATA_WITH_DYNAMIC_AIPP, DYNAMIC_AIPP_NODE }; | |||||
| const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; | const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; | ||||
| const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | ||||
| @@ -163,6 +163,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||||
| ge::Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | ge::Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | ||||
| ge::Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info); | ge::Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info); | ||||
| ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); | |||||
| ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | ||||
| std::vector<ge::TensorDesc> &output_desc); | std::vector<ge::TensorDesc> &output_desc); | ||||
| @@ -141,8 +141,12 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_OUTPUTS; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_OUTPUTS; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_DIMS; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_DIMS; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_AIPP_INPUT_DIMS; | |||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DATA_RELATED_AIPP_MODE; | |||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DATA_AIPP_DATA_NAME_MAP; | |||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_GRAPH_HAS_BEEN_ADDED; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_GRAPH_HAS_BEEN_ADDED; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SESSION_GRAPH_ID; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SESSION_GRAPH_ID; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_GRAPH_NAME; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_GRAPH_NAME; | ||||
| @@ -23,6 +23,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "external/graph/ge_error_codes.h" | #include "external/graph/ge_error_codes.h" | ||||
| #include "external/graph/tensor.h" | #include "external/graph/tensor.h" | ||||
| #include "ge_attr_value.h" | |||||
| namespace ge { | namespace ge { | ||||
| class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY RuntimeInferenceContext { | class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY RuntimeInferenceContext { | ||||
| @@ -32,10 +33,12 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY RuntimeInferenceContext { | |||||
| static void DestroyContext(const std::string &context_id); | static void DestroyContext(const std::string &context_id); | ||||
| graphStatus SetTensor(int64_t node_id, int output_id, Tensor &&tensor); | graphStatus SetTensor(int64_t node_id, int output_id, Tensor &&tensor); | ||||
| graphStatus GetTensor(int64_t node_id, int output_id, GeTensorPtr &tensor); | |||||
| graphStatus GetTensor(int64_t node_id, int output_id, Tensor &tensor); | graphStatus GetTensor(int64_t node_id, int output_id, Tensor &tensor); | ||||
| private: | private: | ||||
| std::map<int64_t, std::vector<Tensor>> tensors_; | std::map<int64_t, std::vector<Tensor>> tensors_; | ||||
| std::map<int64_t, std::vector<GeTensorPtr>> ge_tensors_; | |||||
| std::mutex mu_; | std::mutex mu_; | ||||
| static std::map<std::string, std::unique_ptr<RuntimeInferenceContext>> contexts_; | static std::map<std::string, std::unique_ptr<RuntimeInferenceContext>> contexts_; | ||||
| @@ -122,8 +122,12 @@ const std::string ATTR_NAME_AIPP_INPUTS = "_aipp_inputs"; | |||||
| const std::string ATTR_NAME_AIPP_OUTPUTS = "_aipp_outputs"; | const std::string ATTR_NAME_AIPP_OUTPUTS = "_aipp_outputs"; | ||||
| const std::string ATTR_NAME_INPUT_DIMS = "input_dims"; | const std::string ATTR_NAME_INPUT_DIMS = "input_dims"; | ||||
| const std::string ATTR_DYNAMIC_AIPP_INPUT_DIMS = "_dynamic_aipp_input_dims"; | |||||
| const std::string ATTR_DATA_RELATED_AIPP_MODE = "_data_related_aipp_mode"; | |||||
| const std::string ATTR_DATA_AIPP_DATA_NAME_MAP = "_data_aipp_data_name_map"; | |||||
| const std::string ATTR_NAME_GRAPH_HAS_BEEN_ADDED = "_graph_has_been_added"; | const std::string ATTR_NAME_GRAPH_HAS_BEEN_ADDED = "_graph_has_been_added"; | ||||
| const std::string ATTR_NAME_SESSION_GRAPH_ID = "_session_graph_id"; | const std::string ATTR_NAME_SESSION_GRAPH_ID = "_session_graph_id"; | ||||
| const std::string ATTR_NAME_PARENT_GRAPH_NAME = "_parent_graph_name"; | const std::string ATTR_NAME_PARENT_GRAPH_NAME = "_parent_graph_name"; | ||||
| @@ -431,7 +431,7 @@ graphStatus GeTensorDesc::GetShapeRange(std::vector<std::pair<int64_t, int64_t>> | |||||
| return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
| } | } | ||||
| std::pair<int64_t, int64_t> pair({ele[0], ele[1]}); | std::pair<int64_t, int64_t> pair({ele[0], ele[1]}); | ||||
| range.push_back(pair); | |||||
| range.emplace_back(pair); | |||||
| } | } | ||||
| return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
| @@ -33,7 +33,6 @@ COMMON_LOCAL_SRC_FILES := \ | |||||
| ./utils/tuning_utils.cc \ | ./utils/tuning_utils.cc \ | ||||
| ./utils/graph_utils.cc \ | ./utils/graph_utils.cc \ | ||||
| ./utils/ge_ir_utils.cc \ | ./utils/ge_ir_utils.cc \ | ||||
| ./utils/node_utils.cc \ | |||||
| ./utils/op_desc_utils.cc \ | ./utils/op_desc_utils.cc \ | ||||
| ./utils/type_utils.cc \ | ./utils/type_utils.cc \ | ||||
| ./utils/tensor_utils.cc \ | ./utils/tensor_utils.cc \ | ||||
| @@ -44,6 +43,7 @@ COMMON_LOCAL_SRC_FILES := \ | |||||
| option/ge_context.cc \ | option/ge_context.cc \ | ||||
| option/ge_local_context.cc \ | option/ge_local_context.cc \ | ||||
| ./runtime_inference_context.cc \ | ./runtime_inference_context.cc \ | ||||
| ./utils/node_utils.cc \ | |||||
| COMMON_LOCAL_C_INCLUDES := \ | COMMON_LOCAL_C_INCLUDES := \ | ||||
| proto/om.proto \ | proto/om.proto \ | ||||
| @@ -15,6 +15,7 @@ | |||||
| */ | */ | ||||
| #include "graph/runtime_inference_context.h" | #include "graph/runtime_inference_context.h" | ||||
| #include "graph/utils/tensor_adapter.h" | |||||
| #include <cstdint> | #include <cstdint> | ||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| @@ -67,6 +68,14 @@ graphStatus RuntimeInferenceContext::SetTensor(int64_t node_id, int output_id, T | |||||
| GELOGD("Set tensor for node_id = %ld, output_id = %d", node_id, output_id); | GELOGD("Set tensor for node_id = %ld, output_id = %d", node_id, output_id); | ||||
| output_tensors[output_id] = std::move(tensor); | output_tensors[output_id] = std::move(tensor); | ||||
| auto &output_ge_tensors = ge_tensors_[node_id]; | |||||
| if (static_cast<uint32_t>(output_id) >= output_ge_tensors.size()) { | |||||
| output_ge_tensors.resize(output_id + 1); | |||||
| } | |||||
| GELOGD("Set ge tensor for node_id = %ld, output_id = %d", node_id, output_id); | |||||
| output_ge_tensors[output_id] = TensorAdapter::AsGeTensorPtr(tensor); | |||||
| return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
| } | } | ||||
| @@ -93,4 +102,28 @@ graphStatus RuntimeInferenceContext::GetTensor(int64_t node_id, int output_id, T | |||||
| tensor = output_tensors[output_id]; | tensor = output_tensors[output_id]; | ||||
| return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
| } | } | ||||
| graphStatus RuntimeInferenceContext::GetTensor(int64_t node_id, int output_id, GeTensorPtr &tensor) { | |||||
| if (output_id < 0) { | |||||
| GELOGE(GRAPH_PARAM_INVALID, "Invalid output index: %d", output_id); | |||||
| return GRAPH_PARAM_INVALID; | |||||
| } | |||||
| std::lock_guard<std::mutex> lk(mu_); | |||||
| auto iter = ge_tensors_.find(node_id); | |||||
| if (iter == ge_tensors_.end()) { | |||||
| GELOGE(INTERNAL_ERROR, "Node not register. Id = %ld", node_id); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| auto &output_tensors = iter->second; | |||||
| if (static_cast<uint32_t>(output_id) >= output_tensors.size()) { | |||||
| GELOGE(GRAPH_FAILED, "Node output is not registered. node_id = %ld, output index = %d", node_id, output_id); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| GELOGD("Get ge tensor for node_id = %ld, output_id = %d", node_id, output_id); | |||||
| tensor = output_tensors[output_id]; | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "utils/node_utils.h" | |||||
| #include "utils/op_desc_utils.h" | |||||
| #include "graph/utils/node_utils.h" | |||||
| #include "graph/utils/op_desc_utils.h" | |||||
| #include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
| #include "debug/ge_op_types.h" | #include "debug/ge_op_types.h" | ||||
| #include "debug/ge_util.h" | #include "debug/ge_util.h" | ||||
| @@ -23,8 +23,13 @@ | |||||
| #include "graph/anchor.h" | #include "graph/anchor.h" | ||||
| #include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
| #include "graph/types.h" | #include "graph/types.h" | ||||
| #include "utils/tensor_utils.h" | |||||
| #include "utils/type_utils.h" | |||||
| #include "external/graph/operator.h" | |||||
| #include "graph/ge_context.h" | |||||
| #include "graph/runtime_inference_context.h" | |||||
| #include "graph/utils/op_desc_utils.h" | |||||
| #include "graph/utils/tensor_utils.h" | |||||
| #include "graph/utils/tensor_adapter.h" | |||||
| #include "graph/utils/type_utils.h" | |||||
| namespace ge { | namespace ge { | ||||
| std::map<NodePtr, std::vector<uint32_t>> NodeUtils::map_send_info_{}; | std::map<NodePtr, std::vector<uint32_t>> NodeUtils::map_send_info_{}; | ||||
| @@ -575,6 +580,58 @@ graphStatus NodeUtils::GetNodeUnknownShapeStatus(const Node &node, bool &is_unkn | |||||
| return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
| } | } | ||||
| graphStatus NodeUtils::GetInputConstData(const ConstNodePtr &node_ptr, const string &dst_name, GeTensorPtr &ge_tensor) { | |||||
| GE_CHECK_NOTNULL(node_ptr); | |||||
| return NodeUtils::GetInputConstData(*node_ptr, dst_name, ge_tensor); | |||||
| } | |||||
| graphStatus NodeUtils::GetInputConstData(const Node &node, const string &dst_name, GeTensorPtr &ge_tensor) { | |||||
| // For inner compute graph | |||||
| auto op_desc = node.GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| auto index = op_desc->GetInputIndexByName(dst_name); | |||||
| auto in_data_anchor = node.GetInDataAnchor(index); | |||||
| GE_CHECK_NOTNULL(in_data_anchor); | |||||
| auto out_data_anchor = in_data_anchor->GetPeerOutAnchor(); | |||||
| GE_CHECK_NOTNULL(out_data_anchor); | |||||
| auto peer_node = out_data_anchor->GetOwnerNode(); | |||||
| GE_CHECK_NOTNULL(peer_node); | |||||
| auto peer_op_desc = peer_node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(peer_op_desc); | |||||
| auto peer_op_type = peer_op_desc->GetType(); | |||||
| if (peer_op_type == CONSTANTOP || peer_op_type == CONSTANT) { | |||||
| if (!AttrUtils::MutableTensor(peer_node->GetOpDesc(), ATTR_NAME_WEIGHTS, ge_tensor)) { | |||||
| GELOGW("get attr name %s failed.", ATTR_NAME_WEIGHTS.c_str()); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| return GRAPH_SUCCESS; | |||||
| } else if (peer_op_type == DATA) { | |||||
| auto parent_node = NodeUtils::GetParentInput(peer_node); | |||||
| while ((parent_node != nullptr) && (parent_node->GetType() == DATA)) { | |||||
| parent_node = NodeUtils::GetParentInput(parent_node); | |||||
| } | |||||
| if ((parent_node != nullptr) && ((parent_node->GetType() == CONSTANT) || (parent_node->GetType() == CONSTANTOP))) { | |||||
| if (!AttrUtils::MutableTensor(parent_node->GetOpDesc(), ATTR_NAME_WEIGHTS, ge_tensor)) { | |||||
| GELOGW("get attr name %s failed.", ATTR_NAME_WEIGHTS.c_str()); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| } | |||||
| // Try get from runtime inference context | |||||
| auto session_id = std::to_string(GetContext().SessionId()); | |||||
| RuntimeInferenceContext *runtime_infer_ctx = nullptr; | |||||
| if (RuntimeInferenceContext::GetContext(session_id, &runtime_infer_ctx) == GRAPH_SUCCESS) { | |||||
| GELOGD("To get constant from runtime inference context. session_id = %s", session_id.c_str()); | |||||
| auto ret = runtime_infer_ctx->GetTensor(peer_node->GetOpDesc()->GetId(), out_data_anchor->GetIdx(), ge_tensor); | |||||
| if (ret == GRAPH_SUCCESS) { | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| } | |||||
| GELOGW("node[%s]'s input[%s]'s peer node is not const", node.GetName().c_str(), dst_name.c_str()); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| std::string NodeUtils::GetNodeType(const Node &node) { | std::string NodeUtils::GetNodeType(const Node &node) { | ||||
| if (node.GetType() != FRAMEWORKOP) { | if (node.GetType() != FRAMEWORKOP) { | ||||
| return node.GetType(); | return node.GetType(); | ||||
| @@ -587,14 +644,6 @@ std::string NodeUtils::GetNodeType(const Node &node) { | |||||
| std::string NodeUtils::GetNodeType(const NodePtr &node) { return node == nullptr ? "" : GetNodeType(*node); } | std::string NodeUtils::GetNodeType(const NodePtr &node) { return node == nullptr ? "" : GetNodeType(*node); } | ||||
| graphStatus NodeUtils::GetInputConstData(const ConstNodePtr &node_ptr, const string &dst_name, GeTensorPtr &ge_tensor) { | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| graphStatus NodeUtils::GetInputConstData(const Node &node, const string &dst_name, GeTensorPtr &ge_tensor) { | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| ComputeGraphPtr NodeUtils::GetSubgraph(const Node &node, uint32_t index) { | ComputeGraphPtr NodeUtils::GetSubgraph(const Node &node, uint32_t index) { | ||||
| auto op_desc = node.GetOpDesc(); | auto op_desc = node.GetOpDesc(); | ||||
| if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
| @@ -51,6 +51,7 @@ include_directories(${GE_SOURCE_DIR}/inc/graph) | |||||
| include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib) | include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib) | ||||
| include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) | include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) | ||||
| include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) | include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) | ||||
| include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/toolchain) | |||||
| include_directories(${CMAKE_BINARY_DIR}) | include_directories(${CMAKE_BINARY_DIR}) | ||||
| include_directories(${CMAKE_BINARY_DIR}/proto/ge) | include_directories(${CMAKE_BINARY_DIR}/proto/ge) | ||||
| @@ -227,6 +228,7 @@ target_link_libraries(ge_runner | |||||
| ${runtime} | ${runtime} | ||||
| ${resouce} | ${resouce} | ||||
| ${ascend_hal} | ${ascend_hal} | ||||
| ${adump_server} | |||||
| rt | rt | ||||
| dl) | dl) | ||||
| @@ -237,6 +239,7 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
| "common/dump/dump_properties.cc" | "common/dump/dump_properties.cc" | ||||
| "common/dump/dump_manager.cc" | "common/dump/dump_manager.cc" | ||||
| "common/dump/dump_op.cc" | "common/dump/dump_op.cc" | ||||
| "common/dump/dump_server.cc" | |||||
| "common/formats/format_transfers/*.cc" | "common/formats/format_transfers/*.cc" | ||||
| "common/formats/formats.cc" | "common/formats/formats.cc" | ||||
| "common/formats/utils/formats_trans_utils.cc" | "common/formats/utils/formats_trans_utils.cc" | ||||
| @@ -29,13 +29,14 @@ using std::vector; | |||||
| namespace { | namespace { | ||||
| const uint32_t kMaxDeviceNum = 64; | const uint32_t kMaxDeviceNum = 64; | ||||
| const std::string PROFILING_INIT = "prof_init"; | |||||
| const std::string PROFILING_FINALIZE = "prof_finalize"; | |||||
| const std::string PROFILING_START = "prof_start"; | |||||
| const std::string PROFILING_STOP = "prof_stop"; | |||||
| const std::string DEVICES_NUMS = "devNums"; | |||||
| const std::string DEVICE_ID_LIST = "devIdList"; | |||||
| const std::string AICORE_METRICS = "aicoreMetrics"; | |||||
| const uint32_t kDeviceListIndex = 3; | |||||
| const std::string kProfilingInit = "prof_init"; | |||||
| const std::string kProfilingFinalize = "prof_finalize"; | |||||
| const std::string kProfilingStart = "prof_start"; | |||||
| const std::string kProfilingStop = "prof_stop"; | |||||
| const std::string kDeviceNums = "devNums"; | |||||
| const std::string kDeviceIdList = "devIdList"; | |||||
| const std::string kAicoreMetrics = "aicoreMetrics"; | |||||
| const std::map<ge::ProfilingAicoreMetrics, std::string> kProfAicoreMetricsToString = { | const std::map<ge::ProfilingAicoreMetrics, std::string> kProfAicoreMetricsToString = { | ||||
| {ge::kAicoreArithmaticThroughput, "AICORE_ARITHMATIC_THROUGHPUT"}, | {ge::kAicoreArithmaticThroughput, "AICORE_ARITHMATIC_THROUGHPUT"}, | ||||
| @@ -43,25 +44,7 @@ const std::map<ge::ProfilingAicoreMetrics, std::string> kProfAicoreMetricsToStri | |||||
| {ge::kAicoreSynchronization, "AICORE_SYNCHRONIZATION"}, | {ge::kAicoreSynchronization, "AICORE_SYNCHRONIZATION"}, | ||||
| {ge::kAicoreMemory, "AICORE_MEMORY"}, | {ge::kAicoreMemory, "AICORE_MEMORY"}, | ||||
| {ge::kAicoreInternalMemory, "AICORE_INTERNAL_MEMORY"}, | {ge::kAicoreInternalMemory, "AICORE_INTERNAL_MEMORY"}, | ||||
| {ge::kAicoreStall, "AICORE_STALL"}, | |||||
| {ge::kAicoreMetricsAll, "AICORE_METRICS_ALL"}}; | |||||
| const std::map<uint64_t, uint64_t> kDataTypeConfigMapping = {{ge::kProfAcl, PROF_ACL_API}, | |||||
| {ge::kProfTaskTime, PROF_TASK_TIME}, | |||||
| {ge::kProfAiCoreMetrics, PROF_AICORE_METRICS}, | |||||
| {ge::kProfAicpuTrace, PROF_AICPU_TRACE}, | |||||
| {ge::kProfModelExecute, PROF_MODEL_EXECUTE}, | |||||
| {ge::kProfRuntimeApi, PROF_RUNTIME_API}, | |||||
| {ge::kProfRuntimeTrace, PROF_RUNTIME_TRACE}, | |||||
| {ge::kProfScheduleTimeline, PROF_SCHEDULE_TIMELINE}, | |||||
| {ge::kProfScheduleTrace, PROF_SCHEDULE_TRACE}, | |||||
| {ge::kProfAiVectorCoreMetrics, PROF_AIVECTORCORE_METRICS}, | |||||
| {ge::kProfSubtaskTime, PROF_SUBTASK_TIME}, | |||||
| {ge::kProfTrainingTrace, PROF_TRAINING_TRACE}, | |||||
| {ge::kProfHcclTrace, PROF_HCCL_TRACE}, | |||||
| {ge::kProfDataProcess, PROF_DATA_PROCESS}, | |||||
| {ge::kProfTaskTrace, PROF_TASK_TRACE}, | |||||
| {ge::kProfModelLoad, PROF_MODEL_LOAD}}; | |||||
| {ge::kAicoreStall, "AICORE_STALL"}}; | |||||
| } // namespace | } // namespace | ||||
| static bool g_graph_prof_init_ = false; | static bool g_graph_prof_init_ = false; | ||||
| @@ -107,11 +90,11 @@ Status aclgrphProfInit(const char *profiler_path, uint32_t length) { | |||||
| GraphLoader graph_loader; | GraphLoader graph_loader; | ||||
| Command command; | Command command; | ||||
| command.cmd_params.clear(); | command.cmd_params.clear(); | ||||
| command.cmd_type = PROFILING_INIT; | |||||
| command.module_index = kProfModelLoad | kProfTrainingTrace; | |||||
| command.cmd_type = kProfilingInit; | |||||
| command.module_index = PROF_MODEL_LOAD; | |||||
| ret = graph_loader.CommandHandle(command); | ret = graph_loader.CommandHandle(command); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Handle profiling command %s failed, config = %s", PROFILING_INIT.c_str(), profiler_path); | |||||
| GELOGE(ret, "Handle profiling command %s failed, config = %s", kProfilingInit.c_str(), profiler_path); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (!g_graph_prof_init_) { | if (!g_graph_prof_init_) { | ||||
| @@ -143,10 +126,10 @@ Status aclgrphProfFinalize() { | |||||
| GraphLoader graph_loader; | GraphLoader graph_loader; | ||||
| Command command; | Command command; | ||||
| command.cmd_params.clear(); | command.cmd_params.clear(); | ||||
| command.cmd_type = PROFILING_FINALIZE; | |||||
| command.cmd_type = kProfilingFinalize; | |||||
| Status ret = graph_loader.CommandHandle(command); | Status ret = graph_loader.CommandHandle(command); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Handle profiling command %s failed.", PROFILING_FINALIZE.c_str()); | |||||
| GELOGE(ret, "Handle profiling command %s failed.", kProfilingFinalize.c_str()); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -164,9 +147,9 @@ Status aclgrphProfFinalize() { | |||||
| bool TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector<string> &prof_config_params) { | bool TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector<string> &prof_config_params) { | ||||
| prof_config_params.clear(); | prof_config_params.clear(); | ||||
| prof_config_params.emplace_back(DEVICES_NUMS); | |||||
| prof_config_params.emplace_back(kDeviceNums); | |||||
| prof_config_params.emplace_back(std::to_string(profiler_config->config.devNums)); | prof_config_params.emplace_back(std::to_string(profiler_config->config.devNums)); | ||||
| prof_config_params.emplace_back(DEVICE_ID_LIST); | |||||
| prof_config_params.emplace_back(kDeviceIdList); | |||||
| std::string devID = ""; | std::string devID = ""; | ||||
| if (profiler_config->config.devNums == 0) { | if (profiler_config->config.devNums == 0) { | ||||
| GELOGW("The device num is invalid."); | GELOGW("The device num is invalid."); | ||||
| @@ -180,7 +163,7 @@ bool TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector<str | |||||
| } | } | ||||
| prof_config_params.push_back(devID); | prof_config_params.push_back(devID); | ||||
| prof_config_params.push_back(AICORE_METRICS); | |||||
| prof_config_params.push_back(kAicoreMetrics); | |||||
| auto iter = | auto iter = | ||||
| kProfAicoreMetricsToString.find(static_cast<ProfilingAicoreMetrics>(profiler_config->config.aicoreMetrics)); | kProfAicoreMetricsToString.find(static_cast<ProfilingAicoreMetrics>(profiler_config->config.aicoreMetrics)); | ||||
| if (iter == kProfAicoreMetricsToString.end()) { | if (iter == kProfAicoreMetricsToString.end()) { | ||||
| @@ -250,13 +233,7 @@ aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t dev | |||||
| } | } | ||||
| config->config.aicoreMetrics = static_cast<ProfAicoreMetrics>(aicore_metrics); | config->config.aicoreMetrics = static_cast<ProfAicoreMetrics>(aicore_metrics); | ||||
| uint64_t data_type = 0; | |||||
| for (auto &iter : kDataTypeConfigMapping) { | |||||
| if ((iter.first & data_type_config) == iter.first) { | |||||
| data_type |= iter.second; | |||||
| } | |||||
| } | |||||
| config->config.dataTypeConfig = data_type; | |||||
| config->config.dataTypeConfig = data_type_config; | |||||
| GELOGI("Successfully create prof config."); | GELOGI("Successfully create prof config."); | ||||
| return config; | return config; | ||||
| } | } | ||||
| @@ -309,9 +286,11 @@ Status aclgrphProfStart(aclgrphProfConfig *profiler_config) { | |||||
| GraphLoader graph_loader; | GraphLoader graph_loader; | ||||
| Command command; | Command command; | ||||
| command.cmd_params.clear(); | command.cmd_params.clear(); | ||||
| command.cmd_type = PROFILING_START; | |||||
| command.cmd_type = kProfilingStart; | |||||
| command.cmd_params = prof_params; | command.cmd_params = prof_params; | ||||
| command.module_index = profiler_config->config.dataTypeConfig; | command.module_index = profiler_config->config.dataTypeConfig; | ||||
| GELOGI("Profiling will start, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), | |||||
| prof_params[kDeviceListIndex].c_str(), command.module_index); | |||||
| ret = graph_loader.CommandHandle(command); | ret = graph_loader.CommandHandle(command); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Handle profiling command failed"); | GELOGE(ret, "Handle profiling command failed"); | ||||
| @@ -360,9 +339,11 @@ Status aclgrphProfStop(aclgrphProfConfig *profiler_config) { | |||||
| GraphLoader graph_loader; | GraphLoader graph_loader; | ||||
| Command command; | Command command; | ||||
| command.cmd_params.clear(); | command.cmd_params.clear(); | ||||
| command.cmd_type = PROFILING_STOP; | |||||
| command.cmd_type = kProfilingStop; | |||||
| command.cmd_params = prof_params; | command.cmd_params = prof_params; | ||||
| command.module_index = profiler_config->config.dataTypeConfig; | command.module_index = profiler_config->config.dataTypeConfig; | ||||
| GELOGI("Profiling will stop, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), | |||||
| prof_params[kDeviceListIndex].c_str(), command.module_index); | |||||
| ret = graph_loader.CommandHandle(command); | ret = graph_loader.CommandHandle(command); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Handle profiling command failed"); | GELOGE(ret, "Handle profiling command failed"); | ||||
| @@ -0,0 +1,21 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "adx_datadump_server.h" | |||||
// No-op implementation of the data dump server shutdown hook: does nothing and
// always returns 0 (presumably the success code - confirm against adx_datadump_server.h).
int AdxDataDumpServerUnInit() {
  const int result = 0;
  return result;
}

// No-op implementation of the data dump server startup hook: does nothing and
// always returns 0 (presumably the success code - confirm against adx_datadump_server.h).
int AdxDataDumpServerInit() {
  const int result = 0;
  return result;
}
| @@ -55,19 +55,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||||
| bool convert_2_phy_device_id) { | bool convert_2_phy_device_id) { | ||||
| #ifdef DAVINCI_SUPPORT_PROFILING | #ifdef DAVINCI_SUPPORT_PROFILING | ||||
| vector<int32_t>().swap(device_id_); | vector<int32_t>().swap(device_id_); | ||||
| // profiling need phy device id | |||||
| if (!convert_2_phy_device_id) { | |||||
| device_id_.push_back(options.device_id); | |||||
| } else { | |||||
| uint32_t phy_device_id = 0; | |||||
| rtError_t rt_ret = rtGetDevicePhyIdByIndex(static_cast<uint32_t>(options.device_id), &phy_device_id); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); | |||||
| return FAILED; | |||||
| } | |||||
| device_id_.push_back(phy_device_id); | |||||
| } | |||||
| job_id_ = options.job_id; | job_id_ = options.job_id; | ||||
| Status ret; | Status ret; | ||||
| @@ -76,6 +63,20 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||||
| ret = InitFromAclCfg(recv_profiling_config_); | ret = InitFromAclCfg(recv_profiling_config_); | ||||
| } else { | } else { | ||||
| ret = InitFromOptions(options); | ret = InitFromOptions(options); | ||||
| if (ret == SUCCESS && is_load_profiling_) { | |||||
| // profiling need phy device id | |||||
| if (!convert_2_phy_device_id) { | |||||
| device_id_.push_back(options.device_id); | |||||
| } else { | |||||
| uint32_t phy_device_id = 0; | |||||
| rtError_t rt_ret = rtGetDevicePhyIdByIndex(static_cast<uint32_t>(options.device_id), &phy_device_id); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); | |||||
| return FAILED; | |||||
| } | |||||
| device_id_.push_back(phy_device_id); | |||||
| } | |||||
| } | |||||
| } | } | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Failed to init profiling."); | GELOGE(ret, "Failed to init profiling."); | ||||
| @@ -868,14 +869,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin | |||||
| } | } | ||||
| GELOGI("Current logic_device_id:%d", logic_device_id); | GELOGI("Current logic_device_id:%d", logic_device_id); | ||||
| uint32_t phy_device_id = 0; | |||||
| rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); | |||||
| } | |||||
| GELOGI("Current phy_device_id:%d", phy_device_id); | |||||
| bool execute_model_prof_on = false; | bool execute_model_prof_on = false; | ||||
| auto iter = std::find(device_id_.begin(), device_id_.end(), phy_device_id); | |||||
| auto iter = std::find(device_id_.begin(), device_id_.end(), logic_device_id); | |||||
| if (iter != device_id_.end()) { | if (iter != device_id_.end()) { | ||||
| execute_model_prof_on = true; | execute_model_prof_on = true; | ||||
| } | } | ||||
| @@ -58,7 +58,7 @@ const int kWarningThreshold = 536870912 * 2; // 536870912 represent 512M | |||||
| const int kMaxFileSizeLimit = INT_MAX; | const int kMaxFileSizeLimit = INT_MAX; | ||||
| const int kMaxBuffSize = 256; | const int kMaxBuffSize = 256; | ||||
| const char *const kPathValidReason = "The path can only contain 'a-z' 'A-Z' '0-9' '-' '.' '_' and chinese character"; | const char *const kPathValidReason = "The path can only contain 'a-z' 'A-Z' '0-9' '-' '.' '_' and chinese character"; | ||||
| constexpr uint32_t MAX_CONFIG_FILE_BYTE = 10 * 1024 * 1024; | |||||
| constexpr uint32_t kMaxConfigFileByte = 10 * 1024 * 1024; | |||||
| } // namespace | } // namespace | ||||
| namespace ge { | namespace ge { | ||||
| @@ -512,9 +512,9 @@ FMK_FUNC_HOST_VISIBILITY bool IsValidFile(const char *file_path) { | |||||
| stat.st_mode); | stat.st_mode); | ||||
| return false; | return false; | ||||
| } | } | ||||
| if (stat.st_size > MAX_CONFIG_FILE_BYTE) { | |||||
| if (stat.st_size > kMaxConfigFileByte) { | |||||
| GELOGE(PARAM_INVALID, "config file %s size[%ld] is larger than max config file Bytes[%u]", | GELOGE(PARAM_INVALID, "config file %s size[%ld] is larger than max config file Bytes[%u]", | ||||
| resolved_file_path.c_str(), stat.st_size, MAX_CONFIG_FILE_BYTE); | |||||
| resolved_file_path.c_str(), stat.st_size, kMaxConfigFileByte); | |||||
| return false; | return false; | ||||
| } | } | ||||
| return true; | return true; | ||||
| @@ -745,6 +745,22 @@ Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo | |||||
| GELOGI("GetAIPPInfo succ."); | GELOGI("GetAIPPInfo succ."); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GeExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { | |||||
| GELOGI("Begin to get aipp type."); | |||||
| if (!isInit_) { | |||||
| GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| return GE_EXEC_NOT_INIT; | |||||
| } | |||||
| Status ret = GraphExecutor::GetAippType(model_id, index, type, aipp_index); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGW("Get aipp type is not success."); | |||||
| return ret; | |||||
| } | |||||
| GELOGI("Get aipp type success."); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info) { | Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info) { | ||||
| GELOGI("Begin to get dynamic batch output shape info"); | GELOGI("Begin to get dynamic batch output shape info"); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| @@ -29,6 +29,7 @@ COMMON_LOCAL_SRC_FILES := \ | |||||
| common/dump/dump_properties.cc \ | common/dump/dump_properties.cc \ | ||||
| common/dump/dump_manager.cc \ | common/dump/dump_manager.cc \ | ||||
| common/dump/dump_op.cc \ | common/dump/dump_op.cc \ | ||||
| common/dump/dump_server.cc \ | |||||
| common/helper/model_cache_helper.cc \ | common/helper/model_cache_helper.cc \ | ||||
| ge_local_engine/engine/host_cpu_engine.cc \ | ge_local_engine/engine/host_cpu_engine.cc \ | ||||
| @@ -371,7 +372,6 @@ LOCAL_SRC_FILES += $(BUILER_SRC_FILES) | |||||
| LOCAL_SRC_FILES += $(ANALYZER_SRC_FILES) | LOCAL_SRC_FILES += $(ANALYZER_SRC_FILES) | ||||
| LOCAL_STATIC_LIBRARIES := libge_memory \ | LOCAL_STATIC_LIBRARIES := libge_memory \ | ||||
| libadump_server_stub \ | |||||
| LOCAL_SHARED_LIBRARIES := \ | LOCAL_SHARED_LIBRARIES := \ | ||||
| libc_sec \ | libc_sec \ | ||||
| @@ -436,7 +436,6 @@ LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES) | |||||
| LOCAL_C_INCLUDES += $(ANALYZER_LOCAL_INCLUDES) | LOCAL_C_INCLUDES += $(ANALYZER_LOCAL_INCLUDES) | ||||
| LOCAL_STATIC_LIBRARIES := libge_memory \ | LOCAL_STATIC_LIBRARIES := libge_memory \ | ||||
| libadump_server_stub \ | |||||
| LOCAL_SHARED_LIBRARIES := \ | LOCAL_SHARED_LIBRARIES := \ | ||||
| libc_sec \ | libc_sec \ | ||||
| @@ -1,5 +1,5 @@ | |||||
| LOCAL_PATH := $(call my-dir) | LOCAL_PATH := $(call my-dir) | ||||
| include $(LOCAL_PATH)/stub/Makefile | |||||
| LIBGE_LOCAL_SRC_FILES := \ | LIBGE_LOCAL_SRC_FILES := \ | ||||
| proto/fusion_model.proto \ | proto/fusion_model.proto \ | ||||
| proto/optimizer_priority.proto \ | proto/optimizer_priority.proto \ | ||||
| @@ -392,8 +392,8 @@ endif | |||||
| LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) | LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) | ||||
| LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc | |||||
| LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_prof.cc | |||||
| LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc \ | |||||
| ../../out/ge/lib64/stub/ge_prof.cc \ | |||||
| LOCAL_SHARED_LIBRARIES := | LOCAL_SHARED_LIBRARIES := | ||||
| @@ -413,7 +413,8 @@ BlockMemAssigner::BlockMemAssigner(ComputeGraphPtr compute_graph, const map<stri | |||||
| life_time_(0) {} | life_time_(0) {} | ||||
| BlockMemAssigner::~BlockMemAssigner() { | BlockMemAssigner::~BlockMemAssigner() { | ||||
| for (MemoryBlock *memory_block : memory_blocks_) { | |||||
| GELOGD("blocks_store_ size : %lu", blocks_store_.size()); | |||||
| for (MemoryBlock *memory_block : blocks_store_) { | |||||
| GE_DELETE_NEW_SINGLE(memory_block); | GE_DELETE_NEW_SINGLE(memory_block); | ||||
| } | } | ||||
| } | } | ||||
| @@ -544,7 +545,7 @@ bool CanReuseBySize(const map<string, uint64_t> &reusable_block_counts, const Me | |||||
| } | } | ||||
| bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | ||||
| uint32_t &peer_input_index) { | |||||
| uint32_t &peer_input_index, bool &no_need_assign_memory) { | |||||
| if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { | if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { | ||||
| return false; | return false; | ||||
| } | } | ||||
| @@ -571,6 +572,11 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou | |||||
| // If GetBool fail, is_input_continuous is false. | // If GetBool fail, is_input_continuous is false. | ||||
| (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); | (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); | ||||
| GE_IF_BOOL_EXEC(is_input_continuous && CheckIsZeroMemNodeType(peer_node->GetType()), | |||||
| GELOGI("Node[%s] output[%u] no_need_assign_memory.", n->GetName().c_str(), out_index); | |||||
| no_need_assign_memory = true; return false;); | |||||
| if (is_input_continuous) { | if (is_input_continuous) { | ||||
| if (n->GetOwnerComputeGraph() != nullptr) { | if (n->GetOwnerComputeGraph() != nullptr) { | ||||
| string graph_name = n->GetOwnerComputeGraph()->GetName(); | string graph_name = n->GetOwnerComputeGraph()->GetName(); | ||||
| @@ -828,6 +834,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
| } | } | ||||
| } | } | ||||
| memory_blocks_.emplace_back(block); | memory_blocks_.emplace_back(block); | ||||
| blocks_store_.emplace_back(block); | |||||
| return block; | return block; | ||||
| } | } | ||||
| @@ -1143,8 +1150,10 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
| bool out_node_set_continuous_input = false; | bool out_node_set_continuous_input = false; | ||||
| bool no_need_assign_memory = ((size == 0) || CheckIsZeroMemNodeType(node->GetType())); | bool no_need_assign_memory = ((size == 0) || CheckIsZeroMemNodeType(node->GetType())); | ||||
| if (!no_need_assign_memory) { | if (!no_need_assign_memory) { | ||||
| out_node_set_continuous_input = IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index); | |||||
| no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input); | |||||
| out_node_set_continuous_input = | |||||
| IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index, no_need_assign_memory); | |||||
| GE_IF_BOOL_EXEC(!no_need_assign_memory, | |||||
| no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input);); | |||||
| } | } | ||||
| no_need_assign_memory = (no_need_assign_memory || IsKnownSubgraphData(node)); | no_need_assign_memory = (no_need_assign_memory || IsKnownSubgraphData(node)); | ||||
| if (no_need_assign_memory) { | if (no_need_assign_memory) { | ||||
| @@ -259,6 +259,7 @@ class BlockMemAssigner : public MemAssigner { | |||||
| ge::ComputeGraphPtr compute_graph_; | ge::ComputeGraphPtr compute_graph_; | ||||
| std::vector<MemoryBlock *> memory_blocks_; | std::vector<MemoryBlock *> memory_blocks_; | ||||
| std::vector<MemoryBlock *> blocks_store_; | |||||
| std::vector<NodeTypeIndex> zero_memory_list_; | std::vector<NodeTypeIndex> zero_memory_list_; | ||||
| @@ -357,7 +358,7 @@ class BlockMemAssigner : public MemAssigner { | |||||
| bool IsZeroCopyBlock(const NodePtr &node, bool continuous); | bool IsZeroCopyBlock(const NodePtr &node, bool continuous); | ||||
| bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | ||||
| uint32_t &peer_input_index); | |||||
| uint32_t &peer_input_index, bool &no_need_assign_memory); | |||||
| /// | /// | ||||
| /// @ingroup GE | /// @ingroup GE | ||||
| @@ -39,6 +39,33 @@ const size_t kVirtualInputNodeOutputSize = 1; | |||||
| const size_t kVirtualOutputNodeInputSize = 1; | const size_t kVirtualOutputNodeInputSize = 1; | ||||
| const size_t kVirtualNodeDataIndex = 0; | const size_t kVirtualNodeDataIndex = 0; | ||||
| const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_"; | const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_"; | ||||
| int64_t GetSymbolOutputOffset(const std::map<std::string, std::string> &anchor_to_symbol, | |||||
| const std::map<std::string, std::list<ge::NodeIndexIO>> &symbol_to_anchors, | |||||
| const ge::NodePtr &node, const uint32_t i) { | |||||
| ge::NodeIndexIO cur_node_index_io(node, i, ge::kOut); | |||||
| auto iter1 = anchor_to_symbol.find(cur_node_index_io.ToString()); | |||||
| if (iter1 == anchor_to_symbol.end()) { | |||||
| return ge::kInvalidOffset; | |||||
| } | |||||
| auto out_symbol = iter1->second; | |||||
| auto iter2 = symbol_to_anchors.find(out_symbol); | |||||
| if (iter2 == symbol_to_anchors.end()) { | |||||
| return ge::kInvalidOffset; | |||||
| } | |||||
| for (const auto &node_index_io : iter2->second) { | |||||
| if (node_index_io.value_ == out_symbol) { | |||||
| vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset(); | |||||
| vector<int64_t> symbol_output_list = node_index_io.node_->GetOpDesc()->GetOutputOffset(); | |||||
| if (node_index_io.index_ >= symbol_output_list.size()) { | |||||
| return ge::kInvalidOffset; | |||||
| } | |||||
| GELOGD("Node %s %uth output offset is %ld, Symbol %s output offset is %ld.", node->GetName().c_str(), i, | |||||
| output_list[i], iter2->first.c_str(), symbol_output_list.at(node_index_io.index_)); | |||||
| return symbol_output_list.at(node_index_io.index_); | |||||
| } | |||||
| } | |||||
| return ge::kInvalidOffset; | |||||
| } | |||||
| } // namespace | } // namespace | ||||
| namespace ge { | namespace ge { | ||||
| Status VariableMemoryAssigner::Assign() { | Status VariableMemoryAssigner::Assign() { | ||||
| @@ -1191,6 +1218,12 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt | |||||
| } | } | ||||
| Status GraphMemoryAssigner::CheckOffset() { | Status GraphMemoryAssigner::CheckOffset() { | ||||
| std::map<std::string, std::string> anchor_to_symbol; | |||||
| std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors; | |||||
| if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) { | |||||
| GELOGE(FAILED, "Get ref-mapping for graph %s failed.", compute_graph_->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { | for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { | ||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
| vector<int64_t> input_list = node->GetOpDesc()->GetInputOffset(); | vector<int64_t> input_list = node->GetOpDesc()->GetInputOffset(); | ||||
| @@ -1200,13 +1233,26 @@ Status GraphMemoryAssigner::CheckOffset() { | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| } | } | ||||
| bool need_update_output = false; | |||||
| vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset(); | vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset(); | ||||
| for (auto output : output_list) { | |||||
| if (output == ge::kInvalidOffset) { | |||||
| for (uint32_t i = 0; i < output_list.size(); ++i) { | |||||
| if (output_list[i] == ge::kInvalidOffset) { | |||||
| GELOGE(FAILED, "Invalid offset in node: %s output: %ld.", node->GetName().c_str(), ge::kInvalidOffset); | GELOGE(FAILED, "Invalid offset in node: %s output: %ld.", node->GetName().c_str(), ge::kInvalidOffset); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| if (node->GetType() == IDENTITY || node->GetType() == READVARIABLEOP) { | |||||
| auto symbol_offset = GetSymbolOutputOffset(anchor_to_symbol, symbol_to_anchors, node, i); | |||||
| if (symbol_offset != ge::kInvalidOffset && output_list[i] != symbol_offset) { | |||||
| output_list[i] = symbol_offset; | |||||
| need_update_output = true; | |||||
| } | |||||
| } | |||||
| } | } | ||||
| if (need_update_output) { | |||||
| node->GetOpDesc()->SetOutputOffset(output_list); | |||||
| } | |||||
| vector<int64_t> workspace_list = node->GetOpDesc()->GetWorkspace(); | vector<int64_t> workspace_list = node->GetOpDesc()->GetWorkspace(); | ||||
| for (auto workspace : workspace_list) { | for (auto workspace : workspace_list) { | ||||
| if (workspace == ge::kInvalidOffset) { | if (workspace == ge::kInvalidOffset) { | ||||
| @@ -592,7 +592,17 @@ Status GraphExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigI | |||||
| GELOGW("GetAIPPInfo is not success."); | GELOGW("GetAIPPInfo is not success."); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| return SUCCESS; | |||||
| } | |||||
| Status GraphExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { | |||||
| auto model_manager = ge::ModelManager::GetInstance(); | |||||
| GE_CHECK_NOTNULL(model_manager); | |||||
| Status ret = model_manager->GetAippType(model_id, index, type, aipp_index); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGW("Get aipp type is not success."); | |||||
| return ret; | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -75,6 +75,8 @@ class GraphExecutor { | |||||
| static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | ||||
| static Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Get dynamic batch_info | /// @brief Get dynamic batch_info | ||||
| @@ -125,7 +125,7 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener | |||||
| rt_model_stream_(nullptr), | rt_model_stream_(nullptr), | ||||
| is_inner_model_stream_(false), | is_inner_model_stream_(false), | ||||
| is_async_mode_(false), | is_async_mode_(false), | ||||
| last_execute_mode_(false), | |||||
| last_execute_mode_(INITIALIZATION), | |||||
| session_id_(0), | session_id_(0), | ||||
| device_id_(0), | device_id_(0), | ||||
| maxDumpOpNum_(0), | maxDumpOpNum_(0), | ||||
| @@ -1573,6 +1573,48 @@ Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status DavinciModel::GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index) { | |||||
| GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); | |||||
| // Set default value | |||||
| type = DATA_WITHOUT_AIPP; | |||||
| aipp_index = 0xFFFFFFFF; // default invalid value | |||||
| OpDescPtr data_op = data_op_list_[index]; | |||||
| GE_CHECK_NOTNULL(data_op); | |||||
| if (!data_op->HasAttr(ATTR_DATA_RELATED_AIPP_MODE)) { | |||||
| GELOGW("There is no aipp releated info with index %u.", index); | |||||
| return SUCCESS; | |||||
| } | |||||
| std::string data_mode; | |||||
| (void)AttrUtils::GetStr(data_op, ATTR_DATA_RELATED_AIPP_MODE, data_mode); | |||||
| if (data_mode == "static_aipp") { | |||||
| type = DATA_WITH_STATIC_AIPP; | |||||
| } else if (data_mode == "dynamic_aipp") { | |||||
| type = DATA_WITH_DYNAMIC_AIPP; | |||||
| } else if (data_mode == "dynamic_aipp_conf") { | |||||
| type = DYNAMIC_AIPP_NODE; | |||||
| } else { | |||||
| GELOGE(INTERNAL_ERROR, "The info of aipp releated info %s is invalid with index %u.", data_mode.c_str(), index); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| if (type == DATA_WITH_DYNAMIC_AIPP) { | |||||
| string releated_name; | |||||
| (void)AttrUtils::GetStr(data_op, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name); | |||||
| for (size_t i = 0; i < data_op_list_.size(); ++i) { | |||||
| GE_CHECK_NOTNULL(data_op_list_[i]); | |||||
| if (data_op_list_[i]->GetName() == releated_name) { | |||||
| GELOGI("Find aipp_data [%s] index %zu from index %u", releated_name.c_str(), i, index); | |||||
| aipp_index = i; | |||||
| } | |||||
| } | |||||
| if (aipp_index == 0xFFFFFFFF) { | |||||
| GELOGE(INTERNAL_ERROR, "Can not find aipp data node from index %u", index); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| void DavinciModel::SetDynamicSize(const std::vector<uint64_t> &batch_num, int32_t dynamic_type) { | void DavinciModel::SetDynamicSize(const std::vector<uint64_t> &batch_num, int32_t dynamic_type) { | ||||
| batch_size_.clear(); | batch_size_.clear(); | ||||
| if (batch_num.empty()) { | if (batch_num.empty()) { | ||||
| @@ -1666,9 +1708,9 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, | |||||
| return; | return; | ||||
| } | } | ||||
| // judge if this data is linked dynamic aipp first, multiply batch has been considered | // judge if this data is linked dynamic aipp first, multiply batch has been considered | ||||
| if (op_desc->HasAttr("_dynamic_aipp_input_dims")) { | |||||
| if (op_desc->HasAttr(ATTR_DYNAMIC_AIPP_INPUT_DIMS)) { | |||||
| vector<int64_t> dynamic_aipp_input_dims; | vector<int64_t> dynamic_aipp_input_dims; | ||||
| (void)AttrUtils::GetListInt(op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_input_dims); | |||||
| (void)AttrUtils::GetListInt(op_desc, ATTR_DYNAMIC_AIPP_INPUT_DIMS, dynamic_aipp_input_dims); | |||||
| SetInputDimsInfo(dynamic_aipp_input_dims, format, input); | SetInputDimsInfo(dynamic_aipp_input_dims, format, input); | ||||
| return; | return; | ||||
| } else { | } else { | ||||
| @@ -3371,11 +3413,15 @@ bool DavinciModel::IsBroadCastOpData(const ge::NodePtr &var_node) { | |||||
| /// @return Status | /// @return Status | ||||
| /// | /// | ||||
| Status DavinciModel::InitModelStream(rtStream_t stream) { | Status DavinciModel::InitModelStream(rtStream_t stream) { | ||||
| ExecuteMode curr_mode = is_async_mode_ ? ASYNCHRONIZATION : SYNCHRONIZATION; | |||||
| GE_CHK_BOOL_RET_STATUS((curr_mode == last_execute_mode_) || (last_execute_mode_ == INITIALIZATION), INTERNAL_ERROR, | |||||
| "NnExecute not support mix execute."); | |||||
| last_execute_mode_ = curr_mode; | |||||
| // asynchronize mode, use user input stream. | // asynchronize mode, use user input stream. | ||||
| if (is_async_mode_) { | if (is_async_mode_) { | ||||
| rt_model_stream_ = stream; | rt_model_stream_ = stream; | ||||
| is_inner_model_stream_ = false; | is_inner_model_stream_ = false; | ||||
| last_execute_mode_ = true; | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -3387,14 +3433,12 @@ Status DavinciModel::InitModelStream(rtStream_t stream) { | |||||
| rt_model_stream_ = stream; | rt_model_stream_ = stream; | ||||
| is_inner_model_stream_ = false; | is_inner_model_stream_ = false; | ||||
| last_execute_mode_ = false; | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| if (last_execute_mode_ || (rt_model_stream_ == nullptr)) { | |||||
| if (rt_model_stream_ == nullptr) { | |||||
| GE_CHK_RT_RET(rtStreamCreateWithFlags(&rt_model_stream_, priority_, RT_STREAM_FORBIDDEN_DEFAULT)); | GE_CHK_RT_RET(rtStreamCreateWithFlags(&rt_model_stream_, priority_, RT_STREAM_FORBIDDEN_DEFAULT)); | ||||
| is_inner_model_stream_ = true; | is_inner_model_stream_ = true; | ||||
| last_execute_mode_ = false; | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -75,6 +75,12 @@ struct timeInfo { | |||||
| int64_t dumpEndTime; | int64_t dumpEndTime; | ||||
| }; | }; | ||||
// Execution mode remembered by DavinciModel in last_execute_mode_.
enum ExecuteMode {
  INITIALIZATION = 0,    // value set at construction; InitModelStream accepts either mode while this is stored
  SYNCHRONIZATION = 1,   // selected by InitModelStream when is_async_mode_ is false
  ASYNCHRONIZATION = 2,  // selected by InitModelStream when is_async_mode_ is true
};
| // comments | // comments | ||||
| class DavinciModel { | class DavinciModel { | ||||
| public: | public: | ||||
| @@ -314,6 +320,8 @@ class DavinciModel { | |||||
| /// | /// | ||||
| Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info); | Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info); | ||||
| Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Get model_id. | /// @brief Get model_id. | ||||
| @@ -884,7 +892,7 @@ class DavinciModel { | |||||
| bool is_inner_model_stream_; | bool is_inner_model_stream_; | ||||
| bool is_async_mode_; // For NN execute, Async mode use rtMemcpyAsync on rt_model_stream_. | bool is_async_mode_; // For NN execute, Async mode use rtMemcpyAsync on rt_model_stream_. | ||||
| bool last_execute_mode_; | |||||
| ExecuteMode last_execute_mode_; | |||||
| bool is_stream_list_bind_{false}; | bool is_stream_list_bind_{false}; | ||||
| bool is_pure_head_stream_{false}; | bool is_pure_head_stream_{false}; | ||||
| @@ -876,6 +876,14 @@ Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippCo | |||||
| return davinci_model->GetAIPPInfo(index, aipp_info); | return davinci_model->GetAIPPInfo(index, aipp_info); | ||||
| } | } | ||||
| Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { | |||||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetAIPPInfo failed, invalid model_id is %u.", | |||||
| model_id); | |||||
| return davinci_model->GetAippType(index, type, aipp_index); | |||||
| } | |||||
| Status ModelManager::GenSessionId(uint64_t &session_id) { | Status ModelManager::GenSessionId(uint64_t &session_id) { | ||||
| std::lock_guard<std::mutex> lock(session_id_create_mutex_); | std::lock_guard<std::mutex> lock(session_id_create_mutex_); | ||||
| @@ -224,6 +224,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
| /// | /// | ||||
| ge::Status GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | ge::Status GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | ||||
| ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); | |||||
| /// | /// | ||||
| /// @ingroup domi_ome | /// @ingroup domi_ome | ||||
| /// @brief set model input and output size zero copy | /// @brief set model input and output size zero copy | ||||
| @@ -2795,11 +2795,18 @@ Status GraphManager::SaveVariables(const Graph &graph, const std::vector<std::st | |||||
| GELOGE(FAILED, "Fetch var[%s] value failed.", var_name.c_str()); | GELOGE(FAILED, "Fetch var[%s] value failed.", var_name.c_str()); | ||||
| return FAILED; | return FAILED; | ||||
| } else { | } else { | ||||
| auto var_tensor = var_results[var_name].GetTensorDesc(); | |||||
| var_tensor.SetName(var_name); | |||||
| var_results[var_name].SetTensorDesc(var_tensor); | |||||
| var_values.emplace_back(var_results[var_name]); | var_values.emplace_back(var_results[var_name]); | ||||
| } | } | ||||
| } | } | ||||
| } else { | } else { | ||||
| for (auto iter = var_results.begin(); iter != var_results.end(); ++iter) { | for (auto iter = var_results.begin(); iter != var_results.end(); ++iter) { | ||||
| string var_name = iter->first; | |||||
| auto var_tensor = iter->second.GetTensorDesc(); | |||||
| var_tensor.SetName(var_name); | |||||
| iter->second.SetTensorDesc(var_tensor); | |||||
| var_values.emplace_back(iter->second); | var_values.emplace_back(iter->second); | ||||
| } | } | ||||
| } | } | ||||
| @@ -491,7 +491,7 @@ Status SplitIdentityAlongAnchor(const OutDataAnchorPtr &out_data_anchor, const I | |||||
| if (input_rw_type == InputRWType::kScopeWriteable || input_rw_type == InputRWType::kWriteable) { | if (input_rw_type == InputRWType::kScopeWriteable || input_rw_type == InputRWType::kWriteable) { | ||||
| auto new_identity = CreateIdentityAfterSrcNode(*pre_node, pre_out_data_anchor->GetIdx()); | auto new_identity = CreateIdentityAfterSrcNode(*pre_node, pre_out_data_anchor->GetIdx()); | ||||
| GE_CHECK_NOTNULL(new_identity); | GE_CHECK_NOTNULL(new_identity); | ||||
| if (GraphUtils::AddEdge(pre_out_data_anchor, new_identity->GetInDataAnchor(kIdentityAnchorIndex)) != SUCCESS && | |||||
| if (GraphUtils::AddEdge(pre_out_data_anchor, new_identity->GetInDataAnchor(kIdentityAnchorIndex)) != SUCCESS || | |||||
| GraphUtils::AddEdge(new_identity->GetOutDataAnchor(kIdentityAnchorIndex), peer_in_data_anchor) != SUCCESS) { | GraphUtils::AddEdge(new_identity->GetOutDataAnchor(kIdentityAnchorIndex), peer_in_data_anchor) != SUCCESS) { | ||||
| GELOGE(INTERNAL_ERROR, "Failed to insert Identity between node %s and %s", | GELOGE(INTERNAL_ERROR, "Failed to insert Identity between node %s and %s", | ||||
| pre_out_data_anchor->GetOwnerNode()->GetName().c_str(), | pre_out_data_anchor->GetOwnerNode()->GetName().c_str(), | ||||
| @@ -176,6 +176,9 @@ Status SubgraphPass::WhileInputNodes(const ComputeGraphPtr &graph, const NodePtr | |||||
| GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | ||||
| NodePtr in_node = peer_out_anchor->GetOwnerNode(); | NodePtr in_node = peer_out_anchor->GetOwnerNode(); | ||||
| GE_CHECK_NOTNULL(in_node); | GE_CHECK_NOTNULL(in_node); | ||||
| if (in_node->GetType() == VARIABLE || in_node->GetType() == VARHANDLEOP || in_node->GetType() == VARIABLEV2) { | |||||
| continue; | |||||
| } | |||||
| // Input->While and Input link to other nodes need insert memcpy | // Input->While and Input link to other nodes need insert memcpy | ||||
| if (peer_out_anchor->GetPeerInDataAnchors().size() > 1) { | if (peer_out_anchor->GetPeerInDataAnchors().size() > 1) { | ||||
| GELOGD("Input %s of While %s links to other nodes.", in_node->GetName().c_str(), node->GetName().c_str()); | GELOGD("Input %s of While %s links to other nodes.", in_node->GetName().c_str(), node->GetName().c_str()); | ||||
| @@ -124,7 +124,14 @@ Status GetDataDimN(const ge::NodePtr &data_node, ge::Format format, int64_t &bat | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| } | } | ||||
| GELOGE(PARAM_INVALID, "when dynamic aipp, shape must be in range [3, 4], but is %zu", shape.size()); | |||||
| string errormsg = | |||||
| "its shape size must be in range[3,4] which dynamic aipp is linked, " | |||||
| "maybe this input is not suitable for dynamic aipp"; | |||||
| ErrorManager::GetInstance().ATCReportErrMessage( | |||||
| "E10001", {"parameter", "value", "reason"}, | |||||
| {data_node->GetName() + " shape size", to_string(shape.size()), errormsg}); | |||||
| GELOGE(PARAM_INVALID, "The shape size of this node [%s] which linked dynamic aipp must be in range[3, 4], but is %zu", | |||||
| data_node->GetName().c_str(), shape.size()); | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| @@ -272,7 +279,6 @@ Status AippOp::AddAippAttrbutes(const OpDescPtr &op_desc, const std::string &aip | |||||
| GE_CHK_BOOL_RET_STATUS(AttrUtils::SetInt(op_desc, kCurrentAippIndex, index), INTERNAL_ERROR, | GE_CHK_BOOL_RET_STATUS(AttrUtils::SetInt(op_desc, kCurrentAippIndex, index), INTERNAL_ERROR, | ||||
| "Set kCurrentAippIndex attr for aipp node failed"); | "Set kCurrentAippIndex attr for aipp node failed"); | ||||
| // add input/output desc | // add input/output desc | ||||
| GeTensorDesc tensor; | GeTensorDesc tensor; | ||||
| GE_CHK_GRAPH_STATUS_RET(op_desc->AddInputDesc("images", tensor), "Failed to add input images for aipp node"); | GE_CHK_GRAPH_STATUS_RET(op_desc->AddInputDesc("images", tensor), "Failed to add input images for aipp node"); | ||||
| @@ -318,6 +324,7 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr | |||||
| GELOGE(PARAM_INVALID, "Get target input node for rank %d failed", rank); | GELOGE(PARAM_INVALID, "Get target input node for rank %d failed", rank); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| data_node_linked_aipp = data_node; | |||||
| auto data_opdesc = data_node->GetOpDesc(); | auto data_opdesc = data_node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(data_opdesc); | GE_CHECK_NOTNULL(data_opdesc); | ||||
| string set_dt_str; | string set_dt_str; | ||||
| @@ -330,10 +337,17 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| // add dynamic or static attr message to data | |||||
| if (GetAippMode() == domi::AippOpParams::static_) { | |||||
| (void)AttrUtils::SetStr(data_opdesc, ATTR_DATA_RELATED_AIPP_MODE, "static_aipp"); | |||||
| } else if (GetAippMode() == domi::AippOpParams::dynamic) { | |||||
| (void)AttrUtils::SetStr(data_opdesc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp"); | |||||
| } | |||||
| // In scenario AIPP+CONV2D+POOLING, keep the aipp info on Data, since AIPP disappears after subgraph optimization | // In scenario AIPP+CONV2D+POOLING, keep the aipp info on Data, since AIPP disappears after subgraph optimization | ||||
| GeAttrValue::NAMED_ATTRS aipp_attr; | GeAttrValue::NAMED_ATTRS aipp_attr; | ||||
| ConvertParamToAttr(aipp_attr); | ConvertParamToAttr(aipp_attr); | ||||
| if (!AttrUtils::SetNamedAttrs(data_node->GetOpDesc(), ATTR_NAME_AIPP, aipp_attr)) { | |||||
| if (!AttrUtils::SetNamedAttrs(data_opdesc, ATTR_NAME_AIPP, aipp_attr)) { | |||||
| GELOGE(INTERNAL_ERROR, "Set name attrs for Data node failed. id: %d", rank); | GELOGE(INTERNAL_ERROR, "Set name attrs for Data node failed. id: %d", rank); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| @@ -737,7 +751,7 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { | |||||
| data_shape_n = data_op_desc->MutableInputDesc(0)->GetShape().GetDim(0); | data_shape_n = data_op_desc->MutableInputDesc(0)->GetShape().GetDim(0); | ||||
| } | } | ||||
| vector<int64_t> dynamic_aipp_linked_data_shape{data_shape_n, kDynamicDim, kDynamicDim, kDynamicDim}; | vector<int64_t> dynamic_aipp_linked_data_shape{data_shape_n, kDynamicDim, kDynamicDim, kDynamicDim}; | ||||
| (void)AttrUtils::SetListInt(data_op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_linked_data_shape); | |||||
| (void)AttrUtils::SetListInt(data_op_desc, ATTR_DYNAMIC_AIPP_INPUT_DIMS, dynamic_aipp_linked_data_shape); | |||||
| int64_t batch_count = -1; | int64_t batch_count = -1; | ||||
| if (GetDataDimN(data_node, ori_data_format, batch_count) != ge::SUCCESS) { | if (GetDataDimN(data_node, ori_data_format, batch_count) != ge::SUCCESS) { | ||||
| @@ -759,7 +773,24 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { | |||||
| return AddNodeToGraph(aipp_node, max_dynamic_aipp_size); | return AddNodeToGraph(aipp_node, max_dynamic_aipp_size); | ||||
| } | } | ||||
| Status AippOp::AddAttrToAippData(const OpDescPtr &aipp_data_op_desc) { | |||||
| // Add dynamic aipp config to aipp_data | |||||
| GeAttrValue::NAMED_ATTRS aipp_attr; | |||||
| ConvertParamToAttr(aipp_attr); | |||||
| (void)AttrUtils::SetNamedAttrs(aipp_data_op_desc, ATTR_NAME_AIPP, aipp_attr); | |||||
| (void)AttrUtils::SetStr(aipp_data_op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp_conf"); | |||||
| // add node name attr to data linked aipp_data, it can be queried by acl. | |||||
| GE_CHECK_NOTNULL(data_node_linked_aipp); | |||||
| auto data_op_desc = data_node_linked_aipp->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(data_op_desc); | |||||
| (void)AttrUtils::SetStr(data_op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, aipp_data_op_desc->GetName()); | |||||
| (void)AttrUtils::SetStr(aipp_data_op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, data_op_desc->GetName()); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size) { | Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size) { | ||||
| static int index = 0; | |||||
| std::vector<int64_t> input_shape_dim(1, max_dynamic_aipp_size); | std::vector<int64_t> input_shape_dim(1, max_dynamic_aipp_size); | ||||
| GeShape input_shape(input_shape_dim); | GeShape input_shape(input_shape_dim); | ||||
| // construct input tensor | // construct input tensor | ||||
| @@ -767,18 +798,21 @@ Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp | |||||
| TensorUtils::SetReuseInput(input_tensor, false); | TensorUtils::SetReuseInput(input_tensor, false); | ||||
| TensorUtils::SetSize(input_tensor, max_dynamic_aipp_size); | TensorUtils::SetSize(input_tensor, max_dynamic_aipp_size); | ||||
| // Only flush subgraph name | |||||
| const ComputeGraphPtr &graph = aipp_node->GetOwnerComputeGraph(); | const ComputeGraphPtr &graph = aipp_node->GetOwnerComputeGraph(); | ||||
| string node_name = (graph->GetParentGraph() == nullptr) ? kDynamicAippData : (graph->GetName() + "_" + node_name); | |||||
| string node_name; | |||||
| if (index == 0) { | |||||
| node_name = kDynamicAippData; | |||||
| } else { | |||||
| node_name = string(kDynamicAippData) + "_" + to_string(index); | |||||
| } | |||||
| ++index; | |||||
| // new add aipp_data ops for dynamic aipp param input | // new add aipp_data ops for dynamic aipp param input | ||||
| OpDescPtr op_desc_ptr_data = MakeShared<OpDesc>(node_name, AIPPDATA); | OpDescPtr op_desc_ptr_data = MakeShared<OpDesc>(node_name, AIPPDATA); | ||||
| GE_CHECK_NOTNULL(op_desc_ptr_data); | GE_CHECK_NOTNULL(op_desc_ptr_data); | ||||
| // Add dynamic aipp config to aipp_data | |||||
| GeAttrValue::NAMED_ATTRS aipp_attr; | |||||
| ConvertParamToAttr(aipp_attr); | |||||
| (void)AttrUtils::SetNamedAttrs(op_desc_ptr_data, ATTR_NAME_AIPP, aipp_attr); | |||||
| if (AddAttrToAippData(op_desc_ptr_data) != SUCCESS) { | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| auto stat1 = op_desc_ptr_data->AddInputDesc(input_tensor); | auto stat1 = op_desc_ptr_data->AddInputDesc(input_tensor); | ||||
| @@ -78,9 +78,11 @@ class AippOp : public InsertOpBase { | |||||
| Status CreateAippData(const NodePtr &aipp); | Status CreateAippData(const NodePtr &aipp); | ||||
| Status AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size); | Status AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size); | ||||
| Status AddAippAttrbutes(const OpDescPtr &op_desc, const std::string &aipp_cfg_path, const uint32_t &index); | Status AddAippAttrbutes(const OpDescPtr &op_desc, const std::string &aipp_cfg_path, const uint32_t &index); | ||||
| Status AddAttrToAippData(const OpDescPtr &aipp_data_op_desc); | |||||
| domi::AippOpParams *aipp_params_ = nullptr; | domi::AippOpParams *aipp_params_ = nullptr; | ||||
| ge::NodePtr aipp_node_ = nullptr; | ge::NodePtr aipp_node_ = nullptr; | ||||
| ge::NodePtr data_node_linked_aipp = nullptr; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -22,6 +22,7 @@ | |||||
| #include "common/ge/ge_util.h" | #include "common/ge/ge_util.h" | ||||
| #include "common/op/ge_op_utils.h" | #include "common/op/ge_op_utils.h" | ||||
| #include "common/util.h" | #include "common/util.h" | ||||
| #include "common/util/error_manager/error_manager.h" | |||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
| #include "framework/common/ge_inner_error_codes.h" | #include "framework/common/ge_inner_error_codes.h" | ||||
| @@ -120,15 +121,15 @@ Status InsertNewOpUtil::CheckPositionNotRepeat() { | |||||
| for (int j = i + 1; j < insert_op_conf_->aipp_op_size(); j++) { | for (int j = i + 1; j < insert_op_conf_->aipp_op_size(); j++) { | ||||
| const domi::AippOpParams *another_item = insert_op_conf_->mutable_aipp_op(j); | const domi::AippOpParams *another_item = insert_op_conf_->mutable_aipp_op(j); | ||||
| GE_IF_BOOL_EXEC(item->related_input_rank() != another_item->related_input_rank(), continue;); | |||||
| GE_IF_BOOL_EXEC( | |||||
| item->input_edge_idx_size() == 0 || another_item->input_edge_idx_size() == 0 || | |||||
| item->input_edge_idx(0) == another_item->input_edge_idx(0), | |||||
| GELOGE(PARAM_INVALID, | |||||
| "Can not insert aipp op to the same postion! please check related_input_rank and input_edge_idx."); | |||||
| return PARAM_INVALID;); | |||||
| GE_IF_BOOL_EXEC(item->related_input_rank() == another_item->related_input_rank(), | |||||
| string errormsg = | |||||
| "Can not insert aipp to the same postion! Please ensure related_input_rank" | |||||
| " param is different in different aipp config."; | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); | |||||
| GELOGE(PARAM_INVALID, | |||||
| "Can not insert aipp op to the same postion! Please ensure related_input_rank param " | |||||
| "is different in different aipp config."); | |||||
| return PARAM_INVALID;); | |||||
| } | } | ||||
| } | } | ||||
| @@ -162,18 +163,12 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) { | |||||
| std::unique_ptr<domi::AippOpParams> aippParams(new (std::nothrow) domi::AippOpParams()); | std::unique_ptr<domi::AippOpParams> aippParams(new (std::nothrow) domi::AippOpParams()); | ||||
| GE_CHECK_NOTNULL(aippParams); | GE_CHECK_NOTNULL(aippParams); | ||||
| GE_IF_BOOL_EXEC(aippNodes.size() > 0, GE_CHK_STATUS(GetAippParams(aippParams, aippNodes[0])); | |||||
| aippMode = (aippMode == domi::AippOpParams::undefined) ? aippParams->aipp_mode() : aippMode; | |||||
| GE_CHK_BOOL_RET_STATUS(aippMode == aippParams->aipp_mode(), PARAM_INVALID, | |||||
| "The aipp_mode of all aipp_op must be the same");); | |||||
| GE_IF_BOOL_EXEC( | GE_IF_BOOL_EXEC( | ||||
| aippNodes.size() > 1, for (decltype(aippNodes)::size_type i = 1; i < aippNodes.size(); i++) { | aippNodes.size() > 1, for (decltype(aippNodes)::size_type i = 1; i < aippNodes.size(); i++) { | ||||
| std::unique_ptr<domi::AippOpParams> currAippParam(new (std::nothrow) domi::AippOpParams()); | std::unique_ptr<domi::AippOpParams> currAippParam(new (std::nothrow) domi::AippOpParams()); | ||||
| GE_CHECK_NOTNULL(currAippParam); | GE_CHECK_NOTNULL(currAippParam); | ||||
| GE_CHK_STATUS(GetAippParams(currAippParam, aippNodes[i])); | GE_CHK_STATUS(GetAippParams(currAippParam, aippNodes[i])); | ||||
| GE_CHK_BOOL_RET_STATUS(aippMode == currAippParam->aipp_mode(), PARAM_INVALID, | |||||
| "The aipp_mode of all aipp_op must be the same"); | |||||
| if (aippMode == domi::AippOpParams::static_) { | if (aippMode == domi::AippOpParams::static_) { | ||||
| GE_CHK_BOOL_RET_STATUS(aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID, | GE_CHK_BOOL_RET_STATUS(aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID, | ||||
| "The input_format of all aipp_ops after one Data should be the same"); | "The input_format of all aipp_ops after one Data should be the same"); | ||||
| @@ -41,6 +41,7 @@ | |||||
| #include "inc/pass_manager.h" | #include "inc/pass_manager.h" | ||||
| #include "graph/common/local_context.h" | #include "graph/common/local_context.h" | ||||
| using std::map; | |||||
| using std::set; | using std::set; | ||||
| using std::string; | using std::string; | ||||
| using std::vector; | using std::vector; | ||||
| @@ -265,27 +266,24 @@ Status MultiBatchGraphCopyer::Init() { | |||||
| } | } | ||||
| Status MultiBatchGraphCopyer::LabelStatus() { | Status MultiBatchGraphCopyer::LabelStatus() { | ||||
| for (const auto &data : origin_data_nodes_) { | |||||
| auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); | |||||
| if (!IsAllDimsPositive(data_shape.GetDims())) { | |||||
| origin_nodes_status_[data.get()] = kNodeInBatchBranch; | |||||
| } | |||||
| } | |||||
| map<string, vector<NodePtr>> frame_enters; | |||||
| InitStatus(frame_enters); | |||||
| bool changed = true; | bool changed = true; | ||||
| // If any input node is kNodeInBatchBranch, this node is also kNodeInBatchBranch | // If any input node is kNodeInBatchBranch, this node is also kNodeInBatchBranch | ||||
| while (changed) { | while (changed) { | ||||
| changed = false; | changed = false; | ||||
| for (const auto &node : origin_all_nodes_) { | for (const auto &node : origin_all_nodes_) { | ||||
| auto iter = origin_nodes_status_.find(node.get()); | |||||
| if (iter != origin_nodes_status_.end()) { | |||||
| continue; | |||||
| } | |||||
| for (auto &in_node : node->GetInAllNodes()) { | for (auto &in_node : node->GetInAllNodes()) { | ||||
| bool is_in_batch = origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end() && | bool is_in_batch = origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end() && | ||||
| origin_nodes_status_[in_node.get()] == kNodeInBatchBranch; | origin_nodes_status_[in_node.get()] == kNodeInBatchBranch; | ||||
| if (is_in_batch) { | if (is_in_batch) { | ||||
| origin_nodes_status_[node.get()] = kNodeInBatchBranch; | |||||
| changed = true; | |||||
| if (origin_nodes_status_.find(node.get()) == origin_nodes_status_.end() || | |||||
| origin_nodes_status_[node.get()] != kNodeInBatchBranch) { | |||||
| origin_nodes_status_[node.get()] = kNodeInBatchBranch; | |||||
| ResetEnterStatus(frame_enters, node); | |||||
| changed = true; | |||||
| } | |||||
| break; | break; | ||||
| } | } | ||||
| } | } | ||||
| @@ -316,6 +314,45 @@ Status MultiBatchGraphCopyer::LabelStatus() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| void MultiBatchGraphCopyer::InitStatus(map<string, vector<NodePtr>> &frame_enters) { | |||||
| for (const auto &node : origin_all_nodes_) { | |||||
| if (node->GetType() != ENTER && node->GetType() != REFENTER) { | |||||
| continue; | |||||
| } | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| if (op_desc == nullptr) { | |||||
| continue; | |||||
| } | |||||
| string frame_name; | |||||
| if (AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) { | |||||
| frame_enters[frame_name].emplace_back(node); | |||||
| } | |||||
| } | |||||
| for (const auto &data : origin_data_nodes_) { | |||||
| auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); | |||||
| if (!IsAllDimsPositive(data_shape.GetDims())) { | |||||
| origin_nodes_status_[data.get()] = kNodeInBatchBranch; | |||||
| } | |||||
| } | |||||
| } | |||||
| void MultiBatchGraphCopyer::ResetEnterStatus(map<string, vector<NodePtr>> &frame_enters, const NodePtr &node) { | |||||
| if (node->GetType() != ENTER && node->GetType() != REFENTER) { | |||||
| return; | |||||
| } | |||||
| for (const auto &frame_enter : frame_enters) { | |||||
| auto &enters = frame_enter.second; | |||||
| if (std::find(enters.begin(), enters.end(), node) != enters.end()) { | |||||
| for (const auto &enter : enters) { | |||||
| origin_nodes_status_[enter.get()] = kNodeInBatchBranch; | |||||
| } | |||||
| break; | |||||
| } | |||||
| } | |||||
| } | |||||
| Status MultiBatchGraphCopyer::CreateNewNodes() { | Status MultiBatchGraphCopyer::CreateNewNodes() { | ||||
| shape_data_ = InsertShapeDataNode(); | shape_data_ = InsertShapeDataNode(); | ||||
| if (shape_data_ == nullptr) { | if (shape_data_ == nullptr) { | ||||
| @@ -68,6 +68,8 @@ class MultiBatchGraphCopyer { | |||||
| // label status for origin_all_nodes_ | // label status for origin_all_nodes_ | ||||
| Status LabelStatus(); | Status LabelStatus(); | ||||
| void InitStatus(std::map<string, vector<NodePtr>> &frame_enters); | |||||
| void ResetEnterStatus(std::map<string, vector<NodePtr>> &frame_enters, const NodePtr &node); | |||||
| // add nodes functions | // add nodes functions | ||||
| Status CreateNewNodes(); | Status CreateNewNodes(); | ||||
| @@ -722,8 +722,15 @@ Status AiCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node | |||||
| GE_CHECK_NOTNULL(node_item); | GE_CHECK_NOTNULL(node_item); | ||||
| auto task_defs = model.GetTaskDefs(node); | auto task_defs = model.GetTaskDefs(node); | ||||
| GE_CHECK_NOTNULL(task_defs); | GE_CHECK_NOTNULL(task_defs); | ||||
| GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID, "Node[%s] task_def num[%zu] != 1", | |||||
| node->GetName().c_str(), (*task_defs).size()); | |||||
| if (node_item->shape_inference_type != DEPEND_COMPUTE) { | |||||
| GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID, "Node[%s] task_def num[%zu] != 1", | |||||
| node->GetName().c_str(), (*task_defs).size()); | |||||
| } else { | |||||
| // The number of tasks of the fourth type operator may be 2 | |||||
| GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1 || (*task_defs).size() == 2, PARAM_INVALID, | |||||
| "Node[%s] DEPEND_COMPUTE task_def num[%zu] != 1 or 2", node->GetName().c_str(), | |||||
| (*task_defs).size()); | |||||
| } | |||||
| const auto &task_def = (*task_defs)[0]; | const auto &task_def = (*task_defs)[0]; | ||||
| std::shared_ptr<AicpuNodeTaskBase> aicpu_task; | std::shared_ptr<AicpuNodeTaskBase> aicpu_task; | ||||
| if (task_def.type() == RT_MODEL_TASK_KERNEL_EX) { | if (task_def.type() == RT_MODEL_TASK_KERNEL_EX) { | ||||
| @@ -18,6 +18,7 @@ | |||||
| #include <map> | #include <map> | ||||
| #include <memory> | #include <memory> | ||||
| #include <vector> | #include <vector> | ||||
| #include "adx_datadump_server.h" | |||||
| #include "common/dump/dump_properties.h" | #include "common/dump/dump_properties.h" | ||||
| #include "common/util.h" | #include "common/util.h" | ||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| @@ -76,10 +77,12 @@ Status InnerSession::Initialize() { | |||||
| DumpProperties dump_properties; | DumpProperties dump_properties; | ||||
| dump_properties.InitByOptions(); | dump_properties.InitByOptions(); | ||||
| GE_CHK_STATUS_RET(AddDumpProperties(dump_properties), "Add dump properties failed"); | |||||
| ret = graph_manager_.Initialize(options_); | ret = graph_manager_.Initialize(options_); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "[InnerSession:%lu] initialize failed.", session_id_); | GELOGE(ret, "[InnerSession:%lu] initialize failed.", session_id_); | ||||
| GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed"); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -87,6 +90,7 @@ Status InnerSession::Initialize() { | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "failed to set malloc size"); | GELOGE(ret, "failed to set malloc size"); | ||||
| (void)graph_manager_.Finalize(); | (void)graph_manager_.Finalize(); | ||||
| GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed"); | |||||
| GE_CHK_RT(rtDeviceReset(static_cast<int32_t>(GetContext().DeviceId()))); | GE_CHK_RT(rtDeviceReset(static_cast<int32_t>(GetContext().DeviceId()))); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -97,6 +101,7 @@ Status InnerSession::Initialize() { | |||||
| ret = VarManager::Instance(session_id_)->Init(version, session_id_, DEFAULT_DEVICE_ID, DEFAULT_JOB_ID); | ret = VarManager::Instance(session_id_)->Init(version, session_id_, DEFAULT_DEVICE_ID, DEFAULT_JOB_ID); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "failed to init session instance"); | GELOGE(ret, "failed to init session instance"); | ||||
| GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed"); | |||||
| } | } | ||||
| init_flag_ = true; | init_flag_ = true; | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -122,6 +127,7 @@ Status InnerSession::Finalize() { | |||||
| (void)VarManager::Instance(session_id_)->FreeVarMemory(); | (void)VarManager::Instance(session_id_)->FreeVarMemory(); | ||||
| GE_CHK_RT(rtDeviceReset(static_cast<int32_t>(GetContext().DeviceId()))); | GE_CHK_RT(rtDeviceReset(static_cast<int32_t>(GetContext().DeviceId()))); | ||||
| GE_CHK_STATUS_RET(RemoveDumpProperties(), "Remove dump properties failed"); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -297,4 +303,27 @@ Status InnerSession::SaveVariables(const Graph &graph, const std::vector<std::st | |||||
| return graph_manager_.SaveVariables(graph, var_names, outputs, var_values); | return graph_manager_.SaveVariables(graph, var_names, outputs, var_values); | ||||
| } | } | ||||
| Status InnerSession::AddDumpProperties(const DumpProperties &dump_properties) { | |||||
| if (!is_dump_server_inited_) { | |||||
| if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) { | |||||
| GE_IF_BOOL_EXEC(AdxDataDumpServerInit() != kDumpStatus, GELOGE(PARAM_INVALID, "Data dump server init failed"); | |||||
| return PARAM_INVALID) | |||||
| GELOGI("Init adx data dump server success"); | |||||
| is_dump_server_inited_ = true; | |||||
| } | |||||
| } | |||||
| PropertiesManager::Instance().AddDumpProperties(session_id_, dump_properties); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status InnerSession::RemoveDumpProperties() { | |||||
| PropertiesManager::Instance().RemoveDumpProperties(session_id_); | |||||
| if (is_dump_server_inited_ && PropertiesManager::Instance().GetDumpPropertiesMap().empty()) { | |||||
| GE_IF_BOOL_EXEC(AdxDataDumpServerUnInit() != kDumpStatus, GELOGE(PARAM_INVALID, "Data dump server uninit failed"); | |||||
| return PARAM_INVALID) | |||||
| GELOGI("UnInit adx data dump server success"); | |||||
| is_dump_server_inited_ = false; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -63,6 +63,10 @@ class InnerSession { | |||||
| bool IsGraphNeedRebuild(uint32_t graph_id); | bool IsGraphNeedRebuild(uint32_t graph_id); | ||||
| Status AddDumpProperties(const DumpProperties &dump_properties); | |||||
| Status RemoveDumpProperties(); | |||||
| private: | private: | ||||
| bool init_flag_; | bool init_flag_; | ||||
| uint64_t session_id_; | uint64_t session_id_; | ||||
| @@ -30,7 +30,7 @@ namespace ge { | |||||
| namespace { | namespace { | ||||
| const size_t kDataMemAlignSize = 32; | const size_t kDataMemAlignSize = 32; | ||||
| size_t GetAlignedSize(uint32_t size) { | |||||
| size_t GetAlignedSize(size_t size) { | |||||
| size_t aligned_size = (size + 2 * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; | size_t aligned_size = (size + 2 * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; | ||||
| return aligned_size; | return aligned_size; | ||||
| } | } | ||||
| @@ -40,6 +40,8 @@ image normalization (by subtracting the mean value or multiplying a factor), ima | |||||
| *features: The AIPP-processed output tensor of type float16 or uint8. | *features: The AIPP-processed output tensor of type float16 or uint8. | ||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| * It is a custom operator. It has no corresponding operator in Caffe. | * It is a custom operator. It has no corresponding operator in Caffe. | ||||
| *@par Restrictions: | |||||
| *Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel. Please do not use it directly. | |||||
| */ | */ | ||||
| REG_OP(Aipp) | REG_OP(Aipp) | ||||
| .INPUT(images, TensorType{DT_UINT8}) | .INPUT(images, TensorType{DT_UINT8}) | ||||
| @@ -1143,6 +1143,9 @@ REG_OP(Add) | |||||
| *@par Third-party framework compatibility: | *@par Third-party framework compatibility: | ||||
| * Compatible with the TensorFlow operator LRN. | * Compatible with the TensorFlow operator LRN. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(FusedMulAdd) | REG_OP(FusedMulAdd) | ||||
| @@ -2464,6 +2467,8 @@ REG_OP(PopulationCount) | |||||
| * @li y3: A Tensor. Must be one of the following types: float16, float32. | * @li y3: A Tensor. Must be one of the following types: float16, float32. | ||||
| * @li y4: A Tensor. Must be one of the following types: float16, float32. \n | * @li y4: A Tensor. Must be one of the following types: float16, float32. \n | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(LambNextMVWithDecay) | REG_OP(LambNextMVWithDecay) | ||||
| .INPUT(input_mul3, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input_mul3, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -2510,6 +2515,9 @@ REG_OP(LambNextMVWithDecay) | |||||
| *@li y2: A Tensor. Has the same type as "input_mul3". | *@li y2: A Tensor. Has the same type as "input_mul3". | ||||
| *@li y3: A Tensor. Has the same type as "input_mul3". | *@li y3: A Tensor. Has the same type as "input_mul3". | ||||
| *@li y4: A Tensor. Has the same type as "input_mul3". | *@li y4: A Tensor. Has the same type as "input_mul3". | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(LambNextMV) | REG_OP(LambNextMV) | ||||
| .INPUT(input_mul3, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input_mul3, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -2548,6 +2556,8 @@ REG_OP(LambNextMV) | |||||
| * @li y1: A Tensor of the same type as "input_square". | * @li y1: A Tensor of the same type as "input_square". | ||||
| * @li y2: A Tensor of the same type as "input_square". \n | * @li y2: A Tensor of the same type as "input_square". \n | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(LambNextRight) | REG_OP(LambNextRight) | ||||
| .INPUT(input_square, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input_square, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -2578,6 +2588,8 @@ REG_OP(LambNextRight) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: A Tensor of the same type as "input_greater1". \n | *y: A Tensor of the same type as "input_greater1". \n | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(LambUpdateWithLr) | REG_OP(LambUpdateWithLr) | ||||
| .INPUT(input_greater1, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input_greater1, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -2608,6 +2620,8 @@ REG_OP(LambUpdateWithLr) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: A Tensor of the same type as input. \n | *y: A Tensor of the same type as input. \n | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(LambUpdateWithLrV2) | REG_OP(LambUpdateWithLrV2) | ||||
| .INPUT(x1, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(x1, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -2643,6 +2657,8 @@ REG_OP(LambUpdateWithLrV2) | |||||
| * @li output1: A Tensor. Must be one of the following types: float16, float32. | * @li output1: A Tensor. Must be one of the following types: float16, float32. | ||||
| * @li output2: A Tensor. Must be one of the following types: float16, float32. \n | * @li output2: A Tensor. Must be one of the following types: float16, float32. \n | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(AdamApplyOneWithDecay) | REG_OP(AdamApplyOneWithDecay) | ||||
| .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -2683,6 +2699,8 @@ REG_OP(AdamApplyOneWithDecay) | |||||
| * @li output1: A Tensor. Must be one of the following types: float16, float32. | * @li output1: A Tensor. Must be one of the following types: float16, float32. | ||||
| * @li output2: A Tensor. Must be one of the following types: float16, float32. \n | * @li output2: A Tensor. Must be one of the following types: float16, float32. \n | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(AdamApplyOne) | REG_OP(AdamApplyOne) | ||||
| .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -2723,6 +2741,8 @@ REG_OP(AdamApplyOne) | |||||
| * @li output1: A Tensor. Must be one of the following types: float16, float32. | * @li output1: A Tensor. Must be one of the following types: float16, float32. | ||||
| * @li output2: A Tensor. Must be one of the following types: float16, float32. \n | * @li output2: A Tensor. Must be one of the following types: float16, float32. \n | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(AdamApplyOneWithDecayAssign) | REG_OP(AdamApplyOneWithDecayAssign) | ||||
| .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -2763,6 +2783,8 @@ REG_OP(AdamApplyOneWithDecayAssign) | |||||
| * @li output1: A Tensor. Must be one of the following types: float16, float32. | * @li output1: A Tensor. Must be one of the following types: float16, float32. | ||||
| * @li output2: A Tensor. Must be one of the following types: float16, float32. \n | * @li output2: A Tensor. Must be one of the following types: float16, float32. \n | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(AdamApplyOneAssign) | REG_OP(AdamApplyOneAssign) | ||||
| .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -2793,6 +2815,8 @@ REG_OP(AdamApplyOneAssign) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: A Tensor of the same type as "x". \n | *y: A Tensor of the same type as "x". \n | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(ClipByNormNoDivSum) | REG_OP(ClipByNormNoDivSum) | ||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -2817,6 +2841,9 @@ REG_OP(ClipByNormNoDivSum) | |||||
| *Two outputs, including: \n | *Two outputs, including: \n | ||||
| *@li y1: A Tensor. Has the same type as "x". | *@li y1: A Tensor. Has the same type as "x". | ||||
| *@li y2: A Tensor. Has the same type as "x". | *@li y2: A Tensor. Has the same type as "x". | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(SquareSumV2) | REG_OP(SquareSumV2) | ||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -2839,6 +2866,9 @@ REG_OP(SquareSumV2) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| y: A Tensor. Has the same type as "x". | y: A Tensor. Has the same type as "x". | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(SquareSumV1) | REG_OP(SquareSumV1) | ||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -2857,6 +2887,9 @@ REG_OP(SquareSumV1) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| y1: A Tensor. Has the same type as "x1". The result of "x1". | y1: A Tensor. Has the same type as "x1". The result of "x1". | ||||
| y2: A Tensor. Has the same type as "x2". The result of "x2". | y2: A Tensor. Has the same type as "x2". The result of "x2". | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(SquareSumAll) | REG_OP(SquareSumAll) | ||||
| .INPUT(x1, TensorType({DT_FLOAT})) | .INPUT(x1, TensorType({DT_FLOAT})) | ||||
| @@ -2876,6 +2909,9 @@ REG_OP(SquareSumAll) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. Has the same type as "x1". | * y: A Tensor. Has the same type as "x1". | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(FusedMulAddN) | REG_OP(FusedMulAddN) | ||||
| .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) | .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) | ||||
| @@ -2942,6 +2978,9 @@ If false, don’t keep these dimensions. Default:False. \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *@li output0: A Tensor, the result of input0 dot multiplied by input1. | *@li output0: A Tensor, the result of input0 dot multiplied by input1. | ||||
| *@li output1: A Tensor, the result of input0 dot multiplied by input1 and then reduce-summed. | *@li output1: A Tensor, the result of input0 dot multiplied by input1 and then reduce-summed. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(ConfusionMulGrad) | REG_OP(ConfusionMulGrad) | ||||
| .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -2965,6 +3004,9 @@ REG_OP(ConfusionMulGrad) | |||||
| *@li y1: A Tensor of shape and dtype of first output, which should have \n | *@li y1: A Tensor of shape and dtype of first output, which should have \n | ||||
| shape (1,) and dtype as input. | shape (1,) and dtype as input. | ||||
| *@li y2: A Tensor of shape and dtype of second output, should be same shape and type as input. | *@li y2: A Tensor of shape and dtype of second output, should be same shape and type as input. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(FusedMulAddNL2loss) | REG_OP(FusedMulAddNL2loss) | ||||
| .INPUT(x1, TensorType::NumberType()) | .INPUT(x1, TensorType::NumberType()) | ||||
| @@ -3186,6 +3228,9 @@ REG_OP(KLDiv) | |||||
| *y: A Tensor. Has the same type as "x". \n | *y: A Tensor. Has the same type as "x". \n | ||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(TensorMove) | REG_OP(TensorMove) | ||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_BOOL})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_BOOL})) | ||||
| @@ -3197,20 +3242,18 @@ REG_OP(TensorMove) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *One input, including: | *One input, including: | ||||
| * @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32, bool. \n | |||||
| * @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *x: A Tensor. Has the same type as "x". \n | |||||
| *output_x: A Tensor. Has the same type as "x". \n | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| */ | */ | ||||
| REG_OP(TensorRedirect) | REG_OP(TensorRedirect) | ||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, | ||||
| DT_INT64, DT_INT16, DT_UINT16, DT_DOUBLE, | |||||
| DT_COMPLEX64})) | |||||
| DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32})) | |||||
| .OUTPUT(output_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, | .OUTPUT(output_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, | ||||
| DT_INT64, DT_INT16, DT_UINT16, DT_DOUBLE, | |||||
| DT_COMPLEX64})) | |||||
| DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32})) | |||||
| .OP_END_FACTORY_REG(TensorRedirect) | .OP_END_FACTORY_REG(TensorRedirect) | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -68,6 +68,9 @@ REG_OP(CacheUpdate) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *The output is dynamic for attribute func_name. | *The output is dynamic for attribute func_name. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(InternalDataMove) | REG_OP(InternalDataMove) | ||||
| .INPUT(x, TensorType::ALL()) | .INPUT(x, TensorType::ALL()) | ||||
| @@ -585,13 +585,20 @@ REG_OP(Conv2DBackpropFilterD) | |||||
| /** | /** | ||||
| *@brief Computes a 2D convolution given 4D "x" and "filter" tensors. | *@brief Computes a 2D convolution given 4D "x" and "filter" tensors. | ||||
| *@par Inputs: | *@par Inputs: | ||||
| * @li x: A 4D tensor of input images. | |||||
| * @li filter: A 4D tensor of filters. | |||||
| * @li bias: An optional 1D tensor. | |||||
| * @li offset_w: An optional 1D tensor for quantized convolution. Reserved. | |||||
| * | |||||
| * The input and output tensor attributes are listed as follows: | |||||
| * @verbatim | |||||
| *@li x: A 4D tensor of input images. With "NHWC" format, the shape is | |||||
| * [batch, in_height, in_width, in_channels]. | |||||
| *@li filter: A 4D tensor of filters. Has the same type as "x". With "HWCN" | |||||
| * format, the shape is [filter_height, filter_width, in_channels, | |||||
| * out_channels]. | |||||
| *@li bias: An optional 1D tensor. Shape is [out_channels]. | |||||
| *@li offset_w: An optional 1D tensor for quantized convolution. Shape is | |||||
| * [out_channels]. Reserved. | |||||
| *\n | |||||
| *\n | |||||
| * Note that there is a strict data type mapping between the input and output | |||||
| * tensors: | |||||
| *@verbatim | |||||
| |Tensor | x | filter | bias | offset_w | y | |Tensor | x | filter | bias | offset_w | y | ||||
| -----------|---------|---------|---------|----------|-------- | -----------|---------|---------|---------|----------|-------- | ||||
| |Data Type | float16 | float16 | float16 | _ | float16 | |Data Type | float16 | float16 | float16 | _ | float16 | ||||
| @@ -601,69 +608,84 @@ REG_OP(Conv2DBackpropFilterD) | |||||
| | | int8 | int8 | int32 | int8 | int32 | | | int8 | int8 | int32 | int8 | int32 | ||||
| -----------|---------|---------|---------|----------|-------- | -----------|---------|---------|---------|----------|-------- | ||||
| |Format | NCHW | NCHW | ND | ND | NCHW | |Format | NCHW | NCHW | ND | ND | NCHW | ||||
| | | NHWC | NHWC | | | NHWC | |||||
| | | | HWCN | | | | |||||
| | | NHWC | HWCN | | | NHWC | |||||
| @endverbatim | @endverbatim | ||||
| * It should be noted that the data types must correspond to each other, but the | |||||
| * format does not need to . \n | |||||
| * Type float32 is allowed only in mixed precision (float32->float16) scenarios. | |||||
| * Mixed precision is enabled by default. | |||||
| * \n | |||||
| * | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * @li strides: A list of 4 integers. Specifying the strides of the | |||||
| *@li strides: Required. A list of 4 integers. Specifying the strides of the | |||||
| * convolution along the height and width. The dimension order is determined | * convolution along the height and width. The dimension order is determined | ||||
| * by the data format of "x". By default the N and C dimensions are set to 1. | * by the data format of "x". By default the N and C dimensions are set to 1. | ||||
| * @li pads: A list of 4 integers. Specifying the top, bottom, left and right | |||||
| * padding. | |||||
| * @li dilations: A list of 4 integers. Specifying the dilation rate to use | |||||
| * for dilated convolution. Has the same dimension order and value as "strides". | |||||
| * @li groups: Number of blocked connections from input channels to output | |||||
| * channels. Input channels and output channels must both be divisible by | |||||
| * "groups".Type is int32. | |||||
| * @li offset_x: An optional integer for quantized convolution. Type is int32. Defaults to "0". | |||||
| * @li data_format: An optional string from: "NHWC", "NCHW". Specifying the | |||||
| * data format of the input and output images. Type is string. Defaults to "NHWC". Reserved . \n | |||||
| *@par Outputs: | |||||
| * @li y: A 4D Tensor of output images . \n | |||||
| *@attention | |||||
| * @li The parameter scope is listed as follows: | |||||
| * @verbatim | |||||
| |Name | Field | Scope | |||||
| ------------------|--------------|---------- | |||||
| |Input Image Size | H dimension | [1, 4096] | |||||
| | | W dimension | [1, 4096] | |||||
| ------------------|--------------|---------- | |||||
| |Filter Size | H dimension | [1, 255] | |||||
| | | W dimension | [1, 255] | |||||
| ------------------|--------------|---------- | |||||
| |Stride Size | H dimension | [1, 63] | |||||
| | | W dimension | [1, 63] | |||||
| ------------------|--------------|---------- | |||||
| |Padding Size | top side | [0, 255] | |||||
| | | bottom side | [0, 255] | |||||
| | | left side | [0, 255] | |||||
| | | right side | [0, 255] | |||||
| ------------------|--------------|---------- | |||||
| |Dilation Size | H dimension | [1, 255] | |||||
| | W dimension | [1, 255] | |||||
| *@li pads: Required. A list of 4 integers. Specifying the top, bottom, left | |||||
| * and right padding. | |||||
| * @li dilations: Optional. A list of 4 integers. Specifying the dilation rate | |||||
| * to use for dilated convolution. Has the same dimension order and value as | |||||
| * "strides". Defaults to [1, 1, 1, 1]. | |||||
| * @li groups: Optional. An integer of type int32, for the number of blocked | |||||
| * connections from input channels to output channels. Input channels and output | |||||
| * channels must both be divisible by "groups". "x" in_channels must be equal to | |||||
| * "filter" in_channels * groups. Defaults to 1. | |||||
| * @li offset_x: Optional. An integer of type int32, for quantized convolution. | |||||
| * Defaults to 0. | |||||
| * @li data_format: Reserved and optional. A string from: "NHWC" and "NCHW". | |||||
| * Specifying the data format of the input and output images. Defaults to | |||||
| * "NHWC". | |||||
| *\n | |||||
| *\n | |||||
| * The following value range restrictions must be met: | |||||
| *@verbatim | |||||
| |Name | Field | Scope | |||||
| ------------------|----------|---------- | |||||
| |Input Image Size | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| ------------------|----------|---------- | |||||
| |Filter Size | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| ------------------|----------|---------- | |||||
| |Stride | H | [1, 63] | |||||
| | | W | [1, 63] | |||||
| ------------------|----------|---------- | |||||
| |Padding | top | [0, 255] | |||||
| | | bottom | [0, 255] | |||||
| | | left | [0, 255] | |||||
| | | right | [0, 255] | |||||
| ------------------|----------|---------- | |||||
| |Dilation | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| @endverbatim | @endverbatim | ||||
| * @li There are restrictions for certain scenarios: | |||||
| * @verbatim | |||||
| Output | Restrictions | |||||
| ------------------|---------------------------------------------- | |||||
| W dimension == 1 | HxW(input) == HxW(filter) | |||||
| H dimension == 1 | | |||||
| ------------------|---------------------------------------------- | |||||
| W dimension == 1 | Not supported | |||||
| H dimension != 1 | | |||||
| * | |||||
| *@par Outputs: | |||||
| *@li y: A 4D Tensor of output images. Has the same type and format as "x". With | |||||
| * "NHWC" format, the shape is [batch, out_height, out_width, out_channels]. | |||||
| *\n | |||||
| * out_height = (in_height + top_pad + bottom_pad - | |||||
| * dilation_h * (filter_height - 1) - 1) | |||||
| * / stride_h + 1 | |||||
| *\n | |||||
| * out_width = (in_width + left_pad + right_pad - | |||||
| * dilation_w * (filter_width - 1) - 1) | |||||
| * / stride_w + 1 | |||||
| * | |||||
| *@attention Constraints: | |||||
| *@li The following restrictions on the output must be met: | |||||
| *@verbatim | |||||
| | Output | Restrictions | |||||
| -------------------|--------------------------- | |||||
| | W dimension == 1 | H*W(input) == H*W(filter) | |||||
| | H dimension == 1 | | |||||
| -------------------|--------------------------- | |||||
| | W dimension == 1 | Not supported | |||||
| | H dimension != 1 | | |||||
| @endverbatim | @endverbatim | ||||
| * As shown above, "HxW(input)" indicates the image size after padding and | |||||
| * "HxW(filter)" indicates the filter size after dilation . \n | |||||
| * "H * W (input)" indicates the image size after padding and "H * W (filter)" | |||||
| * indicates the filter size after dilation. | |||||
| *\n | |||||
| * | |||||
| *@par Quantization supported or not | *@par Quantization supported or not | ||||
| * Yes | |||||
| *@li Yes | |||||
| * | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *@li Compatible with the TensorFlow operator "conv2d". | *@li Compatible with the TensorFlow operator "conv2d". | ||||
| *@li Compatible with the Caffe operator 2D "Convolution". | *@li Compatible with the Caffe operator 2D "Convolution". | ||||
| @@ -1035,6 +1035,9 @@ REG_OP(ROIPooling) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| * @li decoded_boxes: A Tensor. Must have the same type as box_predictions. | * @li decoded_boxes: A Tensor. Must have the same type as box_predictions. | ||||
| * N-D with shape [N, 4]. | * N-D with shape [N, 4]. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(DecodeBbox) | REG_OP(DecodeBbox) | ||||
| .INPUT(box_predictions, TensorType{DT_FLOAT16}) | .INPUT(box_predictions, TensorType{DT_FLOAT16}) | ||||
| @@ -1052,6 +1055,9 @@ REG_OP(DecodeBbox) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *boxes_output: A Tensor. Must have the same type as boxes_input. N-D with shape [N, 4]. | *boxes_output: A Tensor. Must have the same type as boxes_input. N-D with shape [N, 4]. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(ClipBoxes) | REG_OP(ClipBoxes) | ||||
| .INPUT(boxes_input, TensorType({DT_FLOAT16})) | .INPUT(boxes_input, TensorType({DT_FLOAT16})) | ||||
| @@ -1270,6 +1276,9 @@ REG_OP(RpnProposalPostProcessing) | |||||
| * | * | ||||
| *@par Outputs: | *@par Outputs: | ||||
| * @li boundary_encoded: A Tensor. Must be float16. | * @li boundary_encoded: A Tensor. Must be float16. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(DecodeBoundariesTarget) | REG_OP(DecodeBoundariesTarget) | ||||
| .INPUT(boundary_predictions, TensorType({DT_FLOAT16})) | .INPUT(boundary_predictions, TensorType({DT_FLOAT16})) | ||||
| @@ -1287,6 +1296,9 @@ REG_OP(DecodeBoundariesTarget) | |||||
| * | * | ||||
| *@par Outputs: | *@par Outputs: | ||||
| * @li keypoints_decoded: A Tensor. Must be float16. | * @li keypoints_decoded: A Tensor. Must be float16. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(DecodeCornerpointsTargetBG) | REG_OP(DecodeCornerpointsTargetBG) | ||||
| .INPUT(keypoints_prediction, TensorType({DT_FLOAT16})) | .INPUT(keypoints_prediction, TensorType({DT_FLOAT16})) | ||||
| @@ -1304,6 +1316,9 @@ REG_OP(DecodeCornerpointsTargetBG) | |||||
| * | * | ||||
| *@par Outputs: | *@par Outputs: | ||||
| * @li keypoints_decoded: A Tensor. Must be float16. | * @li keypoints_decoded: A Tensor. Must be float16. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(DecodeCornerpointsTargetWrtCenterV1) | REG_OP(DecodeCornerpointsTargetWrtCenterV1) | ||||
| .INPUT(keypoints_prediction, TensorType({DT_FLOAT16})) | .INPUT(keypoints_prediction, TensorType({DT_FLOAT16})) | ||||
| @@ -1321,6 +1336,9 @@ REG_OP(DecodeCornerpointsTargetWrtCenterV1) | |||||
| * | * | ||||
| *@par Outputs: | *@par Outputs: | ||||
| * @li boundary_encoded: A Tensor. Must be float16. | * @li boundary_encoded: A Tensor. Must be float16. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(DecodeWheelsTarget) | REG_OP(DecodeWheelsTarget) | ||||
| .INPUT(boundary_predictions, TensorType({DT_FLOAT16})) | .INPUT(boundary_predictions, TensorType({DT_FLOAT16})) | ||||
| @@ -335,6 +335,8 @@ REG_OP(LogSoftmaxV2) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor of the same type as "grad" . \n | * y: A Tensor of the same type as "grad" . \n | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(ConfusionSoftmaxGrad) | REG_OP(ConfusionSoftmaxGrad) | ||||
| .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -499,6 +501,9 @@ REG_OP(LayerNorm) | |||||
| * @li pd_x: A Tensor. Must be one of the following types: float16, float32. | * @li pd_x: A Tensor. Must be one of the following types: float16, float32. | ||||
| * @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. | * @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. | ||||
| * @li pd_beta: A Tensor. Must be one of the following types: float16, float32. | * @li pd_beta: A Tensor. Must be one of the following types: float16, float32. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(LayerNormGrad) | REG_OP(LayerNormGrad) | ||||
| .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) | .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) | ||||
| @@ -540,6 +545,9 @@ REG_OP(LayerNormGrad) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *One output, including: | *One output, including: | ||||
| * @li pd_x: A Tensor. Must be one of the following types: float16, float32. | * @li pd_x: A Tensor. Must be one of the following types: float16, float32. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(LayerNormXBackprop) | REG_OP(LayerNormXBackprop) | ||||
| .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) | .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) | ||||
| @@ -579,6 +587,9 @@ REG_OP(LayerNormXBackprop) | |||||
| *Two outputs, including: | *Two outputs, including: | ||||
| * @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. | * @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. | ||||
| * @li pd_beta: A Tensor. Must be one of the following types: float16, float32. | * @li pd_beta: A Tensor. Must be one of the following types: float16, float32. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(LayerNormBetaGammaBackprop) | REG_OP(LayerNormBetaGammaBackprop) | ||||
| .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) | .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) | ||||
| @@ -811,6 +822,9 @@ instruction . \n | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *@li Compatible with the PyTorch operator GroupNorm. | *@li Compatible with the PyTorch operator GroupNorm. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(GroupNorm) | REG_OP(GroupNorm) | ||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| @@ -862,6 +876,9 @@ Specifies the variance of "x" . \n | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *@li Compatible with the PyTorch operator InstanceNorm. | *@li Compatible with the PyTorch operator InstanceNorm. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(InstanceNormV2) | REG_OP(InstanceNormV2) | ||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| @@ -2031,6 +2031,9 @@ REG_OP(ApplyAdadeltaD) | |||||
| * Two outputs, including: | * Two outputs, including: | ||||
| * @li var: A mutable Tensor has the same type as "var". | * @li var: A mutable Tensor has the same type as "var". | ||||
| * @li accum: A mutable Tensor has the same type as "var". | * @li accum: A mutable Tensor has the same type as "var". | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(FusedMulApplyMomentum) | REG_OP(FusedMulApplyMomentum) | ||||
| .INPUT(var, TensorType::NumberType()) | .INPUT(var, TensorType::NumberType()) | ||||
| @@ -2079,6 +2082,9 @@ REG_OP(FusedMulApplyMomentum) | |||||
| * @li var: A Tensor has the type float32. | * @li var: A Tensor has the type float32. | ||||
| * @li var_copy: A Tensor has the type float16. | * @li var_copy: A Tensor has the type float16. | ||||
| * @li accum: A Tensor has the same type as input "accum". | * @li accum: A Tensor has the same type as input "accum". | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(FusedMulApplyMomentumExtern) | REG_OP(FusedMulApplyMomentumExtern) | ||||
| .INPUT(var, TensorType(DT_FLOAT)) | .INPUT(var, TensorType(DT_FLOAT)) | ||||
| @@ -2581,6 +2587,8 @@ REG_OP(SparseApplyAdadeltaD) | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * @li automic_add_mem_size: sizes of workspaces . \n | * @li automic_add_mem_size: sizes of workspaces . \n | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(AtomicAddrClean) | REG_OP(AtomicAddrClean) | ||||
| .ATTR(automic_add_mem_size, ListInt, {}) | .ATTR(automic_add_mem_size, ListInt, {}) | ||||
| @@ -30,6 +30,9 @@ namespace ge { | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *data: A Tensor of data value. Must be float32. | *data: A Tensor of data value. Must be float32. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(NPUAllocFloatStatusOperator) | REG_OP(NPUAllocFloatStatusOperator) | ||||
| .OUTPUT(data, TensorType({DT_FLOAT})) | .OUTPUT(data, TensorType({DT_FLOAT})) | ||||
| @@ -43,6 +46,9 @@ REG_OP(NPUAllocFloatStatusOperator) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *data: A Tensor of data value. Must be float32. | *data: A Tensor of data value. Must be float32. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(NPUClearFloatStatusOperator) | REG_OP(NPUClearFloatStatusOperator) | ||||
| .INPUT(addr, TensorType{DT_FLOAT}) | .INPUT(addr, TensorType{DT_FLOAT}) | ||||
| @@ -57,6 +63,9 @@ REG_OP(NPUClearFloatStatusOperator) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *data: A Tensor of data value. Must be float32. | *data: A Tensor of data value. Must be float32. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(NPUGetFloatStatusOperator) | REG_OP(NPUGetFloatStatusOperator) | ||||
| .INPUT(addr, TensorType{DT_FLOAT}) | .INPUT(addr, TensorType{DT_FLOAT}) | ||||
| @@ -68,6 +77,9 @@ REG_OP(NPUGetFloatStatusOperator) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: A Tensor of type int32, output eight numbers with a value of zero. | *y: A Tensor of type int32, output eight numbers with a value of zero. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(NPUAllocFloatStatus) | REG_OP(NPUAllocFloatStatus) | ||||
| .OUTPUT(data, TensorType({DT_FLOAT})) | .OUTPUT(data, TensorType({DT_FLOAT})) | ||||
| @@ -81,6 +93,9 @@ REG_OP(NPUAllocFloatStatus) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *data: A Tensor of type float32. | *data: A Tensor of type float32. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(NPUClearFloatStatus) | REG_OP(NPUClearFloatStatus) | ||||
| .INPUT(addr, TensorType{DT_FLOAT}) | .INPUT(addr, TensorType{DT_FLOAT}) | ||||
| @@ -95,6 +110,9 @@ REG_OP(NPUClearFloatStatus) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *data: A Tensor of type float32. | *data: A Tensor of type float32. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(NPUGetFloatStatus) | REG_OP(NPUGetFloatStatus) | ||||
| .INPUT(addr, TensorType{DT_FLOAT}) | .INPUT(addr, TensorType{DT_FLOAT}) | ||||
| @@ -185,6 +185,73 @@ REG_OP(PadD) | |||||
| .REQUIRED_ATTR(paddings, ListListInt) | .REQUIRED_ATTR(paddings, ListListInt) | ||||
| .OP_END_FACTORY_REG(PadD) | .OP_END_FACTORY_REG(PadD) | ||||
| /** | |||||
| *@brief Pads a tensor. | |||||
| *@par Inputs: | |||||
| *Three inputs, including: | |||||
| * @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, | |||||
| * uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, | |||||
| * complex128, uint32, uint64. | |||||
| * @li paddings: A Tensor of type int32 or int64. | |||||
| * @li constant_values: An optional Tensor of int32 or int64 | |||||
| *@par Attributes: | |||||
| * @li mode: An optional string, Defaults to "constant", indicates paddings mode, | |||||
| * support "constant", "reflect", "edge" | |||||
| * @li paddings_contiguous: An optional bool value, Defaults to true. | |||||
| * If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...] | |||||
| * If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...] | |||||
| *@par Outputs: | |||||
| *y: A Tensor of the same type as "x". | |||||
| *@par Third-party framework compatibility: | |||||
| * Compatible with ONNX operator Pad. | |||||
| */ | |||||
| REG_OP(PadV3) | |||||
| .INPUT(x, TensorType::BasicType()) | |||||
| .INPUT(paddings, TensorType::IndexNumberType()) | |||||
| .OPTIONAL_INPUT(constant_values, TensorType::BasicType()) | |||||
| .OUTPUT(y, TensorType::BasicType()) | |||||
| .ATTR(mode, String, "constant") | |||||
| .ATTR(paddings_contiguous, Bool, true) | |||||
| .OP_END_FACTORY_REG(PadV3) | |||||
| /** | |||||
| *@brief Pads a tensor. | |||||
| *@par Inputs: | |||||
| *x: A Tensor. Must be one of the following types: float16, float32, int8, uint8. | |||||
| *@par Attributes: | |||||
| * @li paddings: A required "vector<vector<int>>". | |||||
| * For each dimension D of input, paddings[D, 0] indicates how many | |||||
| * values to add before the contents of tensor in that dimension, | |||||
| * and paddings[D, 1] indicates how many values to add after the | |||||
| * contents of tensor in that dimension. | |||||
| * @li constant_values: An optional int value for pad. | |||||
| * @li mode: An optional string, Defaults to "constant", indicates paddings mode, | |||||
| * support "constant", "reflect", "edge" | |||||
| * @li paddings_contiguous: An optional bool value, Defaults to true. | |||||
| * If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...] | |||||
| * If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...] | |||||
| *@par Outputs: | |||||
| *y: A Tensor of the same type as "x". | |||||
| *@par Third-party framework compatibility: | |||||
| * Compatible with ONNX operator Pad. | |||||
| */ | |||||
| REG_OP(PadV3D) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) | |||||
| .REQUIRED_ATTR(paddings, ListListInt) | |||||
| .ATTR(constant_values, Int, 0) | |||||
| .ATTR(mode, String, "constant") | |||||
| .ATTR(paddings_contiguous, Bool, true) | |||||
| .OP_END_FACTORY_REG(PadV3D) | |||||
| /** | /** | ||||
| *@brief Create a diagonal tensor | *@brief Create a diagonal tensor | ||||
| @@ -258,6 +325,9 @@ REG_OP(AscendPadding) | |||||
| /** | /** | ||||
| *@brief EmbeddingRankId, traverse the index calculation server and its position in the server . \n | *@brief EmbeddingRankId, traverse the index calculation server and its position in the server . \n | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS DEPRECATED. Please do not use. \n | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *One input, include: | *One input, include: | ||||
| *addr_table: Tensor which last dimension must be 3. For example: [8, 3]. | *addr_table: Tensor which last dimension must be 3. For example: [8, 3]. | ||||
| @@ -32,7 +32,7 @@ namespace ge { | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Inputs include: | *Inputs include: | ||||
| * @li logits: A Tensor. Must be one of the following types: float32, float64,double. | |||||
| * @li logits: A Tensor. Must be one of the following types: float16, float, double. | |||||
| 2-D Tensor with shape [batch_size, num_classes]. | 2-D Tensor with shape [batch_size, num_classes]. | ||||
| * @li num_samples: A Tensor of type int32. 0-D. Number of independent samples to draw for each row slice . \n | * @li num_samples: A Tensor of type int32. 0-D. Number of independent samples to draw for each row slice . \n | ||||
| @@ -502,7 +502,7 @@ REG_OP(ReduceMean) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *One input: | *One input: | ||||
| * @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8 . \n | |||||
| * @li x: A Tensor. Must be one of the following types: float16, float32 . \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType. | *@li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType. | ||||
| @@ -521,8 +521,8 @@ REG_OP(ReduceMean) | |||||
| * Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMean instead. | * Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMean instead. | ||||
| */ | */ | ||||
| REG_OP(ReduceMeanD) | REG_OP(ReduceMeanD) | ||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT, DT_INT8, DT_UINT8})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT, DT_INT8, DT_UINT8})) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .REQUIRED_ATTR(axes, ListInt) | .REQUIRED_ATTR(axes, ListInt) | ||||
| .ATTR(keep_dims, Bool, false) | .ATTR(keep_dims, Bool, false) | ||||
| .OP_END_FACTORY_REG(ReduceMeanD) | .OP_END_FACTORY_REG(ReduceMeanD) | ||||
| @@ -26,6 +26,21 @@ | |||||
| namespace ge { | namespace ge { | ||||
| /** | |||||
| *@brief Creates a handle to a Variable resource. \n | |||||
| *@par Outputs: | |||||
| *y:A Tensor of type resource. \n | |||||
| *@par Attributes: | |||||
| * @li container: optional, string. | |||||
| * @li shared_name: optional, string. | |||||
| * @li dtype: required, type. | |||||
| * @li shape: optional, ListInt. \n | |||||
| *@see VarHandleOp. | |||||
| */ | |||||
| REG_OP(VarHandleOp) | REG_OP(VarHandleOp) | ||||
| .ATTR(container, String, "") | .ATTR(container, String, "") | ||||
| .ATTR(shared_name, String, "") | .ATTR(shared_name, String, "") | ||||
| @@ -34,6 +49,19 @@ REG_OP(VarHandleOp) | |||||
| .OUTPUT(y, TensorType({DT_RESOURCE})) | .OUTPUT(y, TensorType({DT_RESOURCE})) | ||||
| .OP_END_FACTORY_REG(VarHandleOp) | .OP_END_FACTORY_REG(VarHandleOp) | ||||
| /** | |||||
| *@brief Assigns a new value to a variable. \n | |||||
| *@par Inputs: | |||||
| *resource:Handle to the resource in which to store the variable. | |||||
| *value:The value to set the new tensor to use. \n | |||||
| *@par Attributes: | |||||
| * @li dtype: required, type. \n | |||||
| *@see AssignVariableOp. | |||||
| */ | |||||
| REG_OP(AssignVariableOp) | REG_OP(AssignVariableOp) | ||||
| .INPUT(resource, TensorType({DT_RESOURCE})) | .INPUT(resource, TensorType({DT_RESOURCE})) | ||||
| .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ | .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ | ||||
| @@ -41,6 +69,19 @@ REG_OP(AssignVariableOp) | |||||
| .REQUIRED_ATTR(dtype, Type) | .REQUIRED_ATTR(dtype, Type) | ||||
| .OP_END_FACTORY_REG(AssignVariableOp) | .OP_END_FACTORY_REG(AssignVariableOp) | ||||
| /** | |||||
| *@brief Adds a value to the current value of a variable. \n | |||||
| *@par Inputs: | |||||
| *resource:Handle to the resource in which to store the variable. | |||||
| *value:The value by which the variable will be incremented. \n | |||||
| *@par Attributes: | |||||
| * @li dtype: required, type. \n | |||||
| *@see AssignAddVariableOp. | |||||
| */ | |||||
| REG_OP(AssignAddVariableOp) | REG_OP(AssignAddVariableOp) | ||||
| .INPUT(resource, TensorType({DT_RESOURCE})) | .INPUT(resource, TensorType({DT_RESOURCE})) | ||||
| .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ | .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ | ||||
| @@ -48,6 +89,19 @@ REG_OP(AssignAddVariableOp) | |||||
| .REQUIRED_ATTR(dtype, Type) | .REQUIRED_ATTR(dtype, Type) | ||||
| .OP_END_FACTORY_REG(AssignAddVariableOp) | .OP_END_FACTORY_REG(AssignAddVariableOp) | ||||
| /** | |||||
| *@brief Subtracts a value from the current value of a variable. \n | |||||
| *@par Inputs: | |||||
| *resource:Handle to the resource in which to store the variable. | |||||
| *value:The value by which the variable will be decremented. \n | |||||
| *@par Attributes: | |||||
| * @li dtype: required, type. \n | |||||
| *@see AssignSubVariableOp. | |||||
| */ | |||||
| REG_OP(AssignSubVariableOp) | REG_OP(AssignSubVariableOp) | ||||
| .INPUT(resource, TensorType({DT_RESOURCE})) | .INPUT(resource, TensorType({DT_RESOURCE})) | ||||
| .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ | .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ | ||||
| @@ -81,6 +81,9 @@ REG_OP(BasicLSTMCell) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *output_h:A Tensor of output. Must be the type float32. The format must be FRACTAL_Z. | *output_h:A Tensor of output. Must be the type float32. The format must be FRACTAL_Z. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(DynamicLSTM) | REG_OP(DynamicLSTM) | ||||
| .INPUT(x, TensorType({DT_FLOAT32})) | .INPUT(x, TensorType({DT_FLOAT32})) | ||||
| @@ -306,6 +309,9 @@ REG_OP(LSTMInputGrad) | |||||
| *two outputs: | *two outputs: | ||||
| *@li dxt:A 4D Tensor. Must be one of the following types: float16, float32. | *@li dxt:A 4D Tensor. Must be one of the following types: float16, float32. | ||||
| *@li dht:A 4D Tensor. Must be one of the following types: float16, float32. | *@li dht:A 4D Tensor. Must be one of the following types: float16, float32. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(BasicLSTMCellInputGrad) | REG_OP(BasicLSTMCellInputGrad) | ||||
| .INPUT(dgate, TensorType({DT_FLOAT16})) | .INPUT(dgate, TensorType({DT_FLOAT16})) | ||||
| @@ -328,6 +334,9 @@ REG_OP(BasicLSTMCellInputGrad) | |||||
| *two outputs: | *two outputs: | ||||
| *@li dw:A 4D Tensor. Must be one of the following types: float16. | *@li dw:A 4D Tensor. Must be one of the following types: float16. | ||||
| *@li db:A 4D Tensor. Must be one of the following types: float16, float32. | *@li db:A 4D Tensor. Must be one of the following types: float16, float32. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(BasicLSTMCellWeightGrad) | REG_OP(BasicLSTMCellWeightGrad) | ||||
| .INPUT(x, TensorType({DT_FLOAT16})) | .INPUT(x, TensorType({DT_FLOAT16})) | ||||
| @@ -358,6 +367,9 @@ REG_OP(BasicLSTMCellWeightGrad) | |||||
| *two outputs: | *two outputs: | ||||
| *@li dgate:A 4D Tensor. Must be one of the following types: float16. | *@li dgate:A 4D Tensor. Must be one of the following types: float16. | ||||
| *@li dct_1:A 4D Tensor. Must be one of the following types: float16, float32. | *@li dct_1:A 4D Tensor. Must be one of the following types: float16, float32. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(BasicLSTMCellCStateGrad) | REG_OP(BasicLSTMCellCStateGrad) | ||||
| .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| @@ -439,6 +451,9 @@ REG_OP(RNN) | |||||
| *two outputs: | *two outputs: | ||||
| *@li o_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li o_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li h_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li h_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(BasicRNNCell) | REG_OP(BasicRNNCell) | ||||
| .INPUT(x, TensorType({DT_FLOAT16})) | .INPUT(x, TensorType({DT_FLOAT16})) | ||||
| @@ -460,13 +475,13 @@ REG_OP(BasicRNNCell) | |||||
| *@brief: DynamicGRU calculation. | *@brief: DynamicGRU calculation. | ||||
| *@par Inputs: | *@par Inputs: | ||||
| *seven inputs: \n | *seven inputs: \n | ||||
| *@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||||
| *@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li cw:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li cb:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND. | |||||
| *@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||||
| *@li w:Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
| *@li b:Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li cw:Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
| *@li cb:Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li seq_length:Must be one of the following types: int32. The format must be ND. | |||||
| *@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. | *@li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. | ||||
| @@ -480,11 +495,11 @@ REG_OP(BasicRNNCell) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *five outputs: \n | *five outputs: \n | ||||
| *@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li r:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li n:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li i:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li n:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@par Restrictions: | *@par Restrictions: | ||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | ||||
| @@ -495,7 +510,7 @@ REG_OP(DynamicGRU) | |||||
| .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .INPUT(cw, TensorType({DT_FLOAT16})) | .INPUT(cw, TensorType({DT_FLOAT16})) | ||||
| .INPUT(cb, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(cb, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .OPTIONAL_INPUT(seq_length, TensorType({DT_UINT32})) | |||||
| .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) | |||||
| .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| @@ -516,13 +531,13 @@ REG_OP(DynamicGRU) | |||||
| *@brief: DynamicGRUV2 calculation. | *@brief: DynamicGRUV2 calculation. | ||||
| *@par Inputs: | *@par Inputs: | ||||
| *seven inputs: \n | *seven inputs: \n | ||||
| *@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||||
| *@li weight_input:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li weight_hidden:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li bias_input:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li bias_hidden:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND. | |||||
| *@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||||
| *@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
| *@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
| *@li bias_input:Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li seq_length:Must be one of the following types: int32. The format must be ND. | |||||
| *@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. | *@li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. | ||||
| @@ -538,12 +553,12 @@ REG_OP(DynamicGRU) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *six outputs: \n | *six outputs: \n | ||||
| *@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@par Restrictions: | *@par Restrictions: | ||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | ||||
| @@ -554,7 +569,7 @@ REG_OP(DynamicGRUV2) | |||||
| .INPUT(weight_hidden, TensorType({DT_FLOAT16})) | .INPUT(weight_hidden, TensorType({DT_FLOAT16})) | ||||
| .OPTIONAL_INPUT(bias_input, TensorType({DT_FLOAT16, DT_FLOAT})) | .OPTIONAL_INPUT(bias_input, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) | .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .OPTIONAL_INPUT(seq_length, TensorType({DT_UINT32})) | |||||
| .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) | |||||
| .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| @@ -1787,6 +1787,9 @@ REG_OP(TileWithAxis) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: A Tensor of the same type as "x". | *y: A Tensor of the same type as "x". | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(ReadSelect) | REG_OP(ReadSelect) | ||||
| .INPUT(x, TensorType::ALL()) | .INPUT(x, TensorType::ALL()) | ||||
| @@ -1802,6 +1805,9 @@ REG_OP(ReadSelect) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: A Tensor. Has the same type as "x". | *y: A Tensor. Has the same type as "x". | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(WriteSelect) | REG_OP(WriteSelect) | ||||
| .INPUT(x, TensorType::ALL()) | .INPUT(x, TensorType::ALL()) | ||||
| @@ -625,6 +625,9 @@ REG_OP(ConfusionTransposeD) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: A Tensor. Has the same type as "x". | *y: A Tensor. Has the same type as "x". | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(ConfusionTranspose) | REG_OP(ConfusionTranspose) | ||||
| .INPUT(x, TensorType::BasicType()) | .INPUT(x, TensorType::BasicType()) | ||||
| @@ -28,6 +28,9 @@ namespace ge { | |||||
| /** | /** | ||||
| *@brief Applies a perspective transformation to an image . \n | *@brief Applies a perspective transformation to an image . \n | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS DEPRECATED. Please do not use. \n | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *@li x: input tensor, format NCHW, type must be float. | *@li x: input tensor, format NCHW, type must be float. | ||||
| *@li matrix: transformation matrix, format ND , shape must be (N, 9), type must be float . \n | *@li matrix: transformation matrix, format ND , shape must be (N, 9), type must be float . \n | ||||
| @@ -159,7 +159,12 @@ typedef struct rtAiCoreMemorySize { | |||||
| * @ingroup dvrt_mem | * @ingroup dvrt_mem | ||||
| * @brief memory type | * @brief memory type | ||||
| */ | */ | ||||
| typedef enum tagRtMemoryType { RT_MEMORY_TYPE_HOST = 1, RT_MEMORY_TYPE_DEVICE = 2 } rtMemoryType_t; | |||||
| typedef enum tagRtMemoryType { | |||||
| RT_MEMORY_TYPE_HOST = 1, | |||||
| RT_MEMORY_TYPE_DEVICE = 2 , | |||||
| RT_MEMORY_TYPE_SVM = 3, | |||||
| RT_MEMORY_TYPE_DVPP = 4 | |||||
| } rtMemoryType_t; | |||||
| /** | /** | ||||
| * @ingroup dvrt_mem | * @ingroup dvrt_mem | ||||
| @@ -167,8 +172,8 @@ typedef enum tagRtMemoryType { RT_MEMORY_TYPE_HOST = 1, RT_MEMORY_TYPE_DEVICE = | |||||
| */ | */ | ||||
| typedef struct tagRtPointerAttributes { | typedef struct tagRtPointerAttributes { | ||||
| rtMemoryType_t memoryType; // host memory or device memory | rtMemoryType_t memoryType; // host memory or device memory | ||||
| rtMemoryType_t locationType; | |||||
| uint32_t deviceID; // device ID | uint32_t deviceID; // device ID | ||||
| uint32_t isManaged; | |||||
| uint32_t pageSize; | uint32_t pageSize; | ||||
| } rtPointerAttributes_t; | } rtPointerAttributes_t; | ||||
| @@ -100,6 +100,8 @@ enum { | |||||
| TDT_TSD_SEND_HEARTBEAT_FAILED_CODE, | TDT_TSD_SEND_HEARTBEAT_FAILED_CODE, | ||||
| TDT_TSD_CLEAN_RESOURCE_FAILED_CODE, | TDT_TSD_CLEAN_RESOURCE_FAILED_CODE, | ||||
| TDT_TSD_SEND_MSG_FAILED_CODE, | TDT_TSD_SEND_MSG_FAILED_CODE, | ||||
| TDT_TSD_AICPU_SD_PROCESS_ABNORMAL_CODE, | |||||
| TDT_TSD_CUSTOM_PROCESS_ABNORMAL_CODE, | |||||
| TDT_PPC_DRIVER_INIT_FAIL_CODE, | TDT_PPC_DRIVER_INIT_FAIL_CODE, | ||||
| TDT_PPC_SERVER_CLIENT_CREATE_FAIL_CODE, | TDT_PPC_SERVER_CLIENT_CREATE_FAIL_CODE, | ||||
| TDT_PPC_SERVER_CLIENT_DESTORY_FAIL_CODE, | TDT_PPC_SERVER_CLIENT_DESTORY_FAIL_CODE, | ||||
| @@ -510,6 +512,8 @@ TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_INIT_HDCSERVER_FAILED, " | |||||
| TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_SEND_HEARTBEAT_FAILED, "Tsdaemon get pid fail"); | TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_SEND_HEARTBEAT_FAILED, "Tsdaemon get pid fail"); | ||||
| TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEAN_RESOURCE_FAILED, "Tsdaemon clean resource fail"); | TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEAN_RESOURCE_FAILED, "Tsdaemon clean resource fail"); | ||||
| TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_SEND_MSG_FAILED, "Tsdaemon send msg fail"); | TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_SEND_MSG_FAILED, "Tsdaemon send msg fail"); | ||||
| TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_AICPU_SD_PROCESS_ABNORMAL, "aicpu_sd process abnormal"); | |||||
| TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_INFO, TDT_TSD_CUSTOM_PROCESS_ABNORMAL, "custom_aicpu_sd process abnormal"); | |||||
| /********************* PPC ****************************/ | /********************* PPC ****************************/ | ||||
| // create PPC error level error | // create PPC error level error | ||||
| @@ -0,0 +1,36 @@ | |||||
| /** | |||||
| * @file adx_datadump_server.h | |||||
| * | |||||
| * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. | |||||
| * | |||||
| * This program is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
| */ | |||||
| #ifndef ADX_DATADUMP_SERVER_H | |||||
| #define ADX_DATADUMP_SERVER_H | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| /** | |||||
| * @brief initialize server for normal datadump function. | |||||
| * @return | |||||
| * IDE_DAEMON_OK: datadump server init success | |||||
| * IDE_DAEMON_ERROR: datadump server init failed | |||||
| */ | |||||
| int AdxDataDumpServerInit(); | |||||
| /** | |||||
| * @brief uninitialize server for normal datadump function. | |||||
| * @return | |||||
| * IDE_DAEMON_OK: datadump server uninit success | |||||
| * IDE_DAEMON_ERROR: datadump server uninit failed | |||||
| */ | |||||
| int AdxDataDumpServerUnInit(); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif | |||||