From 97b8c6fb91fd5765589a2862a2a906aac58dc473 Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Thu, 16 Jul 2020 09:40:59 +0800 Subject: [PATCH] Synchronize latest Ascend software suite 16 Jul 2020 --- inc/external/ge/ge_api_types.h | 5 - inc/external/graph/operator.h | 2 + inc/external/register/register.h | 4 + inc/framework/common/ge_types.h | 1 + inc/framework/common/helper/model_helper.h | 2 + inc/framework/ge_runtime/model_runner.h | 7 +- inc/framework/ge_runtime/task_info.h | 135 +++++++----------- inc/graph/debug/ge_attr_define.h | 2 + src/common/graph/ge_attr_define.cc | 1 + src/common/graph/node.cc | 1 + src/common/graph/op_desc.cc | 10 +- src/common/graph/operator.cc | 4 +- src/common/graph/shape_refiner.cc | 1 + src/ge/common/ge/tbe_plugin_manager.cc | 34 +---- src/ge/common/ge/tbe_plugin_manager.h | 1 - src/ge/common/helper/model_helper.cc | 35 ++++- src/ge/common/model_parser/base.cc | 5 +- src/ge/common/profiling/profiling_manager.cc | 4 +- src/ge/common/properties_manager.cc | 7 +- src/ge/common/properties_manager.h | 2 +- src/ge/executor/ge_executor.cc | 1 - .../ge_local_engine/engine/host_cpu_engine.cc | 1 + src/ge/ge_runner.mk | 1 - src/ge/ge_runtime/model_runner.cc | 31 ---- src/ge/ge_runtime/output.cc | 2 +- src/ge/ge_runtime/runtime_model.cc | 81 ++++------- src/ge/ge_runtime/runtime_model.h | 9 +- src/ge/ge_runtime/task/aicpu_task.cc | 14 +- src/ge/ge_runtime/task/aicpu_task.h | 6 - src/ge/ge_runtime/task/hccl_task.cc | 1 + src/ge/ge_runtime/task/label_goto_task.cc | 70 --------- src/ge/ge_runtime/task/label_goto_task.h | 41 ------ src/ge/ge_runtime/task/label_set_task.cc | 70 --------- src/ge/ge_runtime/task/label_set_task.h | 41 ------ src/ge/ge_runtime/task/label_switch_task.cc | 131 ----------------- src/ge/ge_runtime/task/label_switch_task.h | 44 ------ src/ge/ge_runtime/task/stream_switch_task.cc | 2 +- src/ge/ge_runtime/task/task.h | 6 - src/ge/ge_runtime/task/tbe_task.cc | 7 +- src/ge/ge_runtime/task/tbe_task.h | 4 - src/ge/generator/ge_generator.cc | 48 ++----- .../graph/build/memory/block_mem_assigner.cc | 12 +- .../graph/build/memory/block_mem_assigner.h | 2 + .../graph/build/memory/graph_mem_assigner.cc | 1 + .../load/new_model_manager/data_dumper.cc | 77 +++++++--- .../load/new_model_manager/data_dumper.h | 9 +- .../load/new_model_manager/davinci_model.cc | 74 +++------- .../load/new_model_manager/davinci_model.h | 10 +- .../load/new_model_manager/model_manager.cc | 1 + .../task_info/end_graph_task_info.cc | 3 +- .../task_info/kernel_ex_task_info.cc | 3 +- .../task_info/kernel_task_info.cc | 8 +- src/ge/graph/partition/graph_partition.cc | 8 +- .../same_transdata_breadth_fusion_pass.cc | 1 + .../transop_without_reshape_fusion_pass.cc | 1 + src/ge/graph/preprocess/graph_preprocess.cc | 128 +---------------- .../insert_op/util_insert_aipp_op.cc | 47 +++++- .../insert_op/util_insert_aipp_op.h | 1 + src/ge/host_kernels/concat_v2_kernel.cc | 53 ++++--- src/ge/host_kernels/concat_v2_kernel.h | 2 +- src/ge/offline/main.cc | 35 ++++- src/ge/offline/single_op_parser.cc | 8 -- src/ge/session/omg.cc | 38 +++-- src/proto/fusion_model.proto | 3 +- .../inc/ops/elewise_calculation_ops.h | 11 +- .../inc/ops/matrix_calculation_ops.h | 40 ++++-- .../fwkacllib/inc/ops/nn_batch_norm_ops.h | 12 +- .../fwkacllib/inc/ops/nn_calculation_ops.h | 29 ++-- third_party/fwkacllib/inc/ops/nn_detect_ops.h | 2 +- .../fwkacllib/inc/ops/nn_pooling_ops.h | 25 ++-- .../fwkacllib/inc/ops/nn_training_ops.h | 20 +-- .../fwkacllib/inc/ops/nonlinear_fuc_ops.h | 6 +- 
third_party/fwkacllib/inc/ops/quantize_ops.h | 18 +-- third_party/fwkacllib/inc/ops/selection_ops.h | 21 +-- .../fwkacllib/inc/ops/transformation_ops.h | 37 +++-- .../fwkacllib/inc/register/op_registry.h | 3 + 76 files changed, 577 insertions(+), 1046 deletions(-) delete mode 100644 src/ge/ge_runtime/task/label_goto_task.cc delete mode 100644 src/ge/ge_runtime/task/label_goto_task.h delete mode 100644 src/ge/ge_runtime/task/label_set_task.cc delete mode 100644 src/ge/ge_runtime/task/label_set_task.h delete mode 100644 src/ge/ge_runtime/task/label_switch_task.cc delete mode 100644 src/ge/ge_runtime/task/label_switch_task.h diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index 5a8482e7..1632f11c 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -204,9 +204,6 @@ const std::string SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel"; // Save original model file name const std::string ORIGINAL_MODEL_FILE = "ge.originalModelFile"; -// FE enable quant optimize -const std::string QUANT_OPTIMIZE = "ge.quantOptimize"; - const char *const OPTION_GE_MAX_DUMP_FILE_NUM = "ge.maxDumpFileNum"; const char *const OPTION_GE_MAX_DUMP_FILE_SIZE = "ge.maxDumpFileSize"; const char *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum"; @@ -274,7 +271,6 @@ static const char *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM; static const char *const AICORE_NUM = ge::AICORE_NUM.c_str(); static const char *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str(); static const char *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str(); -static const char *const QUANT_OPTIMIZE = ge::QUANT_OPTIMIZE.c_str(); static const char *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str(); static const char *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str(); static const char *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str(); @@ -304,7 +300,6 @@ const std::set global_options = {CORE_TYPE, AICORE_NUM, FUSION_SWITCH_FILE, ENABLE_SMALL_CHANNEL, - QUANT_OPTIMIZE, OP_SELECT_IMPL_MODE, OPTYPELIST_FOR_IMPLMODE}; } // namespace ir_option diff --git a/inc/external/graph/operator.h b/inc/external/graph/operator.h index 1deae7d9..4f837b9d 100644 --- a/inc/external/graph/operator.h +++ b/inc/external/graph/operator.h @@ -43,6 +43,7 @@ #define DYNAMIC_INPUT_TD_NUM(name) ("__dynamic_input_" + name + "_cnt") namespace ge { +class Operator; class OperatorImpl; class NamedAttrs; class Graph; @@ -50,6 +51,7 @@ class AttrValue; using SubgraphBuilder = std::function; using OperatorImplPtr = std::shared_ptr; +using OperatorPtr = std::shared_ptr; class OpIO; using OutHandler = std::shared_ptr; diff --git a/inc/external/register/register.h b/inc/external/register/register.h index 28c984bf..a8421511 100644 --- a/inc/external/register/register.h +++ b/inc/external/register/register.h @@ -67,6 +67,7 @@ using google::protobuf::Message; class OpRegistrationDataImpl; using ParseParamFunc = std::function; +using ParseParamByOpFunc = std::function; using FusionParseParamFunc = std::function, ge::Operator &)>; using ParseSubgraphFunc = std::function; @@ -85,6 +86,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistrationData { OpRegistrationData &ParseParamsFn(const ParseParamFunc &parseParamFn); + OpRegistrationData &ParseParamsByOperatorFn(const ParseParamByOpFunc &parse_param_by_op_fn); + OpRegistrationData &FusionParseParamsFn(const FusionParseParamFunc &fusionParseParamFn); OpRegistrationData &ParseSubgraphPostFn(const ParseSubgraphFunc &subgraph_post_fn); @@ -100,6 
+103,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistrationData { std::set GetOriginOpTypeSet() const; domi::FrameworkType GetFrameworkType() const; ParseParamFunc GetParseParamFn() const; + ParseParamByOpFunc GetParseParamByOperatorFn() const; FusionParseParamFunc GetFusionParseParamFn() const; ParseSubgraphFunc GetParseSubgraphPostFn() const; diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index bcc90d25..27ae28ee 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -183,6 +183,7 @@ struct ModelData { uint32_t model_len = 0; // Model binary data length int32_t priority = 0; // Model priority std::string key; // Key path for encrypt model, Empty for unencrypt + std::string om_name; // om file name, used for data dump }; // The definition of Model information diff --git a/inc/framework/common/helper/model_helper.h b/inc/framework/common/helper/model_helper.h index bd9a6c57..3c9de891 100644 --- a/inc/framework/common/helper/model_helper.h +++ b/inc/framework/common/helper/model_helper.h @@ -46,6 +46,8 @@ class ModelHelper { static Status TransModelToGeModel(const ModelPtr& model, GeModelPtr& ge_model); static Status TransGeModelToModel(const GeModelPtr& geModelPtr, ModelPtr& modelPtr); + Status GetBaseNameFromFileName(const std::string& file_name, std::string& base_name); + Status GetModelNameFromMergedGraphName(const std::string& graph_name, std::string& model_name); private: bool is_assign_model_ = false; diff --git a/inc/framework/ge_runtime/model_runner.h b/inc/framework/ge_runtime/model_runner.h index 8e312b09..6e7abcb9 100644 --- a/inc/framework/ge_runtime/model_runner.h +++ b/inc/framework/ge_runtime/model_runner.h @@ -28,21 +28,16 @@ namespace ge { namespace model_runner { class RuntimeModel; -using RuntimeInfo = std::tuple; + class ModelRunner { public: static ModelRunner &Instance(); bool LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint32_t model_id, std::shared_ptr davinci_model, std::shared_ptr listener); - bool LoadModelComplete(uint32_t model_id); const std::vector &GetTaskIdList(uint32_t model_id) const; - const std::vector &GetStreamIdList(uint32_t model_id) const; - - const std::map> &GetRuntimeInfoMap(uint32_t model_id) const; - bool UnloadModel(uint32_t model_id); bool RunModel(uint32_t model_id, const InputData &input_data, OutputData *output_data); diff --git a/inc/framework/ge_runtime/task_info.h b/inc/framework/ge_runtime/task_info.h index 68d71870..a48ed68b 100644 --- a/inc/framework/ge_runtime/task_info.h +++ b/inc/framework/ge_runtime/task_info.h @@ -21,7 +21,6 @@ #include #include #include -#include #include #include "cce/taskdown_api.h" @@ -53,27 +52,21 @@ class TaskInfo { virtual ~TaskInfo() {} uint32_t stream_id() const { return stream_id_; } TaskInfoType type() const { return type_; } - std::string op_name() const { return op_name_; } - bool dump_flag() const { return dump_flag_; } protected: - TaskInfo(const std::string &op_name, uint32_t stream_id, TaskInfoType type, bool dump_flag) - : op_name_(op_name), stream_id_(stream_id), type_(type), dump_flag_(dump_flag) {} + TaskInfo(uint32_t stream_id, TaskInfoType type) : stream_id_(stream_id), type_(type) {} private: - std::string op_name_; uint32_t stream_id_; TaskInfoType type_; - bool dump_flag_; }; class CceTaskInfo : public TaskInfo { public: - CceTaskInfo(const std::string &op_name, uint32_t stream_id, const cce::ccOpContext &ctx, const std::string &stub_func, - uint32_t block_dim, const std::vector &args, 
uint32_t args_size, - const std::vector &sm_desc, const std::vector &flow_table, - const std::vector &args_offset, bool is_flowtable) - : TaskInfo(op_name, stream_id, TaskInfoType::CCE, false), + CceTaskInfo(uint32_t stream_id, const cce::ccOpContext &ctx, const std::string &stub_func, uint32_t block_dim, + const std::vector &args, uint32_t args_size, const std::vector &sm_desc, + const std::vector &flow_table, const std::vector &args_offset, bool is_flowtable) + : TaskInfo(stream_id, TaskInfoType::CCE), ctx_(ctx), stub_func_(stub_func), block_dim_(block_dim), @@ -109,11 +102,11 @@ class CceTaskInfo : public TaskInfo { class TbeTaskInfo : public TaskInfo { public: - TbeTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string &stub_func, uint32_t block_dim, - const std::vector &args, uint32_t args_size, const std::vector &sm_desc, void *binary, - uint32_t binary_size, const std::vector &meta_data, const std::vector &input_data_addrs, - const std::vector &output_data_addrs, const std::vector &workspace_addrs, bool dump_flag) - : TaskInfo(op_name, stream_id, TaskInfoType::TBE, dump_flag), + TbeTaskInfo(uint32_t stream_id, const std::string &stub_func, uint32_t block_dim, const std::vector &args, + uint32_t args_size, const std::vector &sm_desc, void *binary, uint32_t binary_size, + const std::vector &meta_data, const std::vector &input_data_addrs, + const std::vector &output_data_addrs, const std::vector &workspace_addrs) + : TaskInfo(stream_id, TaskInfoType::TBE), stub_func_(stub_func), block_dim_(block_dim), args_(args), @@ -160,10 +153,9 @@ class TbeTaskInfo : public TaskInfo { class AicpuTaskInfo : public TaskInfo { public: - AicpuTaskInfo(const std::string &op_name, uint32_t stream_id, const string &so_name, const std::string &kernel_name, - const std::string &node_def, const std::vector &input_data_addrs, - const std::vector &output_data_addrs, bool dump_flag) - : TaskInfo(op_name, stream_id, TaskInfoType::AICPU, dump_flag), + AicpuTaskInfo(uint32_t stream_id, const string &so_name, const std::string &kernel_name, const std::string &node_def, + const std::vector &input_data_addrs, const std::vector &output_data_addrs) + : TaskInfo(stream_id, TaskInfoType::AICPU), so_name_(so_name), kernel_name_(kernel_name), node_def_(node_def), @@ -185,45 +177,37 @@ class AicpuTaskInfo : public TaskInfo { std::vector output_data_addrs_; }; -class LabelSetTaskInfo : public TaskInfo { +class LabelTaskInfo : public TaskInfo { public: - LabelSetTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_id) - : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_SET, false), label_id_(label_id) {} - ~LabelSetTaskInfo() override {} uint32_t label_id() const { return label_id_; } - private: + protected: + LabelTaskInfo(uint32_t stream_id, TaskInfoType type, uint32_t label_id) + : TaskInfo(stream_id, type), label_id_(label_id) {} + virtual ~LabelTaskInfo() override {} + uint32_t label_id_; }; -class LabelGotoTaskInfo : public TaskInfo { +class LabelSetTaskInfo : public LabelTaskInfo { public: - LabelGotoTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_id) - : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_GOTO, false), label_id_(label_id) {} - ~LabelGotoTaskInfo() override {} - uint32_t label_id() const { return label_id_; } - - private: - uint32_t label_id_; + LabelSetTaskInfo(uint32_t stream_id, uint32_t label_id) + : LabelTaskInfo(stream_id, TaskInfoType::LABEL_SET, label_id) {} + ~LabelSetTaskInfo() override {} }; -class LabelSwitchTaskInfo : public 
TaskInfo { +class LabelSwitchTaskInfo : public LabelTaskInfo { public: - LabelSwitchTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_size, - const std::vector &label_list, void *cond) - : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_SWITCH, false), - label_size_(label_size), - label_list_(label_list), - cond_(cond) {} + LabelSwitchTaskInfo(uint32_t stream_id, uint32_t label_id) + : LabelTaskInfo(stream_id, TaskInfoType::LABEL_SWITCH, label_id) {} ~LabelSwitchTaskInfo() override {} - uint32_t label_size() { return label_size_; }; - const std::vector &label_list() { return label_list_; }; - void *cond() { return cond_; }; +}; - private: - uint32_t label_size_; - std::vector label_list_; - void *cond_; +class LabelGotoTaskInfo : public LabelTaskInfo { + public: + LabelGotoTaskInfo(uint32_t stream_id, uint32_t label_id) + : LabelTaskInfo(stream_id, TaskInfoType::LABEL_GOTO, label_id) {} + ~LabelGotoTaskInfo() override {} }; class EventTaskInfo : public TaskInfo { @@ -231,8 +215,8 @@ class EventTaskInfo : public TaskInfo { uint32_t event_id() const { return event_id_; } protected: - EventTaskInfo(const std::string &op_name, uint32_t stream_id, TaskInfoType type, uint32_t event_id) - : TaskInfo(op_name, stream_id, type, false), event_id_(event_id) {} + EventTaskInfo(uint32_t stream_id, TaskInfoType type, uint32_t event_id) + : TaskInfo(stream_id, type), event_id_(event_id) {} virtual ~EventTaskInfo() override {} uint32_t event_id_; @@ -240,41 +224,39 @@ class EventTaskInfo : public TaskInfo { class EventRecordTaskInfo : public EventTaskInfo { public: - EventRecordTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t event_id) - : EventTaskInfo(op_name, stream_id, TaskInfoType::EVENT_RECORD, event_id) {} + EventRecordTaskInfo(uint32_t stream_id, uint32_t event_id) + : EventTaskInfo(stream_id, TaskInfoType::EVENT_RECORD, event_id) {} ~EventRecordTaskInfo() override {} }; class EventWaitTaskInfo : public EventTaskInfo { public: - EventWaitTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t event_id) - : EventTaskInfo(op_name, stream_id, TaskInfoType::EVENT_WAIT, event_id) {} + EventWaitTaskInfo(uint32_t stream_id, uint32_t event_id) + : EventTaskInfo(stream_id, TaskInfoType::EVENT_WAIT, event_id) {} ~EventWaitTaskInfo() override {} }; class FusionStartTaskInfo : public TaskInfo { public: - explicit FusionStartTaskInfo(const std::string &op_name, uint32_t stream_id) - : TaskInfo(op_name, stream_id, TaskInfoType::FUSION_START, false) {} + explicit FusionStartTaskInfo(uint32_t stream_id) : TaskInfo(stream_id, TaskInfoType::FUSION_START) {} ~FusionStartTaskInfo() override {} }; class FusionEndTaskInfo : public TaskInfo { public: - explicit FusionEndTaskInfo(const std::string &op_name, uint32_t stream_id) - : TaskInfo(op_name, stream_id, TaskInfoType::FUSION_END, false) {} + explicit FusionEndTaskInfo(uint32_t stream_id) : TaskInfo(stream_id, TaskInfoType::FUSION_END) {} ~FusionEndTaskInfo() override {} }; class HcclTaskInfo : public TaskInfo { public: - HcclTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string hccl_type, void *input_data_addr, - void *output_data_addr, void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num, + HcclTaskInfo(uint32_t stream_id, const std::string hccl_type, void *input_data_addr, void *output_data_addr, + void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num, const std::vector &private_def, void *ops_kernel_store, int32_t count, int64_t root_id, - int64_t op_type, 
int64_t data_type, const std::string &group, - std::function hcom_bind_model, std::function hcom_unbind_model, - std::function, void *)> hcom_distribute_task, bool dump_flag) - : TaskInfo(op_name, stream_id, TaskInfoType::HCCL, dump_flag), + int64_t op_type, int64_t data_type, std::function hcom_bind_model, + std::function hcom_unbind_model, + std::function, void *)> hcom_distribute_task) + : TaskInfo(stream_id, TaskInfoType::HCCL), hccl_type_(hccl_type), input_data_addr_(input_data_addr), output_data_addr_(output_data_addr), @@ -287,7 +269,6 @@ class HcclTaskInfo : public TaskInfo { root_id_(root_id), op_type_(op_type), data_type_(data_type), - group_(group), hcom_bind_model_(hcom_bind_model), hcom_unbind_model_(hcom_unbind_model), hcom_distribute_task_(hcom_distribute_task) {} @@ -305,7 +286,6 @@ class HcclTaskInfo : public TaskInfo { int64_t root_id() const { return root_id_; } int64_t op_type() const { return op_type_; } int64_t data_type() const { return data_type_; } - const std::string &group() const { return group_; } std::function hcom_bind_model() const { return hcom_bind_model_; } std::function hcom_unbind_model() const { return hcom_unbind_model_; } std::function, void *)> hcom_distribute_task() const { @@ -325,7 +305,6 @@ class HcclTaskInfo : public TaskInfo { int64_t root_id_; int64_t op_type_; int64_t data_type_; - std::string group_; std::function hcom_bind_model_; std::function hcom_unbind_model_; std::function, void *)> hcom_distribute_task_; @@ -333,11 +312,8 @@ class HcclTaskInfo : public TaskInfo { class ProfilerTraceTaskInfo : public TaskInfo { public: - ProfilerTraceTaskInfo(const std::string &op_name, uint32_t stream_id, uint64_t log_id, bool notify, uint32_t flat) - : TaskInfo(op_name, stream_id, TaskInfoType::PROFILER_TRACE, false), - log_id_(log_id), - notify_(notify), - flat_(flat) {} + ProfilerTraceTaskInfo(uint32_t stream_id, uint64_t log_id, bool notify, uint32_t flat) + : TaskInfo(stream_id, TaskInfoType::PROFILER_TRACE), log_id_(log_id), notify_(notify), flat_(flat) {} ~ProfilerTraceTaskInfo() override {} uint64_t log_id() const { return log_id_; } @@ -352,9 +328,8 @@ class ProfilerTraceTaskInfo : public TaskInfo { class MemcpyAsyncTaskInfo : public TaskInfo { public: - MemcpyAsyncTaskInfo(const std::string &op_name, uint32_t stream_id, void *dst, uint64_t dst_max, void *src, - uint64_t count, uint32_t kind, bool dump_flag) - : TaskInfo(op_name, stream_id, TaskInfoType::MEMCPY_ASYNC, dump_flag), + MemcpyAsyncTaskInfo(uint32_t stream_id, void *dst, uint64_t dst_max, void *src, uint64_t count, uint32_t kind) + : TaskInfo(stream_id, TaskInfoType::MEMCPY_ASYNC), dst_(dst), dst_max_(dst_max), src_(src), @@ -378,9 +353,9 @@ class MemcpyAsyncTaskInfo : public TaskInfo { class StreamSwitchTaskInfo : public TaskInfo { public: - StreamSwitchTaskInfo(const std::string &op_name, uint32_t stream_id, int64_t true_stream_id, void *input_addr, - void *value_addr, int64_t cond, int64_t data_type) - : TaskInfo(op_name, stream_id, TaskInfoType::STREAM_SWITCH, false), + StreamSwitchTaskInfo(uint32_t stream_id, int64_t true_stream_id, void *input_addr, void *value_addr, int64_t cond, + int64_t data_type) + : TaskInfo(stream_id, TaskInfoType::STREAM_SWITCH), true_stream_id_(true_stream_id), input_addr_(input_addr), value_addr_(value_addr), @@ -404,8 +379,8 @@ class StreamSwitchTaskInfo : public TaskInfo { class StreamActiveTaskInfo : public TaskInfo { public: - StreamActiveTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t active_stream_id) - : TaskInfo(op_name, 
stream_id, TaskInfoType::STREAM_ACTIVE, false), active_stream_id_(active_stream_id) {} + StreamActiveTaskInfo(uint32_t stream_id, uint32_t active_stream_id) + : TaskInfo(stream_id, TaskInfoType::STREAM_ACTIVE), active_stream_id_(active_stream_id) {} ~StreamActiveTaskInfo() override {} uint32_t active_stream_id() const { return active_stream_id_; } diff --git a/inc/graph/debug/ge_attr_define.h b/inc/graph/debug/ge_attr_define.h index 873952e1..ea5544d1 100644 --- a/inc/graph/debug/ge_attr_define.h +++ b/inc/graph/debug/ge_attr_define.h @@ -181,6 +181,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_ORIGIN_SIZE; + // to be deleted GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_TO_BE_DELETED; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PERMUTE_RESHAPE_FUSION; diff --git a/src/common/graph/ge_attr_define.cc b/src/common/graph/ge_attr_define.cc index 1c2c9c71..f780d525 100644 --- a/src/common/graph/ge_attr_define.cc +++ b/src/common/graph/ge_attr_define.cc @@ -154,6 +154,7 @@ const std::string ATTR_NAME_RTSWITCH_RECV_EVENT_ID = "rtswitch_event_id"; const std::string ATTR_NAME_AUTOMIC_ADD_START = "automic_add_addr_start"; const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE = "automic_add_mem_size"; const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS = "_dynamic_output_dims"; +const std::string ATTR_NAME_INPUT_ORIGIN_SIZE = "input_origin_size"; // To be deleted const std::string ATTR_TO_BE_DELETED = "to_be_deleted"; diff --git a/src/common/graph/node.cc b/src/common/graph/node.cc index 1c8f327b..e0939e7e 100644 --- a/src/common/graph/node.cc +++ b/src/common/graph/node.cc @@ -759,6 +759,7 @@ graphStatus Node::Verify() const { GELOGW("Verify UpdateOutputName failed"); } } + node_op.BreakConnect(); } if (op_->CommonVerify() == GRAPH_SUCCESS) { diff --git a/src/common/graph/op_desc.cc b/src/common/graph/op_desc.cc index ba3c9b33..adb52162 100644 --- a/src/common/graph/op_desc.cc +++ b/src/common/graph/op_desc.cc @@ -818,7 +818,9 @@ graphStatus OpDesc::InferShapeAndType() { } } Operator op_proxy = ge::OpDescUtils::CreateOperatorFromOpDesc(shared_from_this()); - return (graphStatus)infer_func_(op_proxy); + graphStatus ret = (graphStatus)infer_func_(op_proxy); + op_proxy.BreakConnect(); + return ret; } graphStatus OpDesc::DefaultInferFormat() { @@ -863,12 +865,14 @@ graphStatus OpDesc::DefaultInferFormat() { } graphStatus OpDesc::OpVerify() { - Operator op_proxy = ge::OpDescUtils::CreateOperatorFromOpDesc(shared_from_this()); if (verifier_func_ == nullptr) { verifier_func_ = OperatorFactoryImpl::GetVerifyFunc(GetType()); } if (verifier_func_ != nullptr) { - return (graphStatus)verifier_func_(op_proxy); + Operator op_proxy = ge::OpDescUtils::CreateOperatorFromOpDesc(shared_from_this()); + graphStatus ret = (graphStatus)verifier_func_(op_proxy); + op_proxy.BreakConnect(); + return ret; } return GRAPH_SUCCESS; } diff --git a/src/common/graph/operator.cc b/src/common/graph/operator.cc index 8adf56c1..1ac8d41d 100644 --- a/src/common/graph/operator.cc +++ b/src/common/graph/operator.cc @@ -931,7 +931,7 @@ OperatorImplPtr Operator::GetOperatorImplPtr() const { return operator_impl_; } void Operator::BreakConnect() const { if 
(operator_impl_ == nullptr) { - GELOGE(GRAPH_FAILED, "operator impl is nullptr."); + GELOGW("operator impl is nullptr."); return; } operator_impl_->ClearInputLinks(); @@ -1318,6 +1318,8 @@ class GraphBuilderImpl { string type = src_op_impl->op_desc_->GetType(); auto node_op = ge::OperatorFactory::CreateOperator("node_op", type); auto tensor_desc = ge::OpDescUtils::GetOpDescFromOperator(node_op); + node_op.BreakConnect(); + GE_CHK_BOOL_EXEC(tensor_desc != nullptr, continue, "tensor_desc is null."); if ((tensor_desc->GetInputsSize() == 0 && tensor_desc->GetOutputsSize() > 0) || type == DATA || type == VARIABLE || type == INITDATA || type == GETNEXT) { diff --git a/src/common/graph/shape_refiner.cc b/src/common/graph/shape_refiner.cc index 845fe494..edf426a5 100644 --- a/src/common/graph/shape_refiner.cc +++ b/src/common/graph/shape_refiner.cc @@ -235,6 +235,7 @@ graphStatus ShapeRefiner::InferShapeAndType(const ConstNodePtr &node, Operator & GELOGD("get op from OperatorFactory success. opType: %s", op_type.c_str()); auto temp_op_desc = ge::OpDescUtils::GetOpDescFromOperator(node_op); + node_op.BreakConnect(); if (temp_op_desc == nullptr) { GELOGE(GRAPH_FAILED, "temp op desc is null"); return GRAPH_FAILED; diff --git a/src/ge/common/ge/tbe_plugin_manager.cc b/src/ge/common/ge/tbe_plugin_manager.cc index cdce243c..e02b9422 100644 --- a/src/ge/common/ge/tbe_plugin_manager.cc +++ b/src/ge/common/ge/tbe_plugin_manager.cc @@ -187,12 +187,9 @@ void TBEPluginManager::LoadCustomOpLib() { std::vector registration_datas = domi::OpRegistry::Instance()->registrationDatas; GELOGI("The size of registration_datas is: %zu", registration_datas.size()); for (OpRegistrationData reg_data : registration_datas) { - bool ret = CheckRegisterStatus(reg_data); - if (ret) { - GELOGD("Begin to register optype: %s, imply_type: %u", reg_data.GetOmOptype().c_str(), - static_cast(reg_data.GetImplyType())); - domi::OpRegistry::Instance()->Register(reg_data); - } + GELOGD("Begin to register optype: %s, imply_type: %u", reg_data.GetOmOptype().c_str(), + static_cast(reg_data.GetImplyType())); + domi::OpRegistry::Instance()->Register(reg_data); } } @@ -230,31 +227,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPlug } } -bool TBEPluginManager::CheckRegisterStatus(const OpRegistrationData ®_data) { - bool ret = true; - static char *parser_priority = std::getenv("PARSER_PRIORITY"); - static bool keep_cce = parser_priority != nullptr && string(parser_priority) == "cce"; - auto ori_optype_set = reg_data.GetOriginOpTypeSet(); - for (const auto &op_type : ori_optype_set) { - domi::ImplyType imply_type = domi::OpRegistry::Instance()->GetImplyTypeByOriOpType(op_type); - GELOGD("Enter into reg_data loop. 
op_type = %s , om_optype_ = %s", op_type.c_str(), reg_data.GetOmOptype().c_str()); - if (imply_type != domi::ImplyType::BUILDIN) { - if ((keep_cce && reg_data.GetImplyType() != domi::ImplyType::CCE) || - (!keep_cce && reg_data.GetImplyType() != domi::ImplyType::TVM)) { - GELOGD("op_type[%s] does not need to be changed, om_optype:%s.", op_type.c_str(), - reg_data.GetOmOptype().c_str()); - ret = false; - } else { - GELOGI("op_type[%s] will be changed to om_optype:%s.", op_type.c_str(), reg_data.GetOmOptype().c_str()); - } - } else { - GELOGD("First register in ge initialize, original type: %s, om_optype: %s, imply type: %d.", op_type.c_str(), - reg_data.GetOmOptype().c_str(), static_cast(reg_data.GetImplyType())); - } - } - return ret; -} - Status TBEPluginManager::CheckCustomAiCpuOpLib() { std::vector vec_op_type; diff --git a/src/ge/common/ge/tbe_plugin_manager.h b/src/ge/common/ge/tbe_plugin_manager.h index c2ad99b1..82264ae8 100644 --- a/src/ge/common/ge/tbe_plugin_manager.h +++ b/src/ge/common/ge/tbe_plugin_manager.h @@ -63,7 +63,6 @@ class TBEPluginManager { static void GetCustomOpPath(std::string &customop_path); void LoadCustomOpLib(); static Status CheckCustomAiCpuOpLib(); - static bool CheckRegisterStatus(const OpRegistrationData ®_data); SoHandlesVec handles_vec_; static std::map options_; diff --git a/src/ge/common/helper/model_helper.cc b/src/ge/common/helper/model_helper.cc index 556b43e7..2f95cbb1 100644 --- a/src/ge/common/helper/model_helper.cc +++ b/src/ge/common/helper/model_helper.cc @@ -184,7 +184,8 @@ ModelHelper::SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::strin // Model ModelPtr model_ptr = ge::MakeShared(); GE_CHECK_NOTNULL_EXEC(model_ptr, return MEMALLOC_FAILED); - model_ptr->SetName(compute_graph->GetName()); + std::string original_model_name = compute_graph->GetName() + "_original"; + model_ptr->SetName(original_model_name); model_ptr->SetGraph(graph); model_ptr->SetVersion(static_cast(OM_PROTO_VERSION)); string framework_version; @@ -504,4 +505,36 @@ Status ModelHelper::ReleaseLocalModelData() noexcept { } return result; } + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::GetBaseNameFromFileName(const string &file_name, + string &base_name) { + GELOGD("Get base_name from file, file_name:%s", file_name.c_str()); + GE_CHK_BOOL_EXEC_WARN(!file_name.empty(), return FAILED, "File path may not valid, check params --output"); + size_t start_position = 0; + // using output as base_name (ignore ".om") + size_t filename_suffixes = 3; + if (file_name.find_last_of('/') != string::npos) { + start_position = file_name.find_last_of('/') + 1; + } + size_t end_position = file_name.length() - filename_suffixes; + base_name = file_name.substr(start_position, end_position - start_position); + GE_CHK_BOOL_EXEC_WARN(!base_name.empty(), return FAILED, "Get base_name failed, check params --output"); + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status +ModelHelper::GetModelNameFromMergedGraphName(const string &graph_name, string &model_name) { + GELOGD("Get model_name from graph_name, graph_name:%s", graph_name.c_str()); + // this can only be used after merged graph(graph name will be append with "_x", x is index); + GE_CHK_BOOL_EXEC_WARN(!graph_name.empty(), return FAILED, "File path may not valid, check params --output"); + size_t start_position = 0; + size_t end_position = graph_name.length(); + // using graph as model_name (ignore "_x", x is the index of graph) + if (graph_name.find_last_of('_') != string::npos) { + 
end_position = graph_name.find_last_of('_'); + } + model_name = graph_name.substr(start_position, end_position); + GE_CHK_BOOL_EXEC_WARN(!model_name.empty(), return FAILED, "Get model_name failed, check params --output"); + return SUCCESS; +} } // namespace ge diff --git a/src/ge/common/model_parser/base.cc b/src/ge/common/model_parser/base.cc index a9a21ec5..fb6a647f 100644 --- a/src/ge/common/model_parser/base.cc +++ b/src/ge/common/model_parser/base.cc @@ -15,7 +15,7 @@ */ #include "common/model_parser/base.h" - +#include "common/helper/model_helper.h" #include #include #include @@ -61,7 +61,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro // read data as a block: (void)fs.read(data, len); - + ModelHelper model_helper; + model_helper.GetBaseNameFromFileName(model_path, model_data.om_name); // Set the model data parameter model_data.model_data = data; model_data.model_len = len; diff --git a/src/ge/common/profiling/profiling_manager.cc b/src/ge/common/profiling/profiling_manager.cc index 748b9880..0944b5e0 100644 --- a/src/ge/common/profiling/profiling_manager.cc +++ b/src/ge/common/profiling/profiling_manager.cc @@ -292,6 +292,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St GELOGW("ProfMgrStartUp failed."); return FAILED; } + GELOGD("StartProfiling, prof_handle: %p", prof_handle); prof_handle_vec_.push_back(prof_handle); } #endif @@ -314,8 +315,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf for (size_t i = 0; i < prof_handle_vec_.size(); ++i) { int result = ProfMgrStop(prof_handle_vec_[i]); if (result != 0) { - GELOGW("ProfMgr stop return fail:%d.", result); - return; + GELOGW("ProfMgr stop return fail:%d, handle:%p", result, prof_handle_vec_[i]); } } vector().swap(prof_handle_vec_); diff --git a/src/ge/common/properties_manager.cc b/src/ge/common/properties_manager.cc index 7321af9f..cf1ada05 100644 --- a/src/ge/common/properties_manager.cc +++ b/src/ge/common/properties_manager.cc @@ -208,6 +208,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set Propertie } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool PropertiesManager::IsLayerNeedDump(const std::string &model, + const std::string &om_name, const std::string &op_name) { std::lock_guard lock(dump_mutex_); // if dump all @@ -216,9 +217,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool PropertiesManager::IsLayer } // if this model need dump - auto model_iter = model_dump_properties_map_.find(model); - if (model_iter != model_dump_properties_map_.end()) { + auto om_name_iter = model_dump_properties_map_.find(om_name); + auto model_name_iter = model_dump_properties_map_.find(model); + if (om_name_iter != model_dump_properties_map_.end() || model_name_iter != model_dump_properties_map_.end()) { // if no dump layer info, dump all layer in this model + auto model_iter = om_name_iter != model_dump_properties_map_.end() ? 
om_name_iter : model_name_iter; if (model_iter->second.empty()) { return true; } diff --git a/src/ge/common/properties_manager.h b/src/ge/common/properties_manager.h index eb43820c..7cbb5949 100644 --- a/src/ge/common/properties_manager.h +++ b/src/ge/common/properties_manager.h @@ -84,7 +84,7 @@ class PropertiesManager { void AddDumpPropertyValue(const std::string &model, const std::set &layers); std::set GetAllDumpModel(); std::set GetDumpPropertyValue(const std::string &model); - bool IsLayerNeedDump(const std::string &model, const std::string &op_name); + bool IsLayerNeedDump(const std::string &model, const std::string &om_name, const std::string &op_name); void DeleteDumpPropertyValue(const std::string &model); void ClearDumpPropertyValue(); bool QueryModelDumpStatus(const std::string &model); diff --git a/src/ge/executor/ge_executor.cc b/src/ge/executor/ge_executor.cc index 210eecd6..ad7ef1fe 100644 --- a/src/ge/executor/ge_executor.cc +++ b/src/ge/executor/ge_executor.cc @@ -641,7 +641,6 @@ Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_da model_data.model_data = nullptr; } } - return ret; } diff --git a/src/ge/ge_local_engine/engine/host_cpu_engine.cc b/src/ge/ge_local_engine/engine/host_cpu_engine.cc index 86f58b23..fd1b20d3 100644 --- a/src/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/src/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -131,6 +131,7 @@ Status HostCpuEngine::RunInternal(const ge::OpDescPtr &op_desc, HostCpuOp &op_ke GELOGE(FAILED, "Failed to compute host cpu op. node = %s, ret = %u", op_desc->GetName().c_str(), ret); return FAILED; } + op.BreakConnect(); return SUCCESS; } diff --git a/src/ge/ge_runner.mk b/src/ge/ge_runner.mk index 2d4bcf6a..a9cfdd82 100644 --- a/src/ge/ge_runner.mk +++ b/src/ge/ge_runner.mk @@ -407,7 +407,6 @@ LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD LOCAL_CFLAGS += -g -O0 LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) - LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES) LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) diff --git a/src/ge/ge_runtime/model_runner.cc b/src/ge/ge_runtime/model_runner.cc index b6e43dd5..59952e39 100644 --- a/src/ge/ge_runtime/model_runner.cc +++ b/src/ge/ge_runtime/model_runner.cc @@ -49,15 +49,6 @@ bool ModelRunner::LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint return true; } -bool ModelRunner::LoadModelComplete(uint32_t model_id) { - auto model_iter = runtime_models_.find(model_id); - if (model_iter == runtime_models_.end()) { - GELOGE(PARAM_INVALID, "Model id %u not found.", model_id); - return false; - } - return model_iter->second->LoadComplete(); -} - const std::vector &ModelRunner::GetTaskIdList(uint32_t model_id) const { auto model_iter = runtime_models_.find(model_id); if (model_iter == runtime_models_.end()) { @@ -69,28 +60,6 @@ const std::vector &ModelRunner::GetTaskIdList(uint32_t model_id) const return model_iter->second->GetTaskIdList(); } -const std::vector &ModelRunner::GetStreamIdList(uint32_t model_id) const { - auto model_iter = runtime_models_.find(model_id); - if (model_iter == runtime_models_.end()) { - GELOGE(PARAM_INVALID, "Model id %u not found.", model_id); - static const std::vector empty_ret; - return empty_ret; - } - - return model_iter->second->GetStreamIdList(); -} - -const std::map> &ModelRunner::GetRuntimeInfoMap(uint32_t model_id) const { - auto model_iter = runtime_models_.find(model_id); - if (model_iter == runtime_models_.end()) { - GELOGW("Model id %u not found.", model_id); - static const 
std::map> empty_ret; - return empty_ret; - } - - return model_iter->second->GetRuntimeInfoMap(); -} - bool ModelRunner::UnloadModel(uint32_t model_id) { auto iter = runtime_models_.find(model_id); if (iter != runtime_models_.end()) { diff --git a/src/ge/ge_runtime/output.cc b/src/ge/ge_runtime/output.cc index 5153f688..90c33bb4 100644 --- a/src/ge/ge_runtime/output.cc +++ b/src/ge/ge_runtime/output.cc @@ -76,7 +76,7 @@ bool Output::CopyRslt(OutputData *rslt, uint32_t data_begin, uint32_t &data_inde DataBuffer data_buf = rslt->blobs[data_begin + data_count]; bool ret = SetDataBuf(data_buf, data_begin, data_count, i, support_mem_share); if (!ret) { - GELOGE(FAILED, "Copy data to host error. index: %lu, addr: %p", i, v_input_data_addr_[i]); + GELOGE(FAILED, "Copy data to host failed. index: %lu, addr: %p", i, v_input_data_addr_[i]); return ret; } data_index = data_begin + data_count; diff --git a/src/ge/ge_runtime/runtime_model.cc b/src/ge/ge_runtime/runtime_model.cc index bdf8f2a6..c89ced91 100644 --- a/src/ge/ge_runtime/runtime_model.cc +++ b/src/ge/ge_runtime/runtime_model.cc @@ -28,6 +28,7 @@ namespace ge { namespace model_runner { + RuntimeModel::~RuntimeModel() { GELOGI("RuntimeModel destructor start"); @@ -115,34 +116,23 @@ bool RuntimeModel::InitEvent(uint32_t event_num) { return true; } -bool RuntimeModel::InitLabel(std::shared_ptr &davinci_model) { - GELOGI("batch number:%u.", davinci_model->GetBatchNum()); - label_list_.resize(davinci_model->GetBatchNum()); - for (auto &task_info : davinci_model->GetTaskInfoList()) { - if (task_info == nullptr) { - GELOGE(PARAM_INVALID, "task_info is null."); - continue; - } - - if (task_info->type() != TaskInfoType::LABEL_SET) { - continue; - } - auto label_set_task_info = std::static_pointer_cast(task_info); - - if (label_set_task_info->stream_id() >= stream_list_.size()) { - GELOGE(PARAM_INVALID, "Invalid stream id."); +bool RuntimeModel::InitLabel(uint32_t batch_num) { + GELOGI("batch number:%u.", batch_num); + for (uint32_t i = 0; (batch_num != 0 && i <= batch_num); ++i) { + rtLabel_t rt_lLabel = nullptr; + rtError_t rt_ret = rtLabelCreate(&rt_lLabel); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api rtLabelCreate failed, i; %u; ret: 0x%X", i, rt_ret); return false; } - rtLabel_t rt_label = nullptr; - rtError_t rt_ret = rtLabelCreateEx(&rt_label, stream_list_[label_set_task_info->stream_id()]); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api rtLabelCreate failed, ret: 0x%X", rt_ret); + if (rt_lLabel == nullptr) { + GELOGE(RT_FAILED, "rtLabel is nullptr!"); return false; } - label_list_[label_set_task_info->label_id()] = rt_label; - } + label_list_.emplace_back(rt_lLabel); + } return true; } @@ -174,7 +164,7 @@ bool RuntimeModel::InitResource(std::shared_ptr &davinci_model) { return false; } - if (!InitLabel(davinci_model)) { + if (!InitLabel(davinci_model->GetBatchNum())) { return false; } @@ -219,41 +209,20 @@ bool RuntimeModel::LoadTask() { return false; } task_id_list_.push_back(task_id); - stream_id_list_.push_back(stream_id); - if (task->Args() != nullptr) { - std::shared_ptr runtime_tuple = nullptr; - GE_MAKE_SHARED(runtime_tuple = std::make_shared(task_id, stream_id, task->Args()), return false); - auto emplace_ret = runtime_info_map_.emplace(task->task_name(), runtime_tuple); - if (!emplace_ret.second) { - GELOGW("Task name exist:%s", task->task_name().c_str()); - } - } } if (task_list_.empty()) { GELOGE(FAILED, "Task list is empty"); return false; } + GELOGI("Distribute task succ."); - 
GELOGI("LoadTask succ."); - return true; -} - -bool RuntimeModel::LoadComplete() { - uint32_t task_id = 0; - uint32_t stream_id = 0; - auto rt_ret = rtModelGetTaskId(rt_model_handle_, &task_id, &stream_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rtModelGetTaskId failed, ret:0x%X", rt_ret); - return RT_FAILED; - } - task_id_list_.push_back(task_id); - stream_id_list_.push_back(stream_id); - - rt_ret = rtModelLoadComplete(rt_model_handle_); + auto rt_ret = rtModelLoadComplete(rt_model_handle_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api rtModelLoadComplete failed, ret: 0x%X.", rt_ret); return false; } + + GELOGI("LoadTask succ."); return true; } @@ -301,14 +270,10 @@ bool RuntimeModel::Run() { return false; } - GELOGI("Run rtModelExecute success, ret = 0x%X", ret); + GELOGI("Run rtModelExecute success"); ret = rtStreamSynchronize(rt_model_stream_); if (ret != RT_ERROR_NONE) { - if (ret == RT_ERROR_END_OF_SEQUENCE) { - GELOGI("Model stream RT_ERROR_END_OF_SEQUENCE signal received, ret = 0x%X", ret); - return true; - } GELOGE(RT_FAILED, "Model stream sync failed, ret = 0x%X", ret); return false; } @@ -468,7 +433,7 @@ bool RuntimeModel::InitConstantInfo(std::shared_ptr &davinci_model } if (constant->output_tensors[0].size < constant->weight_data.size()) { - GELOGE(PARAM_INVALID, "Output size:%u less than weight data size:%zu", constant->output_tensors[0].size, + GELOGE(PARAM_INVALID, "Output size:%u is less than weight data size:%zu", constant->output_tensors[0].size, constant->weight_data.size()); return false; } @@ -483,8 +448,11 @@ bool RuntimeModel::InitConstantInfo(std::shared_ptr &davinci_model /// The logic of GetShapeSize is wrong, the scaler tensor's GetShapeSize is zero /// and that of unknown shape is zero too. /// Unknown shape will not appear here, so we can use zero judge a tensor is scaler or not. - int64_t elem_num = - (constant->weight_tensors[0].GetShapeSize() == 0) ? 
1 : constant->weight_tensors[0].GetShapeSize(); + int64_t elem_num = constant->weight_tensors[0].GetShapeSize(); + if (elem_num == 0 && constant->weight_tensors[0].size == 0) { + elem_num = 1; + } + if (constant->weight_data.size() < sizeof(uint64_t)) { GELOGE(FAILED, "weight_data size is smaller than sizeof(uint64_t)"); return false; @@ -527,6 +495,5 @@ void RuntimeModel::CreateOutput(uint32_t index, const OpInfo &op_info, InputOutp const std::vector &RuntimeModel::GetTaskIdList() const { return task_id_list_; } -const std::vector &RuntimeModel::GetStreamIdList() const { return stream_id_list_; } } // namespace model_runner } // namespace ge diff --git a/src/ge/ge_runtime/runtime_model.h b/src/ge/ge_runtime/runtime_model.h index 67535296..e8ff4057 100644 --- a/src/ge/ge_runtime/runtime_model.h +++ b/src/ge/ge_runtime/runtime_model.h @@ -27,7 +27,7 @@ namespace ge { namespace model_runner { -using RuntimeInfo = std::tuple; + class Task; class RuntimeModel { public: @@ -35,10 +35,7 @@ class RuntimeModel { ~RuntimeModel(); bool Load(uint32_t device_id, uint64_t session_id, std::shared_ptr &davinci_model); - bool LoadComplete(); const std::vector &GetTaskIdList() const; - const std::vector &GetStreamIdList() const; - const std::map> &GetRuntimeInfoMap() const { return runtime_info_map_; } bool Run(); bool CopyInputData(const InputData &input_data); bool GetInputOutputDescInfo(bool zero_copy, std::vector *input_desc, @@ -51,7 +48,7 @@ class RuntimeModel { bool LoadTask(); bool InitStream(std::shared_ptr &davinci_model); bool InitEvent(uint32_t event_num); - bool InitLabel(std::shared_ptr &davinci_model); + bool InitLabel(uint32_t batch_num); bool InitDataInfo(std::shared_ptr &davinci_model); bool InitOutputInfo(std::shared_ptr &davinci_model); bool InitConstantInfo(std::shared_ptr &davinci_model); @@ -80,8 +77,6 @@ class RuntimeModel { std::vector> constant_info_list_{}; std::vector task_id_list_{}; - std::vector stream_id_list_{}; - std::map> runtime_info_map_; }; } // namespace model_runner diff --git a/src/ge/ge_runtime/task/aicpu_task.cc b/src/ge/ge_runtime/task/aicpu_task.cc index 9b126ec0..4cb71866 100644 --- a/src/ge/ge_runtime/task/aicpu_task.cc +++ b/src/ge/ge_runtime/task/aicpu_task.cc @@ -85,15 +85,11 @@ bool AicpuTask::Distribute() { return false; } - input_output_addr_ = reinterpret_cast(reinterpret_cast(args_) + io_addr_offset); - - auto dump_flag = task_info_->dump_flag() ? 
RT_KERNEL_DUMPFLAG : RT_KERNEL_DEFAULT; - GELOGI( - "Distribute AicpuTask start, args_size = %u, io_addrs_num = %u, so_name = %s, kernel_name = %s, dump_flag = %d.", - args_size, io_addrs_num, task_info_->so_name().data(), task_info_->kernel_name().data(), dump_flag); - rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast(task_info_->so_name().data()), - reinterpret_cast(task_info_->kernel_name().data()), 1, args_, - args_size, nullptr, stream_, dump_flag); + GELOGI("Distribute AicpuTask start, args_size = %u, io_addrs_num = %u, so_name = %s, kernel_name = %s.", args_size, + io_addrs_num, task_info_->so_name().data(), task_info_->kernel_name().data()); + rt_ret = rtCpuKernelLaunch(reinterpret_cast(task_info_->so_name().data()), + reinterpret_cast(task_info_->kernel_name().data()), 1, args_, args_size, + nullptr, stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); return false; diff --git a/src/ge/ge_runtime/task/aicpu_task.h b/src/ge/ge_runtime/task/aicpu_task.h index cc21af8a..f5cdc617 100644 --- a/src/ge/ge_runtime/task/aicpu_task.h +++ b/src/ge/ge_runtime/task/aicpu_task.h @@ -18,7 +18,6 @@ #define GE_GE_RUNTIME_TASK_AICPU_TASK_H_ #include -#include #include "ge_runtime/task/task.h" namespace ge { @@ -31,17 +30,12 @@ class AicpuTask : public TaskRepeater { bool Distribute() override; - void *Args() override { return input_output_addr_; } - - std::string task_name() const override { return task_info_->op_name(); } - private: static void ReleaseRtMem(void **ptr) noexcept; std::shared_ptr task_info_; void *stream_; void *args_; - void *input_output_addr_; }; } // namespace model_runner } // namespace ge diff --git a/src/ge/ge_runtime/task/hccl_task.cc b/src/ge/ge_runtime/task/hccl_task.cc index 3d5f8504..54ae3bf3 100644 --- a/src/ge/ge_runtime/task/hccl_task.cc +++ b/src/ge/ge_runtime/task/hccl_task.cc @@ -115,6 +115,7 @@ bool HcclTask::Distribute() { rt_ret = rtModelBindStream(rt_model_handle_, stream, RT_HEAD_STREAM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + (void)rtStreamDestroy(stream); return false; } diff --git a/src/ge/ge_runtime/task/label_goto_task.cc b/src/ge/ge_runtime/task/label_goto_task.cc deleted file mode 100644 index d357accb..00000000 --- a/src/ge/ge_runtime/task/label_goto_task.cc +++ /dev/null @@ -1,70 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ge_runtime/task/label_goto_task.h" -#include "ge_runtime/task/task_factory.h" - -namespace ge { -namespace model_runner { -LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::shared_ptr &task_info) - : TaskRepeater(model_context, task_info), - task_info_(task_info), - stream_(nullptr), - label_(nullptr) { - if (task_info_ == nullptr) { - GELOGW("task_info_ is null!"); - return; - } - auto stream_list = model_context.stream_list(); - auto label_list = model_context.label_list(); - uint32_t stream_id = task_info->stream_id(); - uint32_t label_id = task_info->label_id(); - GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id); - GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id); - if (stream_id >= stream_list.size() || label_id >= label_list.size()) { - GELOGW("Stream/Label id invalid."); - return; - } - stream_ = stream_list[stream_id]; - label_ = label_list[label_id]; -} - -LabelGotoTask::~LabelGotoTask() {} - -bool LabelGotoTask::Distribute() { - GELOGI("LabelGotoTask Distribute start."); - if (stream_ == nullptr) { - GELOGE(PARAM_INVALID, "stream is null!"); - return false; - } - if (label_ == nullptr) { - GELOGE(PARAM_INVALID, "label is null!"); - return false; - } - rtError_t rt_ret = rtLabelGotoEx(label_, stream_); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return false; - } - - GELOGI("DistributeTask end."); - return true; -} - -REGISTER_TASK(TaskInfoType::LABEL_GOTO, LabelGotoTask, LabelGotoTaskInfo); - -} // namespace model_runner -} // namespace ge diff --git a/src/ge/ge_runtime/task/label_goto_task.h b/src/ge/ge_runtime/task/label_goto_task.h deleted file mode 100644 index 4fd6d1bc..00000000 --- a/src/ge/ge_runtime/task/label_goto_task.h +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_ -#define GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_ - -#include -#include "ge_runtime/task/task.h" - -namespace ge { -namespace model_runner { -class LabelGotoTask : public TaskRepeater { - public: - LabelGotoTask(const ModelContext &model_context, const std::shared_ptr &task_info); - - ~LabelGotoTask() override; - - bool Distribute() override; - - private: - std::shared_ptr task_info_; - void *stream_; - void *label_; -}; -} // namespace model_runner -} // namespace ge - -#endif // GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_ diff --git a/src/ge/ge_runtime/task/label_set_task.cc b/src/ge/ge_runtime/task/label_set_task.cc deleted file mode 100644 index 3ab5802c..00000000 --- a/src/ge/ge_runtime/task/label_set_task.cc +++ /dev/null @@ -1,70 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ge_runtime/task/label_set_task.h" -#include "ge_runtime/task/task_factory.h" - -namespace ge { -namespace model_runner { -LabelSetTask::LabelSetTask(const ModelContext &model_context, const std::shared_ptr &task_info) - : TaskRepeater(model_context, task_info), - task_info_(task_info), - stream_(nullptr), - label_(nullptr) { - if (task_info_ == nullptr) { - GELOGW("task_info_ is null!"); - return; - } - auto stream_list = model_context.stream_list(); - auto label_list = model_context.label_list(); - uint32_t stream_id = task_info->stream_id(); - uint32_t label_id = task_info->label_id(); - GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id); - GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id); - if (stream_id >= stream_list.size() || label_id >= label_list.size()) { - GELOGW("Stream/Label id invalid."); - return; - } - stream_ = stream_list[stream_id]; - label_ = label_list[label_id]; -} - -LabelSetTask::~LabelSetTask() {} - -bool LabelSetTask::Distribute() { - GELOGI("LabelSetTask Distribute start."); - if (stream_ == nullptr) { - GELOGE(PARAM_INVALID, "stream is null!"); - return false; - } - if (label_ == nullptr) { - GELOGE(PARAM_INVALID, "label is null!"); - return false; - } - rtError_t rt_ret = rtLabelSet(label_, stream_); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return false; - } - - GELOGI("DistributeTask end."); - return true; -} - -REGISTER_TASK(TaskInfoType::LABEL_SET, LabelSetTask, LabelSetTaskInfo); - -} // namespace model_runner -} // namespace ge diff --git a/src/ge/ge_runtime/task/label_set_task.h b/src/ge/ge_runtime/task/label_set_task.h deleted file mode 100644 index 70bf1584..00000000 --- a/src/ge/ge_runtime/task/label_set_task.h +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef GE_GE_RUNTIME_TASK_LABEL_SET_TASK_H_ -#define GE_GE_RUNTIME_TASK_LABEL_SET_TASK_H_ - -#include -#include "ge_runtime/task/task.h" - -namespace ge { -namespace model_runner { -class LabelSetTask : public TaskRepeater { - public: - LabelSetTask(const ModelContext &model_context, const std::shared_ptr &task_info); - - ~LabelSetTask() override; - - bool Distribute() override; - - private: - std::shared_ptr task_info_; - void *stream_; - void *label_; -}; -} // namespace model_runner -} // namespace ge - -#endif // GE_GE_RUNTIME_TASK_LABEL_SET_TASK_H_ diff --git a/src/ge/ge_runtime/task/label_switch_task.cc b/src/ge/ge_runtime/task/label_switch_task.cc deleted file mode 100644 index a3c2d41a..00000000 --- a/src/ge/ge_runtime/task/label_switch_task.cc +++ /dev/null @@ -1,131 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ge_runtime/task/label_switch_task.h" -#include "ge_runtime/task/task_factory.h" - -namespace ge { -namespace model_runner { -LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context, - const std::shared_ptr &task_info) - : TaskRepeater(model_context, task_info), - task_info_(task_info), - stream_(nullptr), - all_label_resource_(), - label_info_(nullptr) { - if (task_info_ == nullptr) { - GELOGW("task_info_ is null!"); - return; - } - - all_label_resource_ = model_context.label_list(); - auto stream_list = model_context.stream_list(); - uint32_t stream_id = task_info->stream_id(); - GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id); - if (stream_id >= stream_list.size()) { - GELOGW("Stream id invalid."); - return; - } - stream_ = stream_list[stream_id]; -} - -LabelSwitchTask::~LabelSwitchTask() { - if (label_info_ != nullptr) { - rtError_t rt_ret = rtFree(label_info_); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "rtFree fwkOpBuf failed! 
ret: 0x%X.", rt_ret); - } - label_info_ = nullptr; - } -} - -bool LabelSwitchTask::Distribute() { - GELOGI("LabelSwitchTask Distribute start."); - if (!CheckParamValid()) { - return false; - } - - const std::vector &label_index_list = task_info_->label_list(); - std::vector label_list(task_info_->label_size(), nullptr); - - for (size_t i = 0; i < task_info_->label_size(); ++i) { - uint32_t label_index = label_index_list[i]; - if (label_index >= all_label_resource_.size()) { - GELOGE(PARAM_INVALID, "label %zu index is %u, but there are %zu labels in total.", i, label_index, - all_label_resource_.size()); - return false; - } - label_list[i] = all_label_resource_[label_index]; - GELOGI("Case %zu: label id %zu.", i, label_index); - } - - uint32_t label_info_size = sizeof(rtLabelDevInfo) * task_info_->label_size(); - rtError_t rt_ret = rtMalloc(&label_info_, label_info_size, RT_MEMORY_HBM); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return false; - } - - rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info_, label_info_size); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return false; - } - - rt_ret = rtLabelSwitchByIndex(task_info_->cond(), label_list.size(), label_info_, stream_); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return false; - } - - GELOGI("DistributeTask end."); - return true; -} - -bool LabelSwitchTask::CheckParamValid() { - if (stream_ == nullptr) { - GELOGE(PARAM_INVALID, "stream is null!"); - return false; - } - - if (task_info_->label_list().empty()) { - GELOGE(PARAM_INVALID, "label_list is empty."); - return false; - } - - if (task_info_->label_size() != task_info_->label_list().size()) { - GELOGE(PARAM_INVALID, "label_list size %zu but label_size is %u.", task_info_->label_list().size(), - task_info_->label_size()); - return false; - } - - if (task_info_->label_size() >= UINT32_MAX / sizeof(rtLabelDevInfo)) { - GELOGE(PARAM_INVALID, "label_size %u will overflow.", task_info_->label_size()); - return false; - } - - if (label_info_ != nullptr) { - GELOGE(PARAM_INVALID, "label_info_ has dirty data."); - return false; - } - - return true; -} - -REGISTER_TASK(TaskInfoType::LABEL_SWITCH, LabelSwitchTask, LabelSwitchTaskInfo); - -} // namespace model_runner -} // namespace ge diff --git a/src/ge/ge_runtime/task/label_switch_task.h b/src/ge/ge_runtime/task/label_switch_task.h deleted file mode 100644 index 463faa31..00000000 --- a/src/ge/ge_runtime/task/label_switch_task.h +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef GE_GE_RUNTIME_TASK_LABEL_SWITCH_TASK_H_ -#define GE_GE_RUNTIME_TASK_LABEL_SWITCH_TASK_H_ - -#include -#include "ge_runtime/task/task.h" - -namespace ge { -namespace model_runner { -class LabelSwitchTask : public TaskRepeater { - public: - LabelSwitchTask(const ModelContext &model_context, const std::shared_ptr &task_info); - - ~LabelSwitchTask() override; - - bool Distribute() override; - - private: - bool CheckParamValid(); - - std::shared_ptr task_info_; - void *stream_; - std::vector all_label_resource_; - void *label_info_; -}; -} // namespace model_runner -} // namespace ge - -#endif // GE_GE_RUNTIME_TASK_LABEL_SWITCH_TASK_H_ diff --git a/src/ge/ge_runtime/task/stream_switch_task.cc b/src/ge/ge_runtime/task/stream_switch_task.cc index 2adcb4bd..91141139 100644 --- a/src/ge/ge_runtime/task/stream_switch_task.cc +++ b/src/ge/ge_runtime/task/stream_switch_task.cc @@ -51,7 +51,7 @@ bool StreamSwitchTask::Distribute() { } if (static_cast(task_info_->true_stream_id()) >= stream_list_.size()) { - GELOGE(PARAM_INVALID, "true_stream_id %ld must less than stream_list_ size %zu!", task_info_->true_stream_id(), + GELOGE(PARAM_INVALID, "true_stream_id %ld must be less than stream_list_ size %zu!", task_info_->true_stream_id(), stream_list_.size()); return false; } diff --git a/src/ge/ge_runtime/task/task.h b/src/ge/ge_runtime/task/task.h index 6c4df248..7c748a7d 100644 --- a/src/ge/ge_runtime/task/task.h +++ b/src/ge/ge_runtime/task/task.h @@ -18,9 +18,7 @@ #define GE_GE_RUNTIME_TASK_TASK_H_ #include -#include #include -#include #include "runtime/rt_model.h" #include "ge_runtime/model_context.h" #include "ge_runtime/task_info.h" @@ -34,10 +32,6 @@ class Task { virtual ~Task() {} virtual bool Distribute() = 0; - - virtual void *Args() { return nullptr; } - - virtual std::string task_name() const { return ""; } }; template diff --git a/src/ge/ge_runtime/task/tbe_task.cc b/src/ge/ge_runtime/task/tbe_task.cc index e7025ae8..8a3c36a4 100644 --- a/src/ge/ge_runtime/task/tbe_task.cc +++ b/src/ge/ge_runtime/task/tbe_task.cc @@ -95,14 +95,15 @@ bool TbeTask::Distribute() { return false; } + GELOGI("InitTbeTask end."); GELOGI("DistributeTbeTask start."); - auto dump_flag = task_info_->dump_flag() ? 
RT_KERNEL_DUMPFLAG : RT_KERNEL_DEFAULT; - rt_ret = rtKernelLaunchWithFlag(stub_func_, task_info_->block_dim(), args_, args_size, nullptr, stream_, dump_flag); + rt_ret = rtKernelLaunch(stub_func_, task_info_->block_dim(), args_, args_size, nullptr, stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api rtKernelLaunch failed, ret: 0x%X", rt_ret); return false; } - GELOGI("[DataDump] task name:%s, dump_flag:%d", task_info_->op_name().c_str(), dump_flag); + + GELOGI("DistributeTbeTask end."); return true; } diff --git a/src/ge/ge_runtime/task/tbe_task.h b/src/ge/ge_runtime/task/tbe_task.h index a8ce6268..994ba5e2 100644 --- a/src/ge/ge_runtime/task/tbe_task.h +++ b/src/ge/ge_runtime/task/tbe_task.h @@ -30,10 +30,6 @@ class TbeTask : public TaskRepeater { bool Distribute() override; - void *Args() override { return args_; } - - std::string task_name() const override { return task_info_->op_name(); } - private: std::shared_ptr task_info_; void *stream_; diff --git a/src/ge/generator/ge_generator.cc b/src/ge/generator/ge_generator.cc index f0b69242..b01f7591 100644 --- a/src/ge/generator/ge_generator.cc +++ b/src/ge/generator/ge_generator.cc @@ -20,6 +20,7 @@ #include "common/helper/model_helper.h" #include "common/helper/om_file_helper.h" #include "common/util.h" +#include "common/util/error_manager/error_manager.h" #include "framework/common/debug/ge_log.h" #include "ge/ge_api.h" #include "graph/ge_context.h" @@ -125,17 +126,7 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen if (data_op == nullptr) { return FAILED; } - auto op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); - auto input_desc = op_desc->MutableInputDesc(index); - GE_CHECK_NOTNULL_EXEC(input_desc, return PARAM_INVALID); - ge::Format old_format = input_desc->GetFormat(); - if (old_format == FORMAT_FRACTAL_NZ || old_format == FORMAT_FRACTAL_Z) { - input_desc->SetFormat(FORMAT_ND); - input_desc->SetOriginFormat(FORMAT_ND); - (void)AttrUtils::SetStr(data_op, "_single_input_format", TypeUtils::FormatToSerialString(old_format)); - (void)AttrUtils::SetBool(data_op, "_is_single_op", true); - } + (void)AttrUtils::SetBool(data_op, "_is_single_op", true); GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail."); GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail."); @@ -157,17 +148,7 @@ static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, cons if (op_desc == nullptr) { return FAILED; } - auto single_op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL_EXEC(single_op_desc, return PARAM_INVALID); - auto output_desc = single_op_desc->MutableOutputDesc(0); - GE_CHECK_NOTNULL_EXEC(output_desc, return PARAM_INVALID); - ge::Format old_format = output_desc->GetFormat(); - if (old_format == FORMAT_FRACTAL_NZ || old_format == FORMAT_FRACTAL_Z) { - output_desc->SetFormat(FORMAT_ND); - output_desc->SetOriginFormat(FORMAT_ND); - (void)AttrUtils::SetStr(op_desc, "_single_output_format", TypeUtils::FormatToSerialString(old_format)); - (void)AttrUtils::SetBool(op_desc, "_is_single_op", true); - } + (void)AttrUtils::SetBool(op_desc, "_is_single_op", true); int32_t count = 0; for (const auto &out_desc : outputs) { GeTensorDesc tensor = out_desc.GetTensorDesc(); @@ -212,19 +193,6 @@ static void GetOpsProtoPath(string &opsproto_path) { opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); } -static string 
GetModelNameFromFileName(const string &file_name_prefix) { - int start_position = 0; - // using output as model_name (ignore ".om") - int filename_suffixes = 3; - if (file_name_prefix.find_last_of('/') != string::npos) { - start_position += 1; - } - int end_position = file_name_prefix.length() - filename_suffixes; - string model_name = file_name_prefix.substr(start_position, end_position - start_position); - GELOGI("Get model_name from file, model_name:%s", model_name.c_str()); - return model_name; -} - class GeGenerator::Impl { public: Status BuildModel(const Graph &graph, const vector &inputs, GraphId &graph_id, GeRootModelPtr &ge_models); @@ -332,8 +300,6 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr GraphId graph_id; GeRootModelPtr ge_root_model = nullptr; GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); - const string model_name = GetModelNameFromFileName(file_name_prefix); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(model_name.empty(), return PARAM_INVALID, "om name is not valid!"); impl_->is_offline_ = is_offline; Status ret = impl_->BuildModel(graph, inputs, graph_id, ge_root_model); if (ret != SUCCESS) { @@ -345,9 +311,15 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr } GE_CHECK_NOTNULL(ge_root_model); GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); + ModelHelper model_helper; + string model_name = ""; + Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), model_name); + if (name_ret != SUCCESS) { + GELOGE(FAILED, "Get model_name failed. Param --output is invalid"); + return PARAM_INVALID; + } map name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; - GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model can not be null"); ge_model->SetName(model_name); ret = impl_->SaveModel(file_name_prefix, ge_model, model); diff --git a/src/ge/graph/build/memory/block_mem_assigner.cc b/src/ge/graph/build/memory/block_mem_assigner.cc index 602b71bd..df7912fa 100644 --- a/src/ge/graph/build/memory/block_mem_assigner.cc +++ b/src/ge/graph/build/memory/block_mem_assigner.cc @@ -38,6 +38,7 @@ namespace { const char *const kAttrNameWorkspaceReuseFlag = "workspace_reuse_flag"; const char *const kL2FusionDynamicConvergeOp = "l2fusion_dynamic_converge_op"; +const char *const kOpNoReuseMem = "no_reuse_mem_flag"; const char *const kDisableReuseMemory = "ge.exec.disableReuseMemory"; const char *const OP_NO_REUSE_MEM = "OP_NO_REUSE_MEM"; const int kReuseMaxCount = 10; @@ -624,8 +625,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, (void)ge::GetContext().GetOption(kDisableReuseMemory, ge_disable_reuse_mem_env); if (ge_disable_reuse_mem_env != "1") { bool reuse_mem_flag = !((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]); - is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && reuse_mem_flag && is_op_reuse_mem && - (IsPreReuse(n, out_index)); + is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !node_op_desc->HasAttr(kOpNoReuseMem) && + reuse_mem_flag && is_op_reuse_mem && (IsPreReuse(n, out_index)); auto stream_id = node_op_desc->GetStreamId(); auto map_iter = reusable_streams_map_.find(stream_id); if (is_reuse_memory && map_iter != reusable_streams_map_.end()) { @@ -1182,6 +1183,9 @@ void ReAssignContinuousBlocks(const std::vector &org_blocks, GELOGI("Block continuous input 
index:%d", memory_block->input_index_); count++; + if (count == 1) { + memory_block->first_continuous_block_ = true; + } if (count == continuous_blocks.size()) { memory_block->last_continuous_block_ = true; } @@ -1242,6 +1246,10 @@ void BlockMemAssigner::ResizeMemoryBlocks() { if (memory_block == nullptr || memory_block->deleted_block_ || memory_block->is_zero_copy_) { continue; } + if (memory_block->first_continuous_block_) { + mem_offset_ += MEM_ALIGN_SIZE; + } + memory_block->Resize(); memory_block->SetHeadOffset(mem_offset_); mem_offset_ += memory_block->Size(); diff --git a/src/ge/graph/build/memory/block_mem_assigner.h b/src/ge/graph/build/memory/block_mem_assigner.h index 14aba576..8ee4506e 100644 --- a/src/ge/graph/build/memory/block_mem_assigner.h +++ b/src/ge/graph/build/memory/block_mem_assigner.h @@ -64,6 +64,7 @@ class MemoryBlock { reuse_mem_(reuse_mem), input_index_(0), continuous_block_(false), + first_continuous_block_(false), last_continuous_block_(false), is_zero_copy_(false), block_size_(block_size), @@ -129,6 +130,7 @@ class MemoryBlock { bool reuse_mem_; uint32_t input_index_; bool continuous_block_; + bool first_continuous_block_; bool last_continuous_block_; bool is_zero_copy_; std::map depend_stream_life_; diff --git a/src/ge/graph/build/memory/graph_mem_assigner.cc b/src/ge/graph/build/memory/graph_mem_assigner.cc index 931ebba4..c4aca639 100644 --- a/src/ge/graph/build/memory/graph_mem_assigner.cc +++ b/src/ge/graph/build/memory/graph_mem_assigner.cc @@ -446,6 +446,7 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node return ge::FAILED; } + memory_offset_[0].mem_offset_ += MEM_ALIGN_SIZE; for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { output_list[out_data_anchor->GetIdx()] = memory_offset_[0].mem_offset_; size_t pre_mem_offset = memory_offset_[0].mem_offset_; diff --git a/src/ge/graph/load/new_model_manager/data_dumper.cc b/src/ge/graph/load/new_model_manager/data_dumper.cc index 47f6ffcf..653a3fa1 100644 --- a/src/ge/graph/load/new_model_manager/data_dumper.cc +++ b/src/ge/graph/load/new_model_manager/data_dumper.cc @@ -21,6 +21,7 @@ #include #include +#include "common/debug/log.h" #include "common/properties_manager.h" #include "framework/common/debug/ge_log.h" #include "framework/common/util.h" @@ -28,6 +29,7 @@ #include "graph/debug/ge_attr_define.h" #include "graph/load/new_model_manager/model_utils.h" #include "graph/utils/attr_utils.h" +#include "graph/utils/tensor_utils.h" #include "proto/ge_ir.pb.h" #include "proto/op_mapping_info.pb.h" #include "runtime/mem.h" @@ -106,6 +108,7 @@ void DataDumper::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_ } void DataDumper::SaveDumpInput(const std::shared_ptr &node) { + GELOGI("Start to save data %s message", node->GetName().c_str()); if (node != nullptr) { auto input_op_desc = node->GetOpDesc(); if (input_op_desc == nullptr) { @@ -126,6 +129,7 @@ void DataDumper::SaveDumpInput(const std::shared_ptr &node) { {op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}}); } } + GELOGI("Save data message successfully"); } } @@ -159,30 +163,39 @@ void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::s return; } - GELOGI("Save input dump task %s, id: %u.", data_op->GetName().c_str(), task_id); + int64_t data_size = 0; + if (AttrUtils::GetInt(input_tensor, ATTR_NAME_INPUT_ORIGIN_SIZE, data_size)) { + GELOGI("Get aipp data size according to attr is %ld", data_size); + } else if 
(TensorUtils::GetTensorSizeInBytes(*input_tensor, data_size) != SUCCESS) { + GELOGE(PARAM_INVALID, "Get input size filed"); + return; + } + + GELOGI("Save input dump task %s, id: %u,stream id :%u,data size :%ld", data_op->GetName().c_str(), task_id, + stream_id, data_size); op_list_.push_back({task_id, stream_id, data_op, args, false, inner_input_mapping.input_anchor_index, - inner_input_mapping.output_anchor_index, input_tensor->GetShape().GetDims()}); + inner_input_mapping.output_anchor_index, input_tensor->GetShape().GetDims(), data_size}); } } static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uintptr_t loop_cond, aicpu::dump::OpMappingInfo &op_mapping_info) { if (step_id != 0) { - GELOGI("step_id exist."); + GELOGI("step_id exists."); op_mapping_info.set_step_id_addr(static_cast(step_id)); } else { GELOGI("step_id is null."); } if (loop_per_iter != 0) { - GELOGI("loop_per_iter exist."); + GELOGI("loop_per_iter exists."); op_mapping_info.set_iterations_per_loop_addr(static_cast(loop_per_iter)); } else { GELOGI("loop_per_iter is null."); } if (loop_cond != 0) { - GELOGI("loop_cond exist."); + GELOGI("loop_cond exists."); op_mapping_info.set_loop_cond_addr(static_cast(loop_cond)); } else { GELOGI("loop_cond is null."); @@ -211,10 +224,19 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump: output.mutable_shape()->add_dim(dim); } + int64_t output_size = 0; + if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { + GELOGE(PARAM_INVALID, "Get output size filed"); + return PARAM_INVALID; + } + GELOGI("Get output size in dump is %ld", output_size); std::string origin_name; int32_t origin_output_index = -1; (void)AttrUtils::GetStr(&output_descs.at(i), ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name); (void)AttrUtils::GetInt(&output_descs.at(i), ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index); + GE_IF_BOOL_EXEC(output_size <= 0, GELOGE(PARAM_INVALID, "Output size %ld is less than zero", output_size); + return PARAM_INVALID) + output.set_size(output_size); output.set_original_name(origin_name); output.set_original_output_index(origin_output_index); output.set_original_output_format(static_cast(output_descs.at(i).GetOriginFormat())); @@ -247,6 +269,10 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump: int32_t origin_output_index = -1; (void)AttrUtils::GetStr(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name); (void)AttrUtils::GetInt(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index); + GE_IF_BOOL_EXEC(inner_dump_info.data_size <= 0, + GELOGE(PARAM_INVALID, "The size of data %ld is less than zero", inner_dump_info.data_size); + return PARAM_INVALID) + output.set_size(inner_dump_info.data_size); output.set_original_name(origin_name); output.set_original_output_index(origin_output_index); output.set_original_output_format(static_cast(output_tensor->GetOriginFormat())); @@ -283,6 +309,17 @@ Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump:: input.mutable_shape()->add_dim(dim); } + int64_t input_size = 0; + if (AttrUtils::GetInt(&input_descs.at(i), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) { + GELOGI("Get aipp input size according to attr is %ld", input_size); + } else if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { + GELOGE(PARAM_INVALID, "Get input size filed"); + return PARAM_INVALID; + } + GELOGI("Get input size in dump is %ld", input_size); + GE_IF_BOOL_EXEC(input_size 
<= 0, GELOGE(PARAM_INVALID, "Input size %ld is less than zero", input_size); + return PARAM_INVALID;) + input.set_size(input_size); input.set_address(static_cast(inner_dump_info.args + sizeof(void *) * i)); task.mutable_input()->Add(std::move(input)); } @@ -323,7 +360,7 @@ Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_in } load_flag_ = true; - GELOGI("LoadDumpInfo success, proto size: %zu.", proto_size); + GELOGI("LoadDumpInfo success, proto size is: %zu.", proto_size); return SUCCESS; } @@ -360,11 +397,12 @@ Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_ return RT_FAILED; } load_flag_ = false; - GELOGI("UnloadDumpInfo success, proto size: %zu.", proto_size); + GELOGI("UnloadDumpInfo success, proto size is: %zu.", proto_size); return SUCCESS; } Status DataDumper::LoadDumpInfo() { - PrintCheckLog(); + std::string dump_list_key; + PrintCheckLog(dump_list_key); if (op_list_.empty()) { return SUCCESS; @@ -374,12 +412,13 @@ Status DataDumper::LoadDumpInfo() { auto dump_path = PropertiesManager::Instance().GetDumpOutputPath(); op_mapping_info.set_dump_path(PropertiesManager::Instance().GetDumpOutputPath() + std::to_string(device_id_) + "/"); - op_mapping_info.set_model_name(model_name_); + op_mapping_info.set_model_name(dump_list_key); op_mapping_info.set_model_id(model_id_); op_mapping_info.set_flag(kAicpuLoadFlag); op_mapping_info.set_dump_step(PropertiesManager::Instance().GetDumpStep()); SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); - GELOGD("Dump step in load dump info is %s", PropertiesManager::Instance().GetDumpStep().c_str()); + GELOGI("Dump step is %s and dump path is %s in load dump info", PropertiesManager::Instance().GetDumpStep().c_str(), + dump_path.c_str()); for (const auto &op_iter : op_list_) { aicpu::dump::Task task; @@ -441,7 +480,7 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, if (PropertiesManager::Instance().GetDumpMode() == kDumpOutput || PropertiesManager::Instance().GetDumpMode() == kDumpInput || PropertiesManager::Instance().GetDumpMode() == kDumpAll) { - GELOGI("add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); + GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); aicpu::dump::Task task; task.set_end_graph(true); task.set_task_id(end_graph_task_id_); @@ -477,7 +516,7 @@ Status DataDumper::UnloadDumpInfo() { return SUCCESS; } -void DataDumper::PrintCheckLog() { +void DataDumper::PrintCheckLog(string &dump_list_key) { std::set model_list = PropertiesManager::Instance().GetAllDumpModel(); if (model_list.empty()) { GELOGI("No model need dump."); @@ -485,19 +524,21 @@ void DataDumper::PrintCheckLog() { } GELOGI("%zu op need dump in %s.", op_list_.size(), model_name_.c_str()); - if (model_list.find(ge::DUMP_ALL_MODEL) == model_list.end()) { - if (model_list.find(model_name_) == model_list.end()) { + bool not_find_by_omname = model_list.find(om_name_) == model_list.end(); + bool not_find_by_modelname = model_list.find(model_name_) == model_list.end(); + if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) { + if (not_find_by_omname && not_find_by_modelname) { std::string model_list_str; for (auto &model : model_list) { model_list_str += "[" + model + "]."; } - GELOGW("Model %s not be set to dump, dump list: %s", model_name_.c_str(), model_list_str.c_str()); + GELOGW("Model %s will not be set to dump, dump list: %s", 
model_name_.c_str(), model_list_str.c_str()); return; } } - - std::set config_dump_op_list = PropertiesManager::Instance().GetDumpPropertyValue(model_name_); + dump_list_key = not_find_by_omname ? model_name_ : om_name_; + std::set config_dump_op_list = PropertiesManager::Instance().GetDumpPropertyValue(dump_list_key); std::set dump_op_list; for (auto &inner_dump_info : op_list_) { // oplist value OpDescPtr is not nullptr @@ -506,7 +547,7 @@ void DataDumper::PrintCheckLog() { for (auto &dump_op : config_dump_op_list) { if (dump_op_list.find(dump_op) == dump_op_list.end()) { - GELOGW("Op %s set to dump but not exist in model %s or not a valid op.", dump_op.c_str(), model_name_.c_str()); + GELOGW("Op %s set to dump but not exist in model %s or not a valid op.", dump_op.c_str(), dump_list_key.c_str()); } } } diff --git a/src/ge/graph/load/new_model_manager/data_dumper.h b/src/ge/graph/load/new_model_manager/data_dumper.h index efcc989a..ee5b3241 100644 --- a/src/ge/graph/load/new_model_manager/data_dumper.h +++ b/src/ge/graph/load/new_model_manager/data_dumper.h @@ -64,6 +64,8 @@ class DataDumper { void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr &op_desc, uintptr_t args); void SaveEndGraphId(uint32_t task_id, uint32_t stream_id); + void SetOmName(const std::string &om_name) { om_name_ = om_name; } + Status LoadDumpInfo(); Status UnloadDumpInfo(); @@ -71,9 +73,13 @@ class DataDumper { private: void ReleaseDevMem(void **ptr) noexcept; - void PrintCheckLog(); + void PrintCheckLog(string &dump_list_key); std::string model_name_; + + // for inference data dump + std::string om_name_; + uint32_t model_id_; RuntimeParam runtime_param_; void *dev_mem_load_; @@ -107,6 +113,7 @@ struct DataDumper::InnerDumpInfo { int input_anchor_index; int output_anchor_index; std::vector dims; + int64_t data_size; }; struct DataDumper::InnerInputMapping { diff --git a/src/ge/graph/load/new_model_manager/davinci_model.cc b/src/ge/graph/load/new_model_manager/davinci_model.cc index 46dd8201..45acee07 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.cc +++ b/src/ge/graph/load/new_model_manager/davinci_model.cc @@ -536,7 +536,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size compute_graph_ = GraphUtils::GetComputeGraph(graph); GE_CHK_BOOL_RET_STATUS(compute_graph_ != nullptr, INTERNAL_ERROR, "Get compute graph is nullptr."); - runtime_param_.graph_id = GetGraphID(compute_graph_->GetName()); + runtime_param_.graph_id = compute_graph_->GetGraphID(); GE_TIMESTAMP_START(TransAllVarData); GE_CHK_STATUS_RET(TransAllVarData(compute_graph_, runtime_param_.graph_id), "TransAllVarData failed."); @@ -1535,7 +1535,10 @@ Status DavinciModel::GetOutputDescInfo(vector &output_desc, "construct output_name failed."); // forward compatbility, if old om has no out_node_name, need to return output follow origin way if (out_size == out_node_name.size()) { - output_name = out_node_name[index] + ":" + std::to_string(src_index[index]); + // neweast plan, the index will add to name during generate model. + bool contains_colon = out_node_name[index].find(":") != std::string::npos; + output_name = + contains_colon ? 
out_node_name[index] : out_node_name[index] + ":" + std::to_string(src_index[index]); } else { output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + std::to_string(src_index[index]); @@ -2510,51 +2513,19 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector &inputs, const vec } Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) { - GELOGI("InitTaskInfo in,task size %zu", model_task_def.task().size()); + GELOGI("InitTaskInfo in,task size %d", model_task_def.task().size()); task_list_.resize(model_task_def.task_size()); - std::vector> futures(model_task_def.task_size()); - ThreadPool executor(kThreadNum); - rtContext_t ctx = nullptr; - rtError_t rt_ret = rtCtxGetCurrent(&ctx); - if (rt_ret != RT_ERROR_NONE || ctx == nullptr) { - GELOGE(RT_FAILED, "Failed to get current context from rt, error-code 0x%X.", rt_ret); - return RT_FAILED; - } - - for (int32_t i = 0; i < model_task_def.task_size(); ++i) { - std::future f = executor.commit( - [](const domi::TaskDef &task, DavinciModel *model, rtContext_t ctx, int32_t idx) -> Status { - rtError_t rt_ret = rtCtxSetCurrent(ctx); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Failed to set context from rt, error-code 0x%X.", rt_ret); - return RT_FAILED; - } - Status ret = FAILED; - // dynamic shape will create task_list_ before - if (model->task_list_[idx] == nullptr) { - model->task_list_[idx] = TaskInfoFactory::Instance().Create(static_cast(task.type())); - GE_CHECK_NOTNULL(model->task_list_[idx]); - } - ret = model->task_list_[idx]->Init(task, model); - return ret; - }, - model_task_def.task(i), this, ctx, i); - if (!f.valid()) { - GELOGE(FAILED, "Future is invalid"); - return FAILED; - } - futures[i] = std::move(f); - } - - Status ret; - for (size_t i = 0; i < futures.size(); ++i) { - ret = futures[i].get(); + for (int i = 0; i < model_task_def.task_size(); ++i) { + // dynamic shape will create task_list_ before + const domi::TaskDef &task = model_task_def.task(i); + task_list_[i] = TaskInfoFactory::Instance().Create(static_cast(task.type())); + GE_CHECK_NOTNULL(task_list_[i]); + Status ret = task_list_[i]->Init(task, this); if (ret != SUCCESS) { - GELOGE(ret, "Task index %zu init failed.", i); + GELOGE(ret, "Task index %d init failed.", i); return ret; } } - GELOGI("InitTaskInfo out"); return SUCCESS; } @@ -2623,7 +2594,7 @@ Status DavinciModel::DistributeTask() { return PARAM_INVALID; } - if (PropertiesManager::Instance().IsLayerNeedDump(name_, op->GetName())) { + if (PropertiesManager::Instance().IsLayerNeedDump(name_, om_name_, op->GetName())) { SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); } } @@ -2661,8 +2632,9 @@ Status DavinciModel::DistributeTask() { void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) { auto all_dump_model = PropertiesManager::Instance().GetAllDumpModel(); - if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || - all_dump_model.find(name_) != all_dump_model.end()) { + bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end(); + bool findByModelName = all_dump_model.find(name_) != all_dump_model.end(); + if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || findByOmName || findByModelName) { GELOGI("start save end_graph_info to dumper, task_id is %u, stream_id is %u", task_id, stream_id); data_dumper_.SaveEndGraphId(task_id, stream_id); } @@ -3344,17 +3316,6 @@ void DavinciModel::FreeWeightsMem() { } } -uint32_t DavinciModel::GetGraphID(const std::string 
&session_graph_id) { - std::string session_id = "_"; - auto pos = session_graph_id.find(session_id); - if (pos != std::string::npos) { - size_t graph_id_length = session_graph_id.length() - pos - session_id.length(); - std::string graph_id = session_graph_id.substr(pos + session_id.length(), graph_id_length); - return static_cast(std::strtol(graph_id.c_str(), nullptr, kDecimal)); - } - return 0; -} - Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) { GELOGI("TransAllVarData start: session_id:%lu, graph_id: %u.", session_id_, graph_id); rtContext_t ctx = nullptr; @@ -3387,6 +3348,7 @@ void DavinciModel::SetDataDumperArgs() { data_dumper_.SetModelName(name_); data_dumper_.SetModelId(model_id_); data_dumper_.SetMemory(runtime_param_); + data_dumper_.SetOmName(om_name_); int32_t device_id = 0; rtError_t rt_ret = rtGetDevice(&device_id); diff --git a/src/ge/graph/load/new_model_manager/davinci_model.h b/src/ge/graph/load/new_model_manager/davinci_model.h index 067fa112..3254a23b 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.h +++ b/src/ge/graph/load/new_model_manager/davinci_model.h @@ -187,6 +187,8 @@ class DavinciModel { // model name string Name() { return name_; } + // om_name + string OmName() { return om_name_; } // version uint32_t Version() const { return version_; } @@ -471,6 +473,8 @@ class DavinciModel { Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); Status GetAllAippInputOutputDims(uint32_t index, std::vector &input_dims, std::vector &output_dims); + // om file name + void SetOmName(string om_name) { om_name_ = om_name; } private: // memory address of weights @@ -752,8 +756,6 @@ class DavinciModel { void CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result); - uint32_t GetGraphID(const std::string &session_graph_id); - Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); Status CopyVarData(ComputeGraphPtr &graph); @@ -771,6 +773,10 @@ class DavinciModel { uint32_t model_id_; uint32_t runtime_model_id_; string name_; + + // used for inference data dump + string om_name_; + uint32_t version_; GeModelPtr ge_model_; diff --git a/src/ge/graph/load/new_model_manager/model_manager.cc b/src/ge/graph/load/new_model_manager/model_manager.cc index 8b17a35b..384e203b 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.cc +++ b/src/ge/graph/load/new_model_manager/model_manager.cc @@ -820,6 +820,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model return FAILED; } davinci_model->SetDeviceId(device_id); + davinci_model->SetOmName(model.om_name); /// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail. /// These session_ids come from the same model, so the values of session_id are the same. 
diff --git a/src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc index a7b169bf..077ae827 100644 --- a/src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc @@ -47,7 +47,8 @@ Status EndGraphTaskInfo::Distribute() { GE_CHECK_NOTNULL(davinci_model_); auto all_dump_model = PropertiesManager::Instance().GetAllDumpModel(); if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || - all_dump_model.find(davinci_model_->Name()) != all_dump_model.end()) { + all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() || + all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) { GELOGI("Start to call rtEndGraphEx"); rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag); if (rt_ret != RT_ERROR_NONE) { diff --git a/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc index 95580a15..79971529 100644 --- a/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc @@ -153,7 +153,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); return FAILED;) - if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), op_desc->GetName())) { + if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), + op_desc->GetName())) { dump_flag_ = RT_KERNEL_DUMPFLAG; dump_args_ = input_output_addr_; } diff --git a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 390e4e99..7ef65555 100644 --- a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -63,7 +63,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci return ret; } - domi::KernelDef kernel_def = task_def.kernel(); + const domi::KernelDef &kernel_def = task_def.kernel(); block_dim_ = kernel_def.block_dim(); args_size_ = kernel_def.args_size(); // get opcontext stored in model @@ -549,7 +549,8 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne return FAILED; } - if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), op_desc->GetName())) { + if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), + op_desc->GetName())) { dump_flag_ = RT_KERNEL_DUMPFLAG; dump_args_ = static_cast(args_) + offset; } @@ -818,7 +819,8 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k return RT_FAILED; } - if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), op_desc->GetName())) { + if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), + op_desc->GetName())) { dump_flag_ = RT_KERNEL_DUMPFLAG; dump_args_ = static_cast(args_) + sizeof(aicpu::AicpuParamHead); } diff --git a/src/ge/graph/partition/graph_partition.cc b/src/ge/graph/partition/graph_partition.cc index 0dff2570..50cd7e81 100644 --- a/src/ge/graph/partition/graph_partition.cc +++ b/src/ge/graph/partition/graph_partition.cc @@ -105,9 +105,8 @@ void 
ge::GraphPartitioner::SetMergedGraphId(ge::ComputeGraphPtr &output_merged_c Status ge::GraphPartitioner::RemoveNodeAndEdgeBetweenEndPld(ge::ComputeGraphPtr &output_merged_compute_graph, const std::vector &sub_graph_list) { - ComputeGraphPtr new_sub_graph = MakeShared("mergedGraph"); - output_merged_compute_graph = new_sub_graph; - if ((new_sub_graph == nullptr) || (MergeAllSubGraph(output_merged_compute_graph, sub_graph_list) != SUCCESS)) { + if ((output_merged_compute_graph == nullptr) || + (MergeAllSubGraph(output_merged_compute_graph, sub_graph_list) != SUCCESS)) { GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: MergeAllSubGraph failed."); return FAILED; } @@ -229,6 +228,9 @@ Status ge::GraphPartitioner::MergeSubGraph(ge::ComputeGraphPtr &output_merged_co return FAILED; } } + ComputeGraphPtr new_sub_graph = MakeShared(original_compute_graph->GetName()); + GE_CHECK_NOTNULL(new_sub_graph); + output_merged_compute_graph = new_sub_graph; GE_TIMESTAMP_START(MergeGraphRemoveNode); if (RemoveNodeAndEdgeBetweenEndPld(output_merged_compute_graph, sub_graph_list) != ge::SUCCESS) { GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: merging sub-graphs failed"); diff --git a/src/ge/graph/passes/same_transdata_breadth_fusion_pass.cc b/src/ge/graph/passes/same_transdata_breadth_fusion_pass.cc index a1f8b14a..3b4e4c19 100644 --- a/src/ge/graph/passes/same_transdata_breadth_fusion_pass.cc +++ b/src/ge/graph/passes/same_transdata_breadth_fusion_pass.cc @@ -70,6 +70,7 @@ OpDescPtr SameTransdataBreadthFusionPass::GetCastOp(const GeTensorDesc &in_desc, cast_op_name << "fusion_cast_" << fusion_cast_op_count++; auto node_op = ge::OperatorFactory::CreateOperator(cast_op_name.str(), CAST); auto cast_op = ge::OpDescUtils::GetOpDescFromOperator(node_op); + node_op.BreakConnect(); if (cast_op == nullptr) { GELOGE(INTERNAL_ERROR, "new fusion cast op failed!"); return nullptr; diff --git a/src/ge/graph/passes/transop_without_reshape_fusion_pass.cc b/src/ge/graph/passes/transop_without_reshape_fusion_pass.cc index 92ae75e6..ba4cd031 100644 --- a/src/ge/graph/passes/transop_without_reshape_fusion_pass.cc +++ b/src/ge/graph/passes/transop_without_reshape_fusion_pass.cc @@ -501,6 +501,7 @@ OpDescPtr TransOpWithoutReshapeFusionPass::GetCastOp(const GeTensorDesc &cast_in cast_op_name << "fusion_cast_op_" << fusion_cast_op_count++; auto node_op = ge::OperatorFactory::CreateOperator(cast_op_name.str(), CAST); auto cast_op = ge::OpDescUtils::GetOpDescFromOperator(node_op); + node_op.BreakConnect(); if (cast_op == nullptr) { GELOGE(INTERNAL_ERROR, "new cast op failed!"); return nullptr; diff --git a/src/ge/graph/preprocess/graph_preprocess.cc b/src/ge/graph/preprocess/graph_preprocess.cc index 68382f52..9c82a06d 100644 --- a/src/ge/graph/preprocess/graph_preprocess.cc +++ b/src/ge/graph/preprocess/graph_preprocess.cc @@ -19,8 +19,6 @@ #include #include #include -#include "common/formats/format_transfers/format_transfer_fractal_nz.h" -#include "common/formats/format_transfers/format_transfer_fractal_z.h" #include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h" #include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h" #include "common/formats/format_transfers/format_transfer_transpose.h" @@ -123,9 +121,6 @@ static std::map output_type_str_to_datatype = { {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}}; const char *const kMbatchSwitchnName = "mbatch-switch-name"; -const int64_t kGemmNdShapeSize = 2; -const int64_t kGemmAlignSize32 = 32; -const int64_t 
kGemmAlignSize16 = 16; OpDescPtr CreateTensorShape(const GeTensorDesc &data_tensor) { GeTensorPtr tensor = MakeShared(); @@ -1135,114 +1130,9 @@ Status ProcessInputNC1HWC0DynShape(NodePtr &node_ptr, bool &is_dynamic_batch, No return SUCCESS; } -Status ProcessGemmFractalZ(GeShape &src_shape, std::vector &dst_shape_vec) { - dst_shape_vec.clear(); - if (src_shape.GetDims().size() != kGemmNdShapeSize) { - GELOGE(INTERNAL_ERROR, "gemm shape size must be 2"); - return FAILED; - } - dst_shape_vec.push_back(formats::Ceil(src_shape.GetDim(0), kGemmAlignSize32)); - dst_shape_vec.push_back(formats::Ceil(src_shape.GetDim(1), kGemmAlignSize16)); - dst_shape_vec.push_back(kGemmAlignSize16); - dst_shape_vec.push_back(kGemmAlignSize32); - return SUCCESS; -} -Status SetInOutForGemm(GeTensorDescPtr &input, GeTensorDescPtr &output, GeShape shape, Format format) { - input->SetShape(shape); - input->SetFormat(format); - output->SetShape(shape); - output->SetFormat(format); - int64_t input_shape_size = 0; - int64_t output_shape_size = 0; - ge::graphStatus input_graph_status = ge::TensorUtils::GetTensorSizeInBytes(*input, input_shape_size); - ge::graphStatus output_graph_status = ge::TensorUtils::GetTensorMemorySizeInBytes(*output, output_shape_size); - if ((input_graph_status != ge::GRAPH_SUCCESS) && (output_graph_status != ge::GRAPH_SUCCESS)) { - GELOGE(GRAPH_FAILED, "GetTensorSize failed!"); - return FAILED; - } - ge::TensorUtils::SetSize(*input, input_shape_size); - ge::TensorUtils::SetSize(*output, output_shape_size); - return SUCCESS; -} - -Status ProcessSingleOpInput(NodePtr &node_ptr, string &single_op_input_format) { - ge::Format input_format = TypeUtils::SerialStringToFormat(single_op_input_format); - auto op_desc = node_ptr->GetOpDesc(); - auto data_input = op_desc->MutableInputDesc(0); - auto data_output = op_desc->MutableOutputDesc(0); - ge::Format src_format = data_input->GetFormat(); - ge::DataType src_dt = data_input->GetDataType(); - ge::GeShape src_shape = data_input->GetShape(); - std::vector dst_shape_vec; - if (input_format == FORMAT_FRACTAL_NZ) { - formats::FormatTransferFractalNz transfer; - if (transfer.TransShape(src_format, src_shape.GetDims(), src_dt, FORMAT_FRACTAL_NZ, dst_shape_vec) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Op [%s] trans FZ Shape failed.", op_desc->GetName().c_str()); - return FAILED; - } - ge::GeShape dst_shape(dst_shape_vec); - if (SetInOutForGemm(data_input, data_output, dst_shape, FORMAT_FRACTAL_NZ) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Op [%s] set FRACTAL_NZ desc failed.", op_desc->GetName().c_str()); - return FAILED; - } - } else if (input_format == FORMAT_FRACTAL_Z) { - if (ProcessGemmFractalZ(src_shape, dst_shape_vec) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Op [%s] trans FRACTAL_Z Shape failed.", op_desc->GetName().c_str()); - return FAILED; - } - ge::GeShape dst_shape(dst_shape_vec); - if (SetInOutForGemm(data_input, data_output, dst_shape, FORMAT_FRACTAL_Z) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Op [%s] set FRACTAL_Z desc failed.", op_desc->GetName().c_str()); - return FAILED; - } - } - // Gemm shape and format should be set at this stage, temporary solution. 
- auto out_anchor = node_ptr->GetOutDataAnchor(0); - for (auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { - GE_CHECK_NOTNULL(in_anchor); - auto index = static_cast(in_anchor->GetIdx()); - ge::NodePtr next_node = in_anchor->GetOwnerNode(); - GE_CHECK_NOTNULL(next_node); - auto next_op_desc = next_node->GetOpDesc(); - GE_CHECK_NOTNULL(next_op_desc); - auto input_desc = next_op_desc->MutableInputDesc(index); - GE_CHECK_NOTNULL(input_desc); - input_desc->SetFormat(input_format); - input_desc->SetShape(data_output->GetShape()); - } - return SUCCESS; -} - -Status ProcessSingleOpOutput(OpDescPtr &op_desc, string &single_op_output_format) { - ge::Format input_format = TypeUtils::SerialStringToFormat(single_op_output_format); - auto data_input = op_desc->MutableInputDesc(0); - ge::Format src_format = data_input->GetFormat(); - ge::DataType src_dt = data_input->GetDataType(); - ge::GeShape src_shape = data_input->GetShape(); - std::vector dst_shape_vec; - if (input_format == FORMAT_FRACTAL_NZ) { - formats::FormatTransferFractalNz transfer; - if (transfer.TransShape(src_format, src_shape.GetDims(), src_dt, FORMAT_FRACTAL_NZ, dst_shape_vec) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Op [%s] trans FZ Shape failed.", op_desc->GetName().c_str()); - return FAILED; - } - ge::GeShape dst_shape(dst_shape_vec); - data_input->SetShape(dst_shape); - data_input->SetFormat(FORMAT_FRACTAL_NZ); - } - return SUCCESS; -} - -Status ProcessDataNodeDynShape(NodePtr &node_ptr, bool &is_single_op) { +Status ProcessDataNodeDynShape(NodePtr &node_ptr) { auto op_desc = node_ptr->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); - std::string single_op_input_format; - if (is_single_op && (ge::AttrUtils::GetStr(op_desc, "_single_input_format", single_op_input_format))) { - if (ProcessSingleOpInput(node_ptr, single_op_input_format) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Process single op input [%s] failed.", node_ptr->GetName().c_str()); - return FAILED; - } - } bool set_fp16 = false; if (!ge::AttrUtils::GetBool(node_ptr->GetOpDesc(), "input_fp16", set_fp16) || !set_fp16) { return SUCCESS; @@ -1375,16 +1265,9 @@ bool NeedUpdateOutputByOutputTypeParm(std::string &output_type, NodePtr &src_nod return false; } -Status ProcessNetoutputNodeDynShape(NodePtr &node, std::string &output_type, bool &is_single_op) { +Status ProcessNetoutputNodeDynShape(NodePtr &node, std::string &output_type) { auto op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); - std::string single_op_output_format; - if (is_single_op && (ge::AttrUtils::GetStr(op_desc, "_single_output_format", single_op_output_format))) { - if (ProcessSingleOpOutput(op_desc, single_op_output_format) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Process single op output [%s] failed.", node->GetName().c_str()); - return FAILED; - } - } ge::DataType output_data_type = ge::DT_FLOAT; for (const auto &in_anchor : node->GetAllInDataAnchors()) { @@ -1717,7 +1600,8 @@ Status GraphPrepare::UpdateInput(const std::vector &user_input) { auto format = desc.GetFormat(); auto origin_format = desc.GetOriginFormat(); bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format); - if (is_internal) { + bool need_check_internal_format = (!options_.is_single_op) && is_internal; + if (need_check_internal_format) { GELOGE(PARAM_INVALID, "Input format %s or origin_format %s is not support.", TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::FormatToSerialString(origin_format).c_str()); return FAILED; @@ -2821,14 +2705,14 @@ Status 
GraphPrepare::UpdateInputOutputByOptions() { } if (node_ptr->GetType() == DATA) { - if (ProcessDataNodeDynShape(node_ptr, options_.is_single_op) != SUCCESS) { + if (ProcessDataNodeDynShape(node_ptr) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Process data node failed"); return FAILED; } } if (node_ptr->GetType() == ge::NETOUTPUT) { - if (ProcessNetoutputNodeDynShape(node_ptr, options_.output_datatype, options_.is_single_op) != SUCCESS) { + if (ProcessNetoutputNodeDynShape(node_ptr, options_.output_datatype) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Process netoutput node failed"); return FAILED; } diff --git a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index 49f4d3dc..2963cd5a 100644 --- a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -40,6 +40,23 @@ namespace ge { namespace { const char *const kMbatchSwitchnName = "mbatch-switch-name"; } // namespace +static void ConvertShape2Nhwc(Format &format, vector &shape_vec) { + if ((format == FORMAT_NHWC) || (shape_vec.size() != static_cast(NORMAL_TENSOR_SIZE))) { + return; + } + if (format != FORMAT_NCHW) { + GELOGW("The format is not NCHW, current format is %s", TypeUtils::FormatToSerialString(format).c_str()); + return; + } + vector shape_vec_tmp; + shape_vec.swap(shape_vec_tmp); + shape_vec.push_back(shape_vec_tmp[NCHW_DIM_N]); + shape_vec.push_back(shape_vec_tmp[NCHW_DIM_H]); + shape_vec.push_back(shape_vec_tmp[NCHW_DIM_W]); + shape_vec.push_back(shape_vec_tmp[NCHW_DIM_C]); + return; +} + Status InsertNewOpUtil::Init() { insert_op_conf_.reset((new (std::nothrow) domi::InsertNewOps())); GE_CHECK_NOTNULL(insert_op_conf_); @@ -223,11 +240,13 @@ Status InsertNewOpUtil::UpdatePrevNodeByAipp(NodePtr &node, std::set &s GELOGE(FAILED, "UpdateOutputDesc fail, graph_ret:%d", graph_ret); return FAILED; } - GELOGI("Get size [%ld] from aipp [%s].", size, aipp_op_desc->GetName().c_str()); + GELOGI("Get input size [%ld] from aipp [%s].", size, aipp_op_desc->GetName().c_str()); if (size == 0) { GELOGE(FAILED, "Can not get size from aipp [%s]", aipp_op_desc->GetName().c_str()); return FAILED; } + // Save the input size of aipp node, which will be used in dumping aipp node or fused aipp node + (void)AttrUtils::SetInt(aipp_input, ATTR_NAME_INPUT_ORIGIN_SIZE, size); auto in_data_anchor = node->GetInDataAnchor(0); GE_CHECK_NOTNULL(in_data_anchor); @@ -305,6 +324,7 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt auto data_opdesc = data->GetOpDesc(); GE_CHECK_NOTNULL(data_opdesc); + Format old_format = output_desc->GetFormat(); auto ret = data_opdesc->UpdateOutputDesc(0, *input_desc); if (ret != GRAPH_SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to update data %s output using switchn %s", data->GetName().c_str(), @@ -317,9 +337,34 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt switchn->GetName().c_str()); return INTERNAL_ERROR; } + // Update attr _mbatch_origin_input_dims for data when it is linked to aipp + UpdateMultiBatchInputDims(data_opdesc, old_format); return SUCCESS; } +void InsertNewOpUtil::UpdateMultiBatchInputDims(const OpDescPtr &data_opdesc, Format &old_format) { + if (!data_opdesc->HasAttr(ATTR_MBATCH_ORIGIN_INPUT_DIMS)) { + GELOGW("Failed to acquire _mbatch_origin_input_dims attr from node [%s]", data_opdesc->GetName().c_str()); + return; + } + auto new_data_dims = data_opdesc->GetOutputDesc(0).GetShape().GetDims(); + vector origin_input_dims; 
+ (void)AttrUtils::GetListInt(data_opdesc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims); + // Convert origin_input_dims to NHWC because data format is set to NHWC when it is linked to aipp. + ConvertShape2Nhwc(old_format, origin_input_dims); + if (new_data_dims.size() != origin_input_dims.size()) { + return; + } + for (size_t i = 0; i < origin_input_dims.size(); ++i) { + // Need to update shape when aipp has crop function because H,W is different, ignore -1. + if (origin_input_dims[i] > 0) { + origin_input_dims[i] = new_data_dims[i]; + } + } + (void)AttrUtils::SetListInt(data_opdesc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims); + return; +} + Status InsertNewOpUtil::GetDataRelatedNode(NodePtr &node, std::map> &data_next_node_map) { GELOGI("Start to get data and next node %s.", node->GetName().c_str()); OpDescPtr data_op = node->GetOpDesc(); diff --git a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h index 8dad2012..b39b3005 100644 --- a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h +++ b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h @@ -61,6 +61,7 @@ class InsertNewOpUtil { std::unique_ptr insert_op_conf_; + void UpdateMultiBatchInputDims(const OpDescPtr &data_opdesc, Format &old_format); Status UpdatePrevNodeByAipp(NodePtr &node, std::set &switchns); Status UpdateDataBySwitchN(const NodePtr &switchn, const NodePtr &data); Status GetDataRelatedNode(NodePtr &node, std::map> &data_next_node_map); diff --git a/src/ge/host_kernels/concat_v2_kernel.cc b/src/ge/host_kernels/concat_v2_kernel.cc index 81127302..c46b4277 100644 --- a/src/ge/host_kernels/concat_v2_kernel.cc +++ b/src/ge/host_kernels/concat_v2_kernel.cc @@ -31,6 +31,7 @@ namespace ge { namespace { const size_t kConcatV2InputNum = 3; +const int kSupportEmptyTensorRank = 1; const std::set concatv2_supported_type = {DT_INT32, DT_FLOAT}; template @@ -39,7 +40,12 @@ void GetOutputData(std::vector &y_data, int64_t loop, size_t &input_size, for (int64_t i = 0; i < loop; i++) { for (size_t k = 0; k < input_size; k++) { GeShape datak_shape = input.at(k)->GetTensorDesc().GetShape(); - const T *datak = reinterpret_cast(input.at(k)->GetData().data()); + auto buffer = input.at(k)->GetData(); + const T *datak = reinterpret_cast(buffer.data()); + if (datak == nullptr || buffer.size() == 0) { + GELOGW("input[%zu] is with no data", k); + continue; + } int64_t gapk = datak_shape.GetShapeSize() / loop; // [2,3] is 6/loop for (int64_t j = 0; j < gapk; j++) { y_data.push_back(datak[j + gapk * i]); @@ -63,7 +69,8 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vectorGetTensorDesc().GetDataType(); + GE_CHECK_NOTNULL(tensor); + DataType data_type = tensor->GetTensorDesc().GetDataType(); uint32_t length = 0; if (!TypeUtils::GetDataTypeLength(data_type, length)) { GELOGW("Can't GetDataTypeLength of data_type: %s", TypeUtils::DataTypeToSerialString(data_type).c_str()); @@ -91,7 +97,7 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vectorGetTensorDesc().GetShape(); + GeShape data0_shape = tensor->GetTensorDesc().GetShape(); int64_t loop = 1; for (int i = 0; i < tidx; i++) { loop *= data0_shape.GetDim(i); @@ -110,29 +116,33 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector &input, int &tidx) { +Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector &input, int &tidx, + ConstGeTensorPtr &tensor) { size_t input_size = input.size(); // N >= 2 and N + 1 >= 3 if (input_size < 
kConcatV2InputNum) { GELOGI("The number of input for ConcatV2 must not be less than %zu.", kConcatV2InputNum); return NOT_CHANGED; } - + bool has_empty_tensor = false; + input_size--; for (size_t i = 0; i < input_size; i++) { if (input[i] == nullptr) { GELOGI("Input%zu must not be null.", i); return NOT_CHANGED; } if (input.at(i)->GetData().size() == 0) { - GELOGI("Check data size fail. input%zu size is 0.", i); - return NOT_CHANGED; + GELOGW("input[%zu] is with no data.", i); + has_empty_tensor = true; + continue; + } + if (tensor == nullptr) { + tensor = input.at(i); // get first valid tensor with data } } - input_size--; - ConstGeTensorPtr tensor0 = input.at(0); - GE_CHECK_NOTNULL(tensor0); - DataType data_type = tensor0->GetTensorDesc().GetDataType(); + GE_CHECK_NOTNULL(tensor); + DataType data_type = tensor->GetTensorDesc().GetDataType(); for (size_t i = 1; i < input_size; i++) { if (data_type != input.at(i)->GetTensorDesc().GetDataType()) { GELOGI("Data type of N inputs for ConcatV2 not the same, check input %zu failed.", i); @@ -149,13 +159,18 @@ Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector &i ConstGeTensorPtr tensor_axis = input.at(input_size); GE_CHECK_NOTNULL(tensor_axis); const int *axis = reinterpret_cast(tensor_axis->GetData().data()); - tidx = axis[0]; // [-rank(values), rank(values)) - int dims = static_cast(tensor0->GetTensorDesc().GetShape().GetDimNum()); // rank + GE_CHECK_NOTNULL(axis); + tidx = axis[0]; // [-rank(values), rank(values)) + int rank = static_cast(tensor->GetTensorDesc().GetShape().GetDimNum()); // rank if (tidx < 0) { - tidx += dims; + tidx += rank; } - if (tidx < 0 || tidx > dims) { - GELOGI("ConcatV2 tidx not legal."); + // 1. tidx should in range [0,rank) + // 2. empty tensor only support case: [n],[m],[] + // case: [[],[]] ,[[],[]] ,[] or other case when rank >=2 is not supported + if (tidx < 0 || tidx >= rank || (has_empty_tensor && rank > kSupportEmptyTensorRank)) { + GELOGW("ConcatV2 info: tidx[%d]_rank[%d]_has_empty_tensor[bool:%d] cannot be supported, skip fold.", tidx, rank, + has_empty_tensor); return NOT_CHANGED; } diff --git a/src/ge/host_kernels/concat_v2_kernel.h b/src/ge/host_kernels/concat_v2_kernel.h index c1514c80..353b7ed5 100644 --- a/src/ge/host_kernels/concat_v2_kernel.h +++ b/src/ge/host_kernels/concat_v2_kernel.h @@ -28,7 +28,7 @@ class ConcatV2Kernel : public Kernel { std::vector &v_output) override; private: - Status ConcatV2PreCompute(const std::vector &input, int &tidx); + Status ConcatV2PreCompute(const std::vector &input, int &tidx, ConstGeTensorPtr &tensor); }; } // namespace ge diff --git a/src/ge/offline/main.cc b/src/ge/offline/main.cc index 27309c1a..f77f006d 100644 --- a/src/ge/offline/main.cc +++ b/src/ge/offline/main.cc @@ -39,6 +39,7 @@ #include "ir_build/atc_ir_common.h" #include "omg/omg.h" #include "omg/parser/parser_factory.h" +#include "omg/parser/parser_inner_ctx.h" #include "parser/common/register_tbe.h" #include "register/op_registry.h" #include "single_op_parser.h" @@ -178,8 +179,6 @@ DEFINE_string(compress_weight_conf, "", "Optional; the config file to compress w DEFINE_string(enable_single_stream, "", "Optional; enable single stream. true: enable; false(default): disable"); -DEFINE_string(quant_optimize, "true", "Optional; enable quant optimize. true: enable; false(default): disable"); - DEFINE_string(log, "default", "Optional; generate atc log. 
Support debug, info, warning, error, null"); DEFINE_string(dump_mode, "0", "Optional; generate infershape json,only support 1 , 0."); @@ -253,6 +252,9 @@ class GFlagUtils { " --op_select_implmode Set op select implmode. Support high_precision, high_performance." "default: high_performance\n" "disable\n" + " --optypelist_for_implmode Appoint which op to use op_select_implmode, used with op_select_implmode ." + "Separate multiple nodes with commas (,). Use double quotation marks (\") to enclose each argument." + "E.g.: \"node_name1,node_name2\"\n" " --head_stream Add head stream. 0(default): disable; 1: enable\n" " --soc_version The soc version. E.g.: \"Ascend310\"\n" " --core_type Set core type AiCore or VectorCore. VectorCore: use vector core. " @@ -270,8 +272,7 @@ class GFlagUtils { "Use double quotation marks (\") to enclose each argument." "E.g: \"imagesize1_height,imagesize1_width;imagesize2_height,imagesize2_width\"\n" " --auto_tune_mode Set tune mode. E.g.: \"GA,RL\", support configure multiple, spit by ,\n" - " --enable_single_stream Enable single stream. true: enable; false(default): disable\n" - " --quant_optimize Enable quant optimize. true(default): enable; false: disable\n"); + " --enable_single_stream Enable single stream. true: enable; false(default): disable\n"); gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); // Using gflags to analyze input parameters @@ -663,6 +664,27 @@ void LoadCustomOpLib() { } } +void SaveCustomCaffeProtoPath() { + GELOGI("Enter save custom caffe proto path."); + string customop_path; + + const char *path_env = std::getenv("ASCEND_OPP_PATH"); + if (path_env != nullptr) { + std::string path = path_env; + customop_path = path + "/framework/custom/caffe/"; + GELOGI("Get custom proto path from env : %s", path_env); + ge::GetParserContext().custom_proto_path = customop_path; + return; + } + std::string path_base = ge::GELib::GetPath(); + GELOGI("path_base is %s", path_base.c_str()); + path_base = path_base.substr(0, path_base.rfind('/')); + path_base = path_base.substr(0, path_base.rfind('/') + 1); + customop_path = path_base + "ops/framework/custom/caffe/"; + ge::GetParserContext().custom_proto_path = customop_path; + return; +} + #endif Status CreateInputsForInference(const ge::Graph &graph, vector &inputs) { @@ -850,6 +872,7 @@ domi::Status GenerateModel(std::map &options, std::string output atc_params.insert(std::pair("is_output_adjust_hw_layout", FLAGS_is_output_adjust_hw_layout)); atc_params.insert(std::pair("compress_weight_conf", FLAGS_compress_weight_conf)); atc_params.insert(std::pair(string(ge::OUTPUT_DATATYPE), FLAGS_output_type)); + atc_params.insert(std::pair("output", output)); Status ret = ParseGraph(graph, atc_params, FLAGS_model.c_str(), FLAGS_weight.c_str(), (domi::FrameworkType)FLAGS_framework, @@ -982,6 +1005,8 @@ domi::Status GenerateOmModel() { // Load custom operator Library LoadCustomOpLib(); + SaveCustomCaffeProtoPath(); + ret = ge::CheckCustomAiCpuOpLib(); GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "check custom aicpu run so failed!"); @@ -1043,8 +1068,6 @@ domi::Status GenerateOmModel() { options.insert(std::pair(string(ge::ENABLE_SINGLE_STREAM), FLAGS_enable_single_stream)); - options.insert(std::pair(string(ge::QUANT_OPTIMIZE), FLAGS_quant_optimize)); - SetDynamicBatchSizeOrImagesizeOptions(); if (!FLAGS_save_original_model.empty()) { diff --git a/src/ge/offline/single_op_parser.cc b/src/ge/offline/single_op_parser.cc index 067d39e2..4d589565 100644 --- a/src/ge/offline/single_op_parser.cc +++ 
b/src/ge/offline/single_op_parser.cc @@ -273,10 +273,6 @@ Status SingleOpParser::ConvertToBuildParam(int index, const SingleOpDesc &single } else { op_desc->AddInputDesc(desc.name, ge_tensor_desc); } - if (desc.format == FORMAT_FRACTAL_NZ || desc.format == FORMAT_FRACTAL_Z) { - ge_tensor_desc.SetFormat(FORMAT_ND); - ge_tensor_desc.SetOriginFormat(FORMAT_ND); - } build_param.inputs.emplace_back(ge_tensor_desc); } @@ -292,10 +288,6 @@ Status SingleOpParser::ConvertToBuildParam(int index, const SingleOpDesc &single TensorUtils::SetInputTensor(ge_tensor_desc, false); TensorUtils::SetOutputTensor(ge_tensor_desc, true); op_desc->AddOutputDesc(ge_tensor_desc); - if (desc.format == FORMAT_FRACTAL_NZ || desc.format == FORMAT_FRACTAL_Z) { - ge_tensor_desc.SetFormat(FORMAT_ND); - ge_tensor_desc.SetOriginFormat(FORMAT_ND); - } build_param.outputs.emplace_back(ge_tensor_desc); } diff --git a/src/ge/session/omg.cc b/src/ge/session/omg.cc index 8fe31624..4754f9b9 100644 --- a/src/ge/session/omg.cc +++ b/src/ge/session/omg.cc @@ -29,6 +29,8 @@ #include "common/types.h" #include "common/util.h" #include "common/util/error_manager/error_manager.h" +#include "common/helper/model_helper.h" +#include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" #include "framework/omg/parser/parser_inner_ctx.h" #include "google/protobuf/io/zero_copy_stream_impl.h" @@ -419,10 +421,6 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const GELOGE(domi::FAILED, "Can not find src node (%s) in graph.", user_out_nodes[i].first.c_str()); return domi::FAILED; } - if (out_node->GetType() == DATA) { - GELOGE(domi::FAILED, "out_nodes [%s] can not be set input data, please check", user_out_nodes[i].first.c_str()); - return domi::FAILED; - } auto op_desc = out_node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); if (i < output_formats.size()) { @@ -441,7 +439,7 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const (void)ge::AttrUtils::SetListInt(op_desc, "_output_dt_index", it_index->second); } output_nodes_info.push_back(std::make_pair(out_node, user_out_nodes[i].second)); - output_nodes_name.push_back(out_node->GetName()); + output_nodes_name.push_back(out_node->GetName() + ":" + std::to_string(user_out_nodes[i].second)); } // default output node (leaf) if (user_out_nodes.empty()) { @@ -468,7 +466,7 @@ Status GetOutputLeaf(NodePtr node, std::vector> if (node->GetType() != NETOUTPUT) { for (size_t index = 0; index < size; ++index) { output_nodes_info.push_back(std::make_pair(node, index)); - output_nodes_name.push_back(node->GetName()); + output_nodes_name.push_back(node->GetName() + ":" + std::to_string(index)); } } else { const auto in_anchors = node->GetAllInDataAnchors(); @@ -480,7 +478,7 @@ Status GetOutputLeaf(NodePtr node, std::vector> } auto out_node = out_anchor->GetOwnerNode(); output_nodes_info.push_back(std::make_pair(out_node, out_anchor->GetIdx())); - output_nodes_name.push_back(out_node->GetName()); + output_nodes_name.push_back(out_node->GetName() + ":" + std::to_string(out_anchor->GetIdx())); } } return SUCCESS; @@ -612,9 +610,16 @@ FMK_FUNC_HOST_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::mapSetTarget(target); // Create an empty computegraph - ComputeGraphPtr compute_graph = nullptr; - GE_MAKE_SHARED(compute_graph = std::make_shared(kGraphDefaultName + "_" + CurrentTimeInStr()), - return FAILED); + std::string om_name; + ParseAtcParms(atc_params, "output", om_name); + ModelHelper model_helper; + string graph_name = ""; + Status name_ret = 
model_helper.GetBaseNameFromFileName(om_name, graph_name); + if (name_ret != SUCCESS) { + graph_name = kGraphDefaultName + "_" + CurrentTimeInStr(); + } + ComputeGraphPtr compute_graph = MakeShared(graph_name); + GE_CHECK_NOTNULL(compute_graph); graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); // initialize omgContext @@ -664,8 +669,6 @@ FMK_FUNC_HOST_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::mapCreateWeightsParser(type); ret = weights_parser->Parse(weights_file, graph); - GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC weights parse ret fail."); // IN ONLY_PRE_CHECK mode, generate pre inspection report only. - if (run_mode == ONLY_PRE_CHECK) { + if (PreChecker::Instance().HasError() || run_mode == ONLY_PRE_CHECK) { + std::string check_report; + ParseAtcParms(atc_params, "check_report", check_report); + GE_RETURN_WITH_LOG_IF_ERROR(PreChecker::Instance().Save(check_report), "Generate pre-checking report failed."); + GEEVENT("The pre-checking report has been saved to %s.", check_report.c_str()); return SUCCESS; } + // Prevent data residue in multiple calls + PreChecker::Instance().Clear(); + + GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC weights parse ret fail."); GELOGI("ATC parser success."); diff --git a/src/proto/fusion_model.proto b/src/proto/fusion_model.proto index 2ff6b77a..6220963c 100644 --- a/src/proto/fusion_model.proto +++ b/src/proto/fusion_model.proto @@ -17,9 +17,10 @@ syntax = "proto3"; import "om.proto"; + package domi; message FusionModelDef { string version = 1; repeated OpDef fusion_op = 2; -} +} \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index 097eccc5..04e1cea3 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -1029,9 +1029,9 @@ REG_OP(BesselI1e) * y: A Tensor of type UnaryDataType. * @attention Constraints: -* @li "base" is supposed to be greater than 0. Retaining the default \n +* @li "base" is supposed to be greater than 0. Retaining the default * value "-1" sets "base" to "e". -* @li If the input value of operator Log is within the range (0, 0.01] or \n +* @li If the input value of operator Log is within the range (0, 0.01] or * [0.95, 1.05], the output accuracy is subject to change. * @par Third-party framework compatibility @@ -1047,11 +1047,11 @@ REG_OP(Log) .OP_END_FACTORY_REG(Log) /** -* @brief Returns x1 * x2 element-wise.\n +* @brief Returns x1 * x2 element-wise. * y = x1 * x2 * @par Inputs: -* @li x1: A Tensor. Must be one of the following types: float16, float32,\n +* @li x1: A Tensor. Must be one of the following types: float16, float32, * float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128. * @li x2: A Tensor. Must be one of the following types: float16, float32, * float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128. @@ -1079,7 +1079,7 @@ REG_OP(Mul) .OP_END_FACTORY_REG(Mul) /** -* @brief Computes the gradient of the square root of "x" with regard to its\n +* @brief Computes the gradient of the square root of "x" with regard to its * input. grad = dy * 0.5/y, where y = sqrt(x), and "dy" is the corresponding * input gradient. @@ -3022,6 +3022,7 @@ REG_OP(CosineEmbeddingLoss) *@brief Kullback-Leibler divergence. *@par Inputs: +* Two inputs, including: *@li x: Tensor of arbitrary shape. *@li target: Tensor of the same shape and dtype as x. 
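For reference, the gradient relation quoted in the SqrtGrad comment earlier in this file ("grad = dy * 0.5/y, where y = sqrt(x)") is just the chain rule; a minimal derivation in the same notation, added here only as a reading aid and not as part of the patch:

    \[ y = \sqrt{x} \;\Rightarrow\; \frac{\partial y}{\partial x} = \frac{1}{2\sqrt{x}} = \frac{0.5}{y}, \qquad \mathrm{grad} = dy \cdot \frac{0.5}{y}. \]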
diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index 625b0f85..29cf0df3 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -93,31 +93,49 @@ REG_OP(MatMulV2) *@par Inputs: *Five inputs, including: -*@li a: A matrix Tensor. 4D. Must be one of the following types:\n float16, int8. Has format [FRACTAL_NZ]. -*@li b: A matrix Tensor. 4D. Must be one of the following types:\n float16, int8. When type is int8, has format [FRACTAL_Z], \n otherwise has format [FRACTAL_NZ]. -*@li c: A matrix Tensor. 2D or higher. Must be one of the following types: \n float16, int32, float32. When type is int32, has format [ND], \n otherwise has format [FRACTAL_NZ]. -*@li alpha: A 1D Tensor. The shape of alpha is [1].\n Must be one of the following types: float16, int32, float32. Has format [ND]. -*@li beta: A 1D Tensor. The shape of beta is [1].\n Must be one of the following types: float16, int32, float32. Has format [ND]. +*@li a: A matrix Tensor. Must be one of the following types: float16, int8. +* Has format [ND, FRACTAL_NZ]. 2D(ND) or 4D(FRACTAL_NZ). +*@li b: A matrix Tensor. Must be one of the following types: float16, int8. +* Has format [ND, FRACTAL_NZ, FRACTAL_Z]. 2D(ND) or 4D(FRACTAL_NZ, FRACTAL_Z). +*@li c: A matrix Tensor. Must be one of the following types: float16, int32, +* float32. Has format [ND, FRACTAL_NZ]. 2D(ND) or 4D(FRACTAL_NZ). +*@li alpha: A 1D Tensor. The shape of alpha is [1]. Must be one of the following +* types: float16, int32, float32. Has format [ND]. +*@li beta: A 1D Tensor. The shape of beta is [1]. Must be one of the following +* types: float16, int32, float32. Has format [ND]. +* The format of a, b, c has restrictions:\n +* When type of a is int8 and type of c is int32, the format of a, b, c should +* all be ND, or a is FRACTAL_NZ and b is FRACTAL_Z and c is ND.\n +* When type of a is int8 and type of c is float32, the format of a, b, c should +* all be ND or a is FRACTAL_NZ and b is FRACTAL_Z and c is FRACTAL_NZ.\n +* When type of a is float16 and type of c is float16, the format of a, b, c +* should all be ND or FRACTAL_NZ.\n +* When type of a is float16 and type of c is float32, the format of a, b, c +* should all be ND or FRACTAL_NZ. *@par Attributes: *Two attributes, including: -*@li transpose_a: Optional. A bool.\n If True, changes the shape of "a" from [M, K] to [K, M].\n Reserved parameters, not used for now. -*@li transpose_b: Optional. A bool.\n If True, changes the shape of "b" from [M, K] to [K, M].\n Reserved parameters, not used for now. +*@li transpose_a: Optional. A bool. If True, changes the shape of "a" from +* [M, K] to [K, M]. +*@li transpose_b: Optional. A bool. If True, changes the shape of "b" from +* [K, N] to [N, K]. *@par Outputs: -*@out: The result matrix Tensor. 4D. Must be one of the following types:\n float16, float32, int32. Has format [FRACTAL_NZ]. +*y: The result matrix Tensor. Must be one of the following types: float16, +* float32, int32. Has format [ND, FRACTAL_NZ], the format should be equal to that of a. +* 2D(ND) or 4D(FRACTAL_NZ). 
*/ -REG_OP(Gemm) +REG_OP(GEMM) .INPUT(a, TensorType({DT_FLOAT16, DT_INT8})) .INPUT(b, TensorType({DT_FLOAT16, DT_INT8})) .INPUT(c, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) .INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) .INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .OUTPUT(out, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) .ATTR(transpose_a, Bool, false) .ATTR(transpose_b, Bool, false) - .OP_END_FACTORY_REG(Gemm) + .OP_END_FACTORY_REG(GEMM) /** *@brief Multiplies matrix "a" by matrix "b", producing "a * b". diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h index b89287e9..e8eb4769 100644 --- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h @@ -361,14 +361,14 @@ REG_OP(BatchNormGradExt2) *@par Inputs: *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. -*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. -*@li momentum: An optional string, input x's Scale factor +*@li variance: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the variance used for inference. +*@li momentum: A Tensor of type float32 or float16, represents the mean and the variance's scale factor *@li scale: An optional tensor of type float16 or float32, no use *@li offset: An optional tensor of type float16 or float32, no use *@par Attributes: *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". *@li use_global_stats: mean inference mode , only can be "True". -*@li mode: An optional input, not use +*@li mode: An optional attr, not use *@par Outputs:\n *@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x" */ @@ -391,7 +391,7 @@ REG_OP(BNInference) *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. *@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. -*@li momentum: An optional float, input x's Scale factor +*@li momentum: A Tensor of type float32 or float16, the mean and the variance's Scale factor *@par Attributes: *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". *@li use_global_stats: mean inference mode , only can be "True". @@ -420,13 +420,13 @@ REG_OP(BnHost) *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. *@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. -*@li momentum: An optional float, input x's Scale factor *@li scale: An optional tensor of type float16 or float32, no use *@li offset: An optional tensor of type float16 or float32, no use *@par Attributes: +*@li momentum: An optional float32 num, represents the mean and the variance's scale factor *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". 
*@li use_global_stats: mean inference mode , only can be "True". -*@li mode: An optional inpout, not use +*@li mode: An optional attr, not use *@par Outputs:\n *@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x" */ diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index f904f191..5d4e6bff 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -62,7 +62,7 @@ namespace ge { * data is 5D with shape [N, C1, Ho, Wo, C0], * where C is the same as that of the feature map and C0 is 16.\n * Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * -* stride_h + 32 * filter_h) * ceil(Wi, 16) �?l1_size and Hf*Wf �?l0b_size/512.\n +* stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512. * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter. @@ -119,7 +119,7 @@ REG_OP(DepthwiseConv2DBackpropFilter) * data is 5D with shape [N, C1, Ho, Wo, C0], * where C is the same as that of the feature map and C0 is 16.\n * Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * -* stride_h + 32 * filter_h) * ceil(Wi, 16) �?l1_size and Hf*Wf �?l0b_size/512.\n +* stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512. * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter. @@ -178,7 +178,7 @@ REG_OP(DepthwiseConv2DBackpropFilterD) * Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the * data is 5D with shape [N, C1, Ho, Wo, C0], * where C is the same as that of the feature map and C0 is 16.\n -* Limited by Tiling: max_h_in_l1 �?C0, where max_h_in_l1 = (l1_size - Hf * +* Limited by Tiling: max_h_in_l1 >= C0, where max_h_in_l1 = (l1_size - Hf * * Wf * C0 * C0 * 2) / (2 * Wo *C0).\n * @par Third-party framework compatibility @@ -235,7 +235,7 @@ REG_OP(DepthwiseConv2DBackpropInput) * Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the * data is 5D with shape [N, C1, Ho, Wo, C0], * where C is the same as that of the feature map and C0 is 16.\n -* Limited by Tiling: max_h_in_l1 �?C0, where max_h_in_l1 = (l1_size - Hf * +* Limited by Tiling: max_h_in_l1 >= C0, where max_h_in_l1 = (l1_size - Hf * * Wf * C0 * C0 * 2) / (2 * Wo *C0).\n * @par Third-party framework compatibility @@ -460,13 +460,10 @@ REG_OP(Conv2DBackpropInputD) *@par Inputs: * Three inputs: * @li x: A Tensor. Must have the same type as "filter". 4D with shape - * [batch, out_height, out_width, out_channels] - * or [batch, out_channels, out_height, out_width]. Gradients with respect + * [batch, out_channels, out_height, out_width]. Gradients with respect * to the output of the convolution. * @li filter: A Tensor of type float16. - * 4D with shape [filter_height, filter_width, in_channels, out_channels], - * or [out_channels, filter_height, filter_width, in_channels], - * or [out_channels, in_channel, filter_height, filter_width]. + * 4D with shape [out_channels, in_channel, filter_height, filter_width].\n * Two optional inputs: * @li bias: An optional tensor of type float16 * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved.\n @@ -478,14 +475,14 @@ REG_OP(Conv2DBackpropInputD) * padding on the feature map * @li dilations: A tuple or list of 4 integers. The dilation factor for each * dimension of input. Must be [1, 1, 1, 1]. 
- * @li groups: Number of blocked connections from input channels to \n - output channels. - * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC".\n + * @li groups: Number of blocked connections from input channels to + * output channels. + * @li data_format: An optional string from: "NCHW". Defaults to "NCHW".\n Specify the data format of the input and output data. * @li offset_x: An optional integer for quantized deconvolution. *@par Outputs: * y: A Tensor. Has the same type as "filter". 4D tensor with shape - * [batch, height, width, channels] or [batch, channels, height, width]. + * [batch, channels, height, width]. */ REG_OP(Deconvolution) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) @@ -493,11 +490,11 @@ REG_OP(Deconvolution) .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) - .ATTR(strides, ListInt, {1, 1, 1, 1}) + .ATTR(strides, ListInt, {1, 1}) .ATTR(pads, ListInt, {0, 0, 0, 0}) .ATTR(dilations, ListInt, {1, 1, 1, 1}) .ATTR(groups, Int, 1) - .ATTR(data_format, String, "NHWC") + .ATTR(data_format, String, "NCHW") .ATTR(offset_x, Int, 0) .OP_END_FACTORY_REG(Deconvolution) /** @@ -642,7 +639,7 @@ REG_OP(Conv2DBackpropFilterD) * @verbatim Output | Restrictions ------------------|---------------------------------------------- - W dimension == 1 | HxW(input) == HxW(filter) == 1x1,2x2...11x11. + W dimension == 1 | HxW(input) == HxW(filter) H dimension == 1 | ------------------|---------------------------------------------- W dimension == 1 | Not supported diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index 7d6007d9..5dca8a9d 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -186,7 +186,7 @@ REG_OP(ROIAlignGrad) * Three inputs, including: \n *@li features: A 5HD Tensor of type float32 or float16. *@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, the value "5" indicates the indexes of images where the ROIs are located, -* "x0", "x1", "y0", and "y1". +* "x0", "y0", "x1", and "y1". *@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved. *@par Attributes: diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index f167dbee..5eb11445 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -219,7 +219,7 @@ REG_OP(MaxPool3D) * @attention Constraints: * @li Computing gradients of global pooling is not supported, which means * "ksize < x1". -* @li "ksiez" is in the range [1, 255]. "strides" is in the range [1, 63] +* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] * @par Third-party framework compatibility * Compatible with the TensorFlow operator MaxPoolGrad. @@ -239,10 +239,9 @@ REG_OP(MaxPoolGrad) * @brief Computes second-order gradients of the maxpooling function. * @par Inputs: -* @li x1: Original forward input tensor. Supported type:float, double, int32, - * uint8, int16, int8, int64, uint16, half, uint32, uint64. -* @li x2: Has the same type and format as input "x1". -* @li grad:Has the same type and format as input "x1". 
+* @li x1: Original forward input tensor of type RealNumberType +* @li x2: Original forward output tensor of type RealNumberType +* @li grad: Gradient tensor of type RealNumberType * @par Attributes: * @li ksize: A required list or tuple, @@ -258,9 +257,12 @@ REG_OP(MaxPoolGrad) * @li "x1" and "grads" must have the same shape. * @li "x2" and "y" must have the same shape. Otherwise, an error is reported. * @li "x1", "x2", "grads", and "y" must be 5D tensors. +* @li ksize[H] and ksize[W] are in the range [1, 255]. +* @li strides[H] and strides[W] are in the range [1, 63]. +* @li Other dimensions of ksize and strides must be 1. * @par Outputs: -* @li y: Has the same type and format as input "x1". +* @li y: Result tensor of type RealNumberType * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator MaxPoolGradGrad. @@ -399,18 +401,15 @@ REG_OP(MaxPoolGradWithArgmax) * @brief Computes second-order gradients of the maxpooling function. * @par Inputs: -* @li x: Original forward input tensor. Supported type: float, double, int32, - * uint8, int16, int8, int64, uint16, half, uint32, uint64. -* @li grad: Gradient tensor. Supported type: float, double, int32, - * uint8, int16, int8, int64, uint16, half, uint32, uint64. -* @li argmax: An tensor of type int32 or int64. +* @li x: Original forward input tensor of type RealNumberType +* @li grad: Gradient tensor of type RealNumberType +* @li argmax: A tensor of type IndexNumberType * @par Attributes: * @li ksize: A required list, specifying the size of the sliding window. * @li strides: A required list, specifying the stride of the sliding window. * @li padding: A required string, window sliding mode. Either SAME or VALID. * @par Outputs: -* @li y:Result tensor. Supported type: float, double, int32, - * uint8, int16, int8, int64, uint16, half, uint32, uint64 +* @li y: Result tensor of type RealNumberType * @attention Constraints: * @li Only the cloud platform is supported. diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 17233386..1c9aa516 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -41,7 +41,7 @@ namespace ge { *@li beta1: A scalar. Has the same type as "var". *@li beta2: A scalar. Has the same type as "var". *@li epsilon: A scalar. Has the same type as "var". -*@li grad: A tensor for the gradient. Has the same type as "var". +*@li grad: A tensor for the gradient. Has the same type as "var". * *@par Attributes: * use_locking: An optional bool. Defaults to "False". @@ -465,7 +465,7 @@ REG_OP(ApplyKerasMomentumD) /** -*@brief Updates '*var' according to the Adam algorithm.. +*@brief Updates '*var' according to the Adam algorithm. * lr_t := {learning_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t) * m_t := beta_1 * m_{t-1} + (1 - beta_1) * g * v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g @@ -866,7 +866,7 @@ REG_OP(ApplyCenteredRMSProp) .OUTPUT(var, TensorType::NumberType()) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ApplyCenteredRMSProp) - + /** *@brief Updates "var" according to the centered RMSProp algorithm. * The centered RMSProp algorithm uses an estimate of the centered second moment @@ -1262,7 +1262,7 @@ REG_OP(DataFormatDimMap) .OP_END_FACTORY_REG(DataFormatDimMap) /** -* @brief Implements stochastic gradient descent (optionally with momentum).\n +* @brief Implements stochastic gradient descent (optionally with momentum). 
* Nesterov momentum is based on the formula from * On the importance of initialization and momentum in deep learning.\n @@ -1508,7 +1508,7 @@ REG_OP(ApplyProximalAdagradD) *@par Attributes: *use_locking: An optional bool. Defaults to "False".\n * If "True", updating of the var and accum tensors will be protected by a lock; \n -* If "False", the behavior is undefined, but may exhibit less contention. +* If "False", the behavior is undefined, but may exhibit less contention. *@par Outputs: *var: A mutable Tensor. Has the same type as "var". @@ -2172,13 +2172,13 @@ REG_OP(SparseApplyFtrl) * Should be a Variable Tensor. * @li grad: A Tensor of the same type as "var", for the gradient. * @li indices: A vector of indices into the first dimension of var and accum. + +* @par Attributes: * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. * @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. * @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. * @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. - -* @par Attributes: -* use_locking: An optional bool. Defaults to "False". +* @li use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var" and "accum" tensors will be * protected by a lock; otherwise the behavior is undefined, * but may exhibit less contention. @@ -2314,6 +2314,7 @@ REG_OP(SparseApplyFtrlV2D) * var <- var - mom\n * * @par Inputs: +* Nine inputs, including: * @li var: A mutable tensor. Must be one of the data types defined in\n * TensorType::NumberType(). Should be from a Variable(). * @li ms: A mutable tensor. Must have the same type as "var". Should be from a @@ -2367,6 +2368,7 @@ REG_OP(SparseApplyRMSProp) * var <- var - mom * * @par Inputs: +* Six inputs, including: * @li var: A mutable tensor. Must be one of the data types defined in * TensorType::NumberType(). Should be from a Variable(). * @li ms: A mutable tensor. Must have the same type as "var". Should be from a @@ -2418,6 +2420,7 @@ REG_OP(SparseApplyRMSPropD) * accum_update <- rho() * accum_update + (1 - rho()) * update.square()\n * * @par Inputs: +* Eight inputs, including: * @li var: A mutable tensor. Must be one of the data types defined in\n * TensorType::NumberType(). Should be from a Variable(). * @li accum: A mutable tensor. Must have the same type as "var". Should be from a @@ -2468,6 +2471,7 @@ REG_OP(SparseApplyAdadelta) * accum_update <- rho() * accum_update + (1 - rho()) * update.square()\n * * @par Inputs: +* Seven inputs, including: * @li var: A mutable tensor. Must be one of the data types defined in * TensorType::NumberType(). Should be from a Variable(). * @li accum: A mutable tensor. Must have the same type as "var". Should be from a diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index d38faf49..1405fdb7 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -203,11 +203,11 @@ REG_OP(Sigmoid) * @brief Computes z = (y - y*y)*dy. * @par Inputs: -* @li y: the input is tensor , dtype is UnaryDataType. -* @li dy the input is tensor , dtype is UnaryDataType. +* @li y: The input is Tensor, dtype is UnaryDataType. +* @li dy: The input is Tensor, dtype is UnaryDataType. * @par Outputs: -* z: the shape of output, dtype is UnaryDataType. +* z: The shape of output, dtype is UnaryDataType. 
*/ REG_OP(SigmoidGrad) .INPUT(y, TensorType(UnaryDataType)) diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h index 4a4bd606..4bf0e5bf 100644 --- a/third_party/fwkacllib/inc/ops/quantize_ops.h +++ b/third_party/fwkacllib/inc/ops/quantize_ops.h @@ -21,17 +21,17 @@ namespace ge { /** -* @brief Dequantizes the input tensor into a float tensor.\n -* [input_min_range, input_max_range] are scalar floats that specify the range -* for "output_data". \n +* @brief Dequantizes the input tensor into a float tensor. +* [min_range, max_range] are float32 tensors that specify the range +* for "y". \n * The "mode" attribute controls exactly which calculations are used to convert\n * the float values to their quantized equivalents. * @par Inputs: -* @li input_data: A Tensor. Must be one of the following types: int8, uint8, +* @li x: A Tensor. Must be one of the following types: int8, uint8, * int32. -* @li input_min_range: A Tensor of type float32. +* @li min_range: A Tensor of type float32. * Specifies the minimum scalar value possibly produced for the input. -* @li input_max_range: A Tensor of type float32. +* @li max_range: A Tensor of type float32. * Specifies the maximum scalar value possibly produced for the input. * @par Attributes: @@ -39,11 +39,11 @@ namespace ge { * Defaults to "MIN_COMBINED". * @par Outputs: -* output_data: A dictionary of type float32. +* y: A dictionary of type float32. * @attention Constraints: -* @li "input_min_range" and "input_max_range" have the same shapes. -* @li "input_data" and "output_data" have the same shapes. +* @li "min_range" and "max_range" have the same shapes. +* @li "x" and "y" have the same shapes. * @par Third-party framework compatibility * Compatible with the TensorFlow operator Dequantize. diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index 95bcd039..aafcece0 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -149,7 +149,7 @@ REG_OP(TileD) * @li indices: A Tensor of type IndexNumberType. * @par Outputs: -* output: A Tensor of type BasicType. +* y: A Tensor of type BasicType. * @see GatherNd() * @attention Constraints: @@ -767,6 +767,7 @@ REG_OP(SliceD) * dimension. * @par Inputs: +* Two inputs, including: * @li x: A 1D or higher tensor of type float16, with the last dimension at * least "k". * Specifies the data to sort. @@ -789,7 +790,7 @@ REG_OP(SliceD) * @li indices: A Tensor of type int32, specifying the indices of sorted data. * @attention Constraints: -* @li k =< 4096 +* @li k =< 5120 * @li Size of the last dimension =< 65500 * @li sorted = true * @li Don't support to get score on the platform of Ascend310 @@ -813,6 +814,7 @@ REG_OP(TopKD) * dimension. * @par Inputs: +* Two inputs, including: * @li x: A 1D or higher tensor of type BasicType, with the last dimension * at least "k". * @li k: A 0D Tensor of type int32.\n @@ -902,8 +904,8 @@ REG_OP(ScatterNdD) * @li x2: A 1D Tensor of type int32. A batch_size tensor of class ids. * @par Attributes: -* @li k: A required int32, specifying the number of top elements to look at for -* computing precision. +* @li k: A required IndexNumberType, specifying the number of top elements to +* look at for computing precision. * @par Outputs: * y: A Tensor of type bool. @@ -1000,6 +1002,7 @@ REG_OP(StridedSliceAssign) * "strides", etc. work exactly as in "StridedSlice". 
* @par Inputs: +* Two inputs, including: * @li var: A mutable ND Tensor of type BasicType. * @li input_value: A mutable ND "Tensor" of type BasicType. @@ -1335,7 +1338,7 @@ REG_OP(InplaceSubD) .OP_END_FACTORY_REG(InplaceSubD) /** -* @brief Applies sparse addition to input "x" using individual values or slices\n +* @brief Applies sparse addition to input "x" using individual values or slices * from "updates" according to "indices". The updates are non-aliasing: "x" is\n * only modified in-place if no other operations will use it. Otherwise, a copy\n * of "x" is made. This operation has a gradient with respect to both "x" and @@ -1372,7 +1375,7 @@ REG_OP(ScatterNonAliasingAdd) * @li x: A Tensor of type RealNumberType. * @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix * of "x.shape". -* @li k: A Tensor. +* @li num_segments: A Tensor of type IndexNumberType. * @par Outputs: * y: A Tensor of type RealNumberType. @@ -1419,13 +1422,13 @@ REG_OP(UnsortedSegmentMinD) * @par Inputs: * Three inputs, including: -* @li x: A Tensor of type RealNumberType. +* @li x: A Tensor of type NumberType. * @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix * of "x.shape". -* @li k: A Tensor. +* @li num_segments: A Tensor of type IndexNumberType. * @par Outputs: -* y: A Tensor of type RealNumberType. +* y: A Tensor of type NumberType. * @see UnsortedSegmentSum(), UnsortedSegmentMin(), diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index a8258eb9..69951da9 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -20,19 +20,38 @@ #include "graph/operator_reg.h" namespace ge { +/** +*@brief Convert tensor format from HWCN to C1HWNCoC0. + +*@par Inputs: +*x: A Tensor. Must be 4D Tensor of type float16, float32, int32, uint16, with format HWCN. + +*@par Outputs: +*y: A 6D Tensor. Has the same type as "x", with format C1HWNCoC0. +*/ REG_OP(DepthwiseWeight4DTo6D) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) .OP_END_FACTORY_REG(DepthwiseWeight4DTo6D) +/** +*@brief Convert tensor format from C1HWNCoC0 to HWCN. + +*@par Inputs: +*x: A Tensor. Must be 6D Tensor of type float16, float32, int32, uint16, with format C1HWNCoC0. + +*@par Attributes: +*channel_size: An optional int, specifying the channel size of 4D Tensor with format HWCN. + +*@par Outputs: +*y: A 4D Tensor. Has the same type as "x", with format HWCN. +*/ REG_OP(DepthwiseWeight6DTo4D) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) .ATTR(channel_size, Int, 16) .OP_END_FACTORY_REG(DepthwiseWeight6DTo4D) - - /** *@brief Permutes the dimensions according to perm.\n The returned tensor's dimension i will correspond to the input dimension perm[i]. @@ -390,20 +409,20 @@ REG_OP(SpaceToBatchD) .OP_END_FACTORY_REG(SpaceToBatchD) /** -* @brief Unpacks the given dimension of a rank-R tensor "x" into rank-(R-1) +* @brief Unpacks the given dimension of a rank-R Tensor "x" into rank-(R-1) * tensors. * @par Inputs: * x: A rank-R tensor (R > 0) of type BasicType, with format ND or NC1HWC0. * @par Attributes: -* @li num: An optional int, specifying the number of tensors to be unpacked to. +* @li num: A required int, specifying the number of tensors to be unpacked to. * Defaults to "None". 
-* @li axis: A required int, specifying the axis to unpack along. The value range +* @li axis: An optional int, specifying the axis to unpack along. The value range * is [-R, R). * @par Outputs: -* y: The list of Tensor objects unpacked from "x", of type BasicType. +* y: Dynamic output. The list of Tensor objects unpacked from "x", of type BasicType. * @attention Constraints: * @li If "num" is not specified, it is inferred from the shape of "x". @@ -434,11 +453,11 @@ REG_OP(Unpack) * dimension of images. * @li strides: A required list or tuple. How far the centers of two consecutive * patches are in the images. Must be: [1, stride_rows, stride_cols, 1]. -* @li rates: A required list or tuple. Must be: [1, rate_rows, rate_cols, 1]. \n -* This is the input stride, specifying how far two consecutive patch \n +* @li rates: A required list or tuple. Must be: [1, rate_rows, rate_cols, 1].\n +* This is the input stride, specifying how far two consecutive patch\n * samples are in the input. Equivalent to extracting patches * with patch_sizes_eff = patch_sizes + (patch_sizes - 1) *\n -* (rates - 1), followed by subsampling them spatially by a factor of rates. \n +* (rates - 1), followed by subsampling them spatially by a factor of rates.\n * This is equivalent to rate in dilated (a.k.a. Atrous) convolutions. * @li padding: A required string. The type of padding algorithm to use. diff --git a/third_party/fwkacllib/inc/register/op_registry.h b/third_party/fwkacllib/inc/register/op_registry.h index 137309b2..1fcdf9de 100644 --- a/third_party/fwkacllib/inc/register/op_registry.h +++ b/third_party/fwkacllib/inc/register/op_registry.h @@ -59,6 +59,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry { domi::ParseParamFunc GetParseParamFunc(const std::string &op_type); + domi::ParseParamByOpFunc GetParseParamByOperatorFunc(const std::string &op_type); + domi::FusionParseParamFunc GetFusionParseParamFunc(const std::string &op_type); domi::ParseSubgraphFunc GetParseSubgraphPostFunc(const std::string &op_type); @@ -73,6 +75,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry { std::unordered_map> op_ori_optype_map_; std::unordered_map op_run_mode_map_; std::unordered_map opParseParamsFnMap_; + std::unordered_map parse_params_by_op_func_map_; std::unordered_map fusionOpParseParamsFnMap_; std::unordered_map op_types_to_parse_subgraph_post_func_; std::unordered_map> remove_input_configure_map_;
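The op_registry.h additions above follow the pattern of the existing per-op-type lookups: a callback of type domi::ParseParamByOpFunc is kept in parse_params_by_op_func_map_ and fetched through GetParseParamByOperatorFunc. Below is a minimal sketch of how a caller might consume the new lookup; it is illustrative only, and the OpRegistry::Instance() singleton accessor and the callback's exact parameter list are assumptions rather than something shown in this patch.

    #include <string>
    #include "register/op_registry.h"

    // Hypothetical helper: prefer the operator-based parser when one is registered
    // for this op type; otherwise signal the caller to fall back to the existing
    // ParseParamFunc path kept in opParseParamsFnMap_.
    domi::Status TryParseParamsByOperator(const std::string &op_type) {
      // Assumed singleton accessor; the lookup itself is the method added above.
      domi::ParseParamByOpFunc parse_fn =
          domi::OpRegistry::Instance()->GetParseParamByOperatorFunc(op_type);
      if (parse_fn == nullptr) {
        // Nothing registered in parse_params_by_op_func_map_ for this op type.
        return domi::FAILED;
      }
      // The callback would be invoked here with the source operator; its exact
      // signature comes from the ParseParamByOpFunc typedef and is omitted.
      // return parse_fn(src_op);
      return domi::SUCCESS;
    }

Keeping the lookup nullable like this presumably leaves op types without an operator-based parser on the unchanged opParseParamsFnMap_ path.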