Synchronize latest Ascend software suite 16 Jul 2020

5 years ago · 97b8c6fb91
--- a/inc/external/ge/ge_api_types.h
+++ b/inc/external/ge/ge_api_types.h
@@ -204,9 +204,6 @@ const std::string SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel";
 // Save original model file name
 const std::string ORIGINAL_MODEL_FILE = "ge.originalModelFile";

 // FE enable quant optimize
 const std::string QUANT_OPTIMIZE = "ge.quantOptimize";

 const char *const OPTION_GE_MAX_DUMP_FILE_NUM = "ge.maxDumpFileNum";
 const char *const OPTION_GE_MAX_DUMP_FILE_SIZE = "ge.maxDumpFileSize";
 const char *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum";
@@ -274,7 +271,6 @@ static const char *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM;
 static const char *const AICORE_NUM = ge::AICORE_NUM.c_str();
 static const char *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str();
 static const char *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str();
 static const char *const QUANT_OPTIMIZE = ge::QUANT_OPTIMIZE.c_str();
 static const char *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str();
 static const char *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str();
 static const char *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str();
@@ -304,7 +300,6 @@ const std::set<std::string> global_options = {CORE_TYPE,
                                              AICORE_NUM,
                                              FUSION_SWITCH_FILE,
                                              ENABLE_SMALL_CHANNEL,
                                              QUANT_OPTIMIZE,
                                              OP_SELECT_IMPL_MODE,
                                              OPTYPELIST_FOR_IMPLMODE};
 }  // namespace ir_option
--- a/inc/external/graph/operator.h
+++ b/inc/external/graph/operator.h
@@ -43,6 +43,7 @@
 #define DYNAMIC_INPUT_TD_NUM(name) ("__dynamic_input_" + name + "_cnt")

 namespace ge {
 class Operator;
 class OperatorImpl;
 class NamedAttrs;
 class Graph;
@@ -50,6 +51,7 @@ class AttrValue;

 using SubgraphBuilder = std::function<Graph()>;
 using OperatorImplPtr = std::shared_ptr<OperatorImpl>;
 using OperatorPtr = std::shared_ptr<Operator>;

 class OpIO;
 using OutHandler = std::shared_ptr<OpIO>;
--- a/inc/external/register/register.h
+++ b/inc/external/register/register.h
@@ -67,6 +67,7 @@ using google::protobuf::Message;
 class OpRegistrationDataImpl;

 using ParseParamFunc = std::function<domi::Status(const google::protobuf::Message *, ge::Operator &)>;
 using ParseParamByOpFunc = std::function<domi::Status(const ge::Operator &, ge::Operator &)>;
 using FusionParseParamFunc =
  std::function<domi::Status(const std::vector<const google::protobuf::Message *>, ge::Operator &)>;
 using ParseSubgraphFunc = std::function<Status(const std::string &subgraph_name, const ge::Graph &graph)>;
@@ -85,6 +86,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistrationData {

  OpRegistrationData &ParseParamsFn(const ParseParamFunc &parseParamFn);

  OpRegistrationData &ParseParamsByOperatorFn(const ParseParamByOpFunc &parse_param_by_op_fn);

  OpRegistrationData &FusionParseParamsFn(const FusionParseParamFunc &fusionParseParamFn);

  OpRegistrationData &ParseSubgraphPostFn(const ParseSubgraphFunc &subgraph_post_fn);
@@ -100,6 +103,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistrationData {
  std::set<std::string> GetOriginOpTypeSet() const;
  domi::FrameworkType GetFrameworkType() const;
  ParseParamFunc GetParseParamFn() const;
  ParseParamByOpFunc GetParseParamByOperatorFn() const;
  FusionParseParamFunc GetFusionParseParamFn() const;
  ParseSubgraphFunc GetParseSubgraphPostFn() const;

--- a/inc/framework/common/ge_types.h
+++ b/inc/framework/common/ge_types.h
@@ -183,6 +183,7 @@ struct ModelData {
  uint32_t model_len = 0;      // Model binary data length
  int32_t priority = 0;        // Model priority
  std::string key;             // Key path for encrypt model, Empty for unencrypt
  std::string om_name;         // om file name, used for data dump
 };

 // The definition of Model information
--- a/inc/framework/common/helper/model_helper.h
+++ b/inc/framework/common/helper/model_helper.h
@@ -46,6 +46,8 @@ class ModelHelper {

  static Status TransModelToGeModel(const ModelPtr& model, GeModelPtr& ge_model);
  static Status TransGeModelToModel(const GeModelPtr& geModelPtr, ModelPtr& modelPtr);
  Status GetBaseNameFromFileName(const std::string& file_name, std::string& base_name);
  Status GetModelNameFromMergedGraphName(const std::string& graph_name, std::string& model_name);

 private:
  bool is_assign_model_ = false;
--- a/inc/framework/ge_runtime/model_runner.h
+++ b/inc/framework/ge_runtime/model_runner.h
@@ -28,21 +28,16 @@
 namespace ge {
 namespace model_runner {
 class RuntimeModel;
 using RuntimeInfo = std::tuple<uint32_t, uint32_t, void *>;

 class ModelRunner {
 public:
  static ModelRunner &Instance();

  bool LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint32_t model_id,
                        std::shared_ptr<DavinciModel> davinci_model, std::shared_ptr<ModelListener> listener);
  bool LoadModelComplete(uint32_t model_id);

  const std::vector<uint32_t> &GetTaskIdList(uint32_t model_id) const;

  const std::vector<uint32_t> &GetStreamIdList(uint32_t model_id) const;

  const std::map<std::string, std::shared_ptr<RuntimeInfo>> &GetRuntimeInfoMap(uint32_t model_id) const;

  bool UnloadModel(uint32_t model_id);

  bool RunModel(uint32_t model_id, const InputData &input_data, OutputData *output_data);
--- a/inc/framework/ge_runtime/task_info.h
+++ b/inc/framework/ge_runtime/task_info.h
@@ -21,7 +21,6 @@
 #include <functional>
 #include <memory>
 #include <string>
 #include <utility>
 #include <vector>

 #include "cce/taskdown_api.h"
@@ -53,27 +52,21 @@ class TaskInfo {
  virtual ~TaskInfo() {}
  uint32_t stream_id() const { return stream_id_; }
  TaskInfoType type() const { return type_; }
  std::string op_name() const { return op_name_; }
  bool dump_flag() const { return dump_flag_; }

 protected:
  TaskInfo(const std::string &op_name, uint32_t stream_id, TaskInfoType type, bool dump_flag)
      : op_name_(op_name), stream_id_(stream_id), type_(type), dump_flag_(dump_flag) {}
  TaskInfo(uint32_t stream_id, TaskInfoType type) : stream_id_(stream_id), type_(type) {}

 private:
  std::string op_name_;
  uint32_t stream_id_;
  TaskInfoType type_;
  bool dump_flag_;
 };

 class CceTaskInfo : public TaskInfo {
 public:
  CceTaskInfo(const std::string &op_name, uint32_t stream_id, const cce::ccOpContext &ctx, const std::string &stub_func,
              uint32_t block_dim, const std::vector<uint8_t> &args, uint32_t args_size,
              const std::vector<uint8_t> &sm_desc, const std::vector<uint8_t> &flow_table,
              const std::vector<uint8_t> &args_offset, bool is_flowtable)
      : TaskInfo(op_name, stream_id, TaskInfoType::CCE, false),
  CceTaskInfo(uint32_t stream_id, const cce::ccOpContext &ctx, const std::string &stub_func, uint32_t block_dim,
              const std::vector<uint8_t> &args, uint32_t args_size, const std::vector<uint8_t> &sm_desc,
              const std::vector<uint8_t> &flow_table, const std::vector<uint8_t> &args_offset, bool is_flowtable)
      : TaskInfo(stream_id, TaskInfoType::CCE),
        ctx_(ctx),
        stub_func_(stub_func),
        block_dim_(block_dim),
@@ -109,11 +102,11 @@ class CceTaskInfo : public TaskInfo {

 class TbeTaskInfo : public TaskInfo {
 public:
  TbeTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string &stub_func, uint32_t block_dim,
              const std::vector<uint8_t> &args, uint32_t args_size, const std::vector<uint8_t> &sm_desc, void *binary,
              uint32_t binary_size, const std::vector<uint8_t> &meta_data, const std::vector<void *> &input_data_addrs,
              const std::vector<void *> &output_data_addrs, const std::vector<void *> &workspace_addrs, bool dump_flag)
      : TaskInfo(op_name, stream_id, TaskInfoType::TBE, dump_flag),
  TbeTaskInfo(uint32_t stream_id, const std::string &stub_func, uint32_t block_dim, const std::vector<uint8_t> &args,
              uint32_t args_size, const std::vector<uint8_t> &sm_desc, void *binary, uint32_t binary_size,
              const std::vector<uint8_t> &meta_data, const std::vector<void *> &input_data_addrs,
              const std::vector<void *> &output_data_addrs, const std::vector<void *> &workspace_addrs)
      : TaskInfo(stream_id, TaskInfoType::TBE),
        stub_func_(stub_func),
        block_dim_(block_dim),
        args_(args),
@@ -160,10 +153,9 @@ class TbeTaskInfo : public TaskInfo {

 class AicpuTaskInfo : public TaskInfo {
 public:
  AicpuTaskInfo(const std::string &op_name, uint32_t stream_id, const string &so_name, const std::string &kernel_name,
                const std::string &node_def, const std::vector<void *> &input_data_addrs,
                const std::vector<void *> &output_data_addrs, bool dump_flag)
      : TaskInfo(op_name, stream_id, TaskInfoType::AICPU, dump_flag),
  AicpuTaskInfo(uint32_t stream_id, const string &so_name, const std::string &kernel_name, const std::string &node_def,
                const std::vector<void *> &input_data_addrs, const std::vector<void *> &output_data_addrs)
      : TaskInfo(stream_id, TaskInfoType::AICPU),
        so_name_(so_name),
        kernel_name_(kernel_name),
        node_def_(node_def),
@@ -185,45 +177,37 @@ class AicpuTaskInfo : public TaskInfo {
  std::vector<void *> output_data_addrs_;
 };

 class LabelSetTaskInfo : public TaskInfo {
 class LabelTaskInfo : public TaskInfo {
 public:
  LabelSetTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_id)
      : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_SET, false), label_id_(label_id) {}
  ~LabelSetTaskInfo() override {}
  uint32_t label_id() const { return label_id_; }

 private:
 protected:
  LabelTaskInfo(uint32_t stream_id, TaskInfoType type, uint32_t label_id)
      : TaskInfo(stream_id, type), label_id_(label_id) {}
  virtual ~LabelTaskInfo() override {}

  uint32_t label_id_;
 };

 class LabelGotoTaskInfo : public TaskInfo {
 class LabelSetTaskInfo : public LabelTaskInfo {
 public:
  LabelGotoTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_id)
      : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_GOTO, false), label_id_(label_id) {}
  ~LabelGotoTaskInfo() override {}
  uint32_t label_id() const { return label_id_; }

 private:
  uint32_t label_id_;
  LabelSetTaskInfo(uint32_t stream_id, uint32_t label_id)
      : LabelTaskInfo(stream_id, TaskInfoType::LABEL_SET, label_id) {}
  ~LabelSetTaskInfo() override {}
 };

 class LabelSwitchTaskInfo : public TaskInfo {
 class LabelSwitchTaskInfo : public LabelTaskInfo {
 public:
  LabelSwitchTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_size,
                      const std::vector<uint32_t> &label_list, void *cond)
      : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_SWITCH, false),
        label_size_(label_size),
        label_list_(label_list),
        cond_(cond) {}
  LabelSwitchTaskInfo(uint32_t stream_id, uint32_t label_id)
      : LabelTaskInfo(stream_id, TaskInfoType::LABEL_SWITCH, label_id) {}
  ~LabelSwitchTaskInfo() override {}
  uint32_t label_size() { return label_size_; };
  const std::vector<uint32_t> &label_list() { return label_list_; };
  void *cond() { return cond_; };
 };

 private:
  uint32_t label_size_;
  std::vector<uint32_t> label_list_;
  void *cond_;
 class LabelGotoTaskInfo : public LabelTaskInfo {
 public:
  LabelGotoTaskInfo(uint32_t stream_id, uint32_t label_id)
      : LabelTaskInfo(stream_id, TaskInfoType::LABEL_GOTO, label_id) {}
  ~LabelGotoTaskInfo() override {}
 };

 class EventTaskInfo : public TaskInfo {
@@ -231,8 +215,8 @@ class EventTaskInfo : public TaskInfo {
  uint32_t event_id() const { return event_id_; }

 protected:
  EventTaskInfo(const std::string &op_name, uint32_t stream_id, TaskInfoType type, uint32_t event_id)
      : TaskInfo(op_name, stream_id, type, false), event_id_(event_id) {}
  EventTaskInfo(uint32_t stream_id, TaskInfoType type, uint32_t event_id)
      : TaskInfo(stream_id, type), event_id_(event_id) {}
  virtual ~EventTaskInfo() override {}

  uint32_t event_id_;
@@ -240,41 +224,39 @@ class EventTaskInfo : public TaskInfo {

 class EventRecordTaskInfo : public EventTaskInfo {
 public:
  EventRecordTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t event_id)
      : EventTaskInfo(op_name, stream_id, TaskInfoType::EVENT_RECORD, event_id) {}
  EventRecordTaskInfo(uint32_t stream_id, uint32_t event_id)
      : EventTaskInfo(stream_id, TaskInfoType::EVENT_RECORD, event_id) {}
  ~EventRecordTaskInfo() override {}
 };

 class EventWaitTaskInfo : public EventTaskInfo {
 public:
  EventWaitTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t event_id)
      : EventTaskInfo(op_name, stream_id, TaskInfoType::EVENT_WAIT, event_id) {}
  EventWaitTaskInfo(uint32_t stream_id, uint32_t event_id)
      : EventTaskInfo(stream_id, TaskInfoType::EVENT_WAIT, event_id) {}
  ~EventWaitTaskInfo() override {}
 };

 class FusionStartTaskInfo : public TaskInfo {
 public:
  explicit FusionStartTaskInfo(const std::string &op_name, uint32_t stream_id)
      : TaskInfo(op_name, stream_id, TaskInfoType::FUSION_START, false) {}
  explicit FusionStartTaskInfo(uint32_t stream_id) : TaskInfo(stream_id, TaskInfoType::FUSION_START) {}
  ~FusionStartTaskInfo() override {}
 };

 class FusionEndTaskInfo : public TaskInfo {
 public:
  explicit FusionEndTaskInfo(const std::string &op_name, uint32_t stream_id)
      : TaskInfo(op_name, stream_id, TaskInfoType::FUSION_END, false) {}
  explicit FusionEndTaskInfo(uint32_t stream_id) : TaskInfo(stream_id, TaskInfoType::FUSION_END) {}
  ~FusionEndTaskInfo() override {}
 };

 class HcclTaskInfo : public TaskInfo {
 public:
  HcclTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string hccl_type, void *input_data_addr,
               void *output_data_addr, void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num,
  HcclTaskInfo(uint32_t stream_id, const std::string hccl_type, void *input_data_addr, void *output_data_addr,
               void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num,
               const std::vector<uint8_t> &private_def, void *ops_kernel_store, int32_t count, int64_t root_id,
               int64_t op_type, int64_t data_type, const std::string &group,
               std::function<bool(void *, void *)> hcom_bind_model, std::function<bool(void *)> hcom_unbind_model,
               std::function<bool(std::shared_ptr<HcclTaskInfo>, void *)> hcom_distribute_task, bool dump_flag)
      : TaskInfo(op_name, stream_id, TaskInfoType::HCCL, dump_flag),
               int64_t op_type, int64_t data_type, std::function<bool(void *, void *)> hcom_bind_model,
               std::function<bool(void *)> hcom_unbind_model,
               std::function<bool(std::shared_ptr<HcclTaskInfo>, void *)> hcom_distribute_task)
      : TaskInfo(stream_id, TaskInfoType::HCCL),
        hccl_type_(hccl_type),
        input_data_addr_(input_data_addr),
        output_data_addr_(output_data_addr),
@@ -287,7 +269,6 @@ class HcclTaskInfo : public TaskInfo {
        root_id_(root_id),
        op_type_(op_type),
        data_type_(data_type),
        group_(group),
        hcom_bind_model_(hcom_bind_model),
        hcom_unbind_model_(hcom_unbind_model),
        hcom_distribute_task_(hcom_distribute_task) {}
@@ -305,7 +286,6 @@ class HcclTaskInfo : public TaskInfo {
  int64_t root_id() const { return root_id_; }
  int64_t op_type() const { return op_type_; }
  int64_t data_type() const { return data_type_; }
  const std::string &group() const { return group_; }
  std::function<bool(void *, void *)> hcom_bind_model() const { return hcom_bind_model_; }
  std::function<bool(void *)> hcom_unbind_model() const { return hcom_unbind_model_; }
  std::function<bool(std::shared_ptr<HcclTaskInfo>, void *)> hcom_distribute_task() const {
@@ -325,7 +305,6 @@ class HcclTaskInfo : public TaskInfo {
  int64_t root_id_;
  int64_t op_type_;
  int64_t data_type_;
  std::string group_;
  std::function<bool(void *, void *)> hcom_bind_model_;
  std::function<bool(void *)> hcom_unbind_model_;
  std::function<bool(std::shared_ptr<HcclTaskInfo>, void *)> hcom_distribute_task_;
@@ -333,11 +312,8 @@ class HcclTaskInfo : public TaskInfo {

 class ProfilerTraceTaskInfo : public TaskInfo {
 public:
  ProfilerTraceTaskInfo(const std::string &op_name, uint32_t stream_id, uint64_t log_id, bool notify, uint32_t flat)
      : TaskInfo(op_name, stream_id, TaskInfoType::PROFILER_TRACE, false),
        log_id_(log_id),
        notify_(notify),
        flat_(flat) {}
  ProfilerTraceTaskInfo(uint32_t stream_id, uint64_t log_id, bool notify, uint32_t flat)
      : TaskInfo(stream_id, TaskInfoType::PROFILER_TRACE), log_id_(log_id), notify_(notify), flat_(flat) {}
  ~ProfilerTraceTaskInfo() override {}

  uint64_t log_id() const { return log_id_; }
@@ -352,9 +328,8 @@ class ProfilerTraceTaskInfo : public TaskInfo {

 class MemcpyAsyncTaskInfo : public TaskInfo {
 public:
  MemcpyAsyncTaskInfo(const std::string &op_name, uint32_t stream_id, void *dst, uint64_t dst_max, void *src,
                      uint64_t count, uint32_t kind, bool dump_flag)
      : TaskInfo(op_name, stream_id, TaskInfoType::MEMCPY_ASYNC, dump_flag),
  MemcpyAsyncTaskInfo(uint32_t stream_id, void *dst, uint64_t dst_max, void *src, uint64_t count, uint32_t kind)
      : TaskInfo(stream_id, TaskInfoType::MEMCPY_ASYNC),
        dst_(dst),
        dst_max_(dst_max),
        src_(src),
@@ -378,9 +353,9 @@ class MemcpyAsyncTaskInfo : public TaskInfo {

 class StreamSwitchTaskInfo : public TaskInfo {
 public:
  StreamSwitchTaskInfo(const std::string &op_name, uint32_t stream_id, int64_t true_stream_id, void *input_addr,
                       void *value_addr, int64_t cond, int64_t data_type)
      : TaskInfo(op_name, stream_id, TaskInfoType::STREAM_SWITCH, false),
  StreamSwitchTaskInfo(uint32_t stream_id, int64_t true_stream_id, void *input_addr, void *value_addr, int64_t cond,
                       int64_t data_type)
      : TaskInfo(stream_id, TaskInfoType::STREAM_SWITCH),
        true_stream_id_(true_stream_id),
        input_addr_(input_addr),
        value_addr_(value_addr),
@@ -404,8 +379,8 @@ class StreamSwitchTaskInfo : public TaskInfo {

 class StreamActiveTaskInfo : public TaskInfo {
 public:
  StreamActiveTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t active_stream_id)
      : TaskInfo(op_name, stream_id, TaskInfoType::STREAM_ACTIVE, false), active_stream_id_(active_stream_id) {}
  StreamActiveTaskInfo(uint32_t stream_id, uint32_t active_stream_id)
      : TaskInfo(stream_id, TaskInfoType::STREAM_ACTIVE), active_stream_id_(active_stream_id) {}
  ~StreamActiveTaskInfo() override {}

  uint32_t active_stream_id() const { return active_stream_id_; }
--- a/inc/graph/debug/ge_attr_define.h
+++ b/inc/graph/debug/ge_attr_define.h
@@ -181,6 +181,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG;
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS;

 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_ORIGIN_SIZE;

 // to be deleted
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_TO_BE_DELETED;
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PERMUTE_RESHAPE_FUSION;
--- a/src/common/graph/ge_attr_define.cc
+++ b/src/common/graph/ge_attr_define.cc
@@ -154,6 +154,7 @@ const std::string ATTR_NAME_RTSWITCH_RECV_EVENT_ID = "rtswitch_event_id";
 const std::string ATTR_NAME_AUTOMIC_ADD_START = "automic_add_addr_start";
 const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE = "automic_add_mem_size";
 const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS = "_dynamic_output_dims";
 const std::string ATTR_NAME_INPUT_ORIGIN_SIZE = "input_origin_size";

 // To be deleted
 const std::string ATTR_TO_BE_DELETED = "to_be_deleted";
--- a/src/common/graph/node.cc
+++ b/src/common/graph/node.cc
@@ -759,6 +759,7 @@ graphStatus Node::Verify() const {
        GELOGW("Verify UpdateOutputName failed");
      }
    }
    node_op.BreakConnect();
  }

  if (op_->CommonVerify() == GRAPH_SUCCESS) {
--- a/src/common/graph/op_desc.cc
+++ b/src/common/graph/op_desc.cc
@@ -818,7 +818,9 @@ graphStatus OpDesc::InferShapeAndType() {
    }
  }
  Operator op_proxy = ge::OpDescUtils::CreateOperatorFromOpDesc(shared_from_this());
  return (graphStatus)infer_func_(op_proxy);
  graphStatus ret = (graphStatus)infer_func_(op_proxy);
  op_proxy.BreakConnect();
  return ret;
 }

 graphStatus OpDesc::DefaultInferFormat() {
@@ -863,12 +865,14 @@ graphStatus OpDesc::DefaultInferFormat() {
 }

 // Runs the verifier function registered for this op's type, if there is one.
 //
 // The verifier is looked up through OperatorFactoryImpl::GetVerifyFunc(GetType()) on first use and
 // cached in verifier_func_. A proxy Operator wrapping this OpDesc is created only when a verifier
 // actually exists, and it is disconnected again before returning.
 //
 // Returns the verifier's result converted to graphStatus, or GRAPH_SUCCESS when no verifier is
 // registered for the type.
 graphStatus OpDesc::OpVerify() {
  if (verifier_func_ == nullptr) {
    verifier_func_ = OperatorFactoryImpl::GetVerifyFunc(GetType());
  }
  if (verifier_func_ == nullptr) {
    // Nothing to verify against: treat the op as valid.
    return GRAPH_SUCCESS;
  }

  Operator op_proxy = ge::OpDescUtils::CreateOperatorFromOpDesc(shared_from_this());
  const graphStatus ret = static_cast<graphStatus>(verifier_func_(op_proxy));
  // Detach the temporary proxy once the verifier is done with it.
  // NOTE(review): presumably this keeps the short-lived proxy from leaving links behind - confirm.
  op_proxy.BreakConnect();
  return ret;
 }
--- a/src/common/graph/operator.cc
+++ b/src/common/graph/operator.cc
@@ -931,7 +931,7 @@ OperatorImplPtr Operator::GetOperatorImplPtr() const { return operator_impl_; }

 void Operator::BreakConnect() const {
  if (operator_impl_ == nullptr) {
    GELOGE(GRAPH_FAILED, "operator impl is nullptr.");
    GELOGW("operator impl is nullptr.");
    return;
  }
  operator_impl_->ClearInputLinks();
@@ -1318,6 +1318,8 @@ class GraphBuilderImpl {
      string type = src_op_impl->op_desc_->GetType();
      auto node_op = ge::OperatorFactory::CreateOperator("node_op", type);
      auto tensor_desc = ge::OpDescUtils::GetOpDescFromOperator(node_op);
      node_op.BreakConnect();

      GE_CHK_BOOL_EXEC(tensor_desc != nullptr, continue, "tensor_desc is null.");
      if ((tensor_desc->GetInputsSize() == 0 && tensor_desc->GetOutputsSize() > 0) || type == DATA ||
          type == VARIABLE || type == INITDATA || type == GETNEXT) {
--- a/src/common/graph/shape_refiner.cc
+++ b/src/common/graph/shape_refiner.cc
@@ -235,6 +235,7 @@ graphStatus ShapeRefiner::InferShapeAndType(const ConstNodePtr &node, Operator &

    GELOGD("get op from OperatorFactory success. opType: %s", op_type.c_str());
    auto temp_op_desc = ge::OpDescUtils::GetOpDescFromOperator(node_op);
    node_op.BreakConnect();
    if (temp_op_desc == nullptr) {
      GELOGE(GRAPH_FAILED, "temp op desc is null");
      return GRAPH_FAILED;
--- a/src/ge/common/ge/tbe_plugin_manager.cc
+++ b/src/ge/common/ge/tbe_plugin_manager.cc
@@ -187,12 +187,9 @@ void TBEPluginManager::LoadCustomOpLib() {
  std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas;
  GELOGI("The size of registration_datas is: %zu", registration_datas.size());
  for (OpRegistrationData reg_data : registration_datas) {
    bool ret = CheckRegisterStatus(reg_data);
    if (ret) {
      GELOGD("Begin to register optype: %s, imply_type: %u", reg_data.GetOmOptype().c_str(),
             static_cast<uint32_t>(reg_data.GetImplyType()));
      domi::OpRegistry::Instance()->Register(reg_data);
    }
    GELOGD("Begin to register optype: %s, imply_type: %u", reg_data.GetOmOptype().c_str(),
           static_cast<uint32_t>(reg_data.GetImplyType()));
    domi::OpRegistry::Instance()->Register(reg_data);
  }
 }

@@ -230,31 +227,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPlug
  }
 }

 bool TBEPluginManager::CheckRegisterStatus(const OpRegistrationData &reg_data) {
  bool ret = true;
  static char *parser_priority = std::getenv("PARSER_PRIORITY");
  static bool keep_cce = parser_priority != nullptr && string(parser_priority) == "cce";
  auto ori_optype_set = reg_data.GetOriginOpTypeSet();
  for (const auto &op_type : ori_optype_set) {
    domi::ImplyType imply_type = domi::OpRegistry::Instance()->GetImplyTypeByOriOpType(op_type);
    GELOGD("Enter into reg_data loop. op_type = %s , om_optype_ = %s", op_type.c_str(), reg_data.GetOmOptype().c_str());
    if (imply_type != domi::ImplyType::BUILDIN) {
      if ((keep_cce && reg_data.GetImplyType() != domi::ImplyType::CCE) ||
          (!keep_cce && reg_data.GetImplyType() != domi::ImplyType::TVM)) {
        GELOGD("op_type[%s] does not need to be changed, om_optype:%s.", op_type.c_str(),
               reg_data.GetOmOptype().c_str());
        ret = false;
      } else {
        GELOGI("op_type[%s] will be changed to om_optype:%s.", op_type.c_str(), reg_data.GetOmOptype().c_str());
      }
    } else {
      GELOGD("First register in ge initialize, original type: %s, om_optype: %s, imply type: %d.", op_type.c_str(),
             reg_data.GetOmOptype().c_str(), static_cast<int>(reg_data.GetImplyType()));
    }
  }
  return ret;
 }

 Status TBEPluginManager::CheckCustomAiCpuOpLib() {
  std::vector<std::string> vec_op_type;

--- a/src/ge/common/ge/tbe_plugin_manager.h
+++ b/src/ge/common/ge/tbe_plugin_manager.h
@@ -63,7 +63,6 @@ class TBEPluginManager {
  static void GetCustomOpPath(std::string &customop_path);
  void LoadCustomOpLib();
  static Status CheckCustomAiCpuOpLib();
  static bool CheckRegisterStatus(const OpRegistrationData &reg_data);

  SoHandlesVec handles_vec_;
  static std::map<string, string> options_;
--- a/src/ge/common/helper/model_helper.cc
+++ b/src/ge/common/helper/model_helper.cc
@@ -184,7 +184,8 @@ ModelHelper::SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::strin
  // Model
  ModelPtr model_ptr = ge::MakeShared<ge::Model>();
  GE_CHECK_NOTNULL_EXEC(model_ptr, return MEMALLOC_FAILED);
  model_ptr->SetName(compute_graph->GetName());
  std::string original_model_name = compute_graph->GetName() + "_original";
  model_ptr->SetName(original_model_name);
  model_ptr->SetGraph(graph);
  model_ptr->SetVersion(static_cast<uint32_t>(OM_PROTO_VERSION));
  string framework_version;
@@ -504,4 +505,36 @@ Status ModelHelper::ReleaseLocalModelData() noexcept {
  }
  return result;
 }

 // Extracts the base name of a model file from its path.
 //
 // Everything up to and including the last '/' is dropped, and a trailing ".om" suffix is removed
 // when (and only when) the name really ends with it. Names without that suffix are kept whole
 // instead of losing their last three characters.
 //
 //   file_name: path of the model file (the value of --output); must not be empty.
 //   base_name: receives the extracted name.
 //
 // Returns FAILED (through GE_CHK_BOOL_EXEC_WARN) when file_name is empty or the extracted name is
 // empty, SUCCESS otherwise.
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::GetBaseNameFromFileName(const string &file_name,
                                                                                              string &base_name) {
  GELOGD("Get base_name from file, file_name:%s", file_name.c_str());
  GE_CHK_BOOL_EXEC_WARN(!file_name.empty(), return FAILED, "File path may not valid, check params --output");
  const string om_suffix = ".om";
  const size_t last_slash = file_name.rfind('/');
  const size_t start_position = (last_slash == string::npos) ? 0 : last_slash + 1;
  // start_position never exceeds length(), so this subtraction cannot wrap around.
  size_t name_length = file_name.length() - start_position;
  // Only strip the suffix if it is present and fits inside the name part, which also keeps the
  // unsigned arithmetic below from underflowing on very short names.
  if (name_length >= om_suffix.length() &&
      file_name.compare(file_name.length() - om_suffix.length(), om_suffix.length(), om_suffix) == 0) {
    name_length -= om_suffix.length();
  }
  base_name = file_name.substr(start_position, name_length);
  GE_CHK_BOOL_EXEC_WARN(!base_name.empty(), return FAILED, "Get base_name failed, check params --output");
  return SUCCESS;
 }

 // Derives the model name from the name of a merged graph.
 //
 // Only meaningful after graph merging, when the graph name has been extended with "_x" (x being
 // the graph index): the name is cut at its last '_'. A name without any '_' is returned whole.
 //
 //   graph_name: name of the merged graph; must not be empty.
 //   model_name: receives the part of graph_name before the last '_'.
 //
 // Returns FAILED (through GE_CHK_BOOL_EXEC_WARN) when graph_name is empty or the resulting name is
 // empty, SUCCESS otherwise.
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status
 ModelHelper::GetModelNameFromMergedGraphName(const string &graph_name, string &model_name) {
  GELOGD("Get model_name from graph_name, graph_name:%s", graph_name.c_str());
  GE_CHK_BOOL_EXEC_WARN(!graph_name.empty(), return FAILED, "File path may not valid, check params --output");
  // Keep everything in front of the index separator; without a separator keep the full name.
  const size_t last_underscore = graph_name.find_last_of('_');
  const size_t name_length = (last_underscore == string::npos) ? graph_name.length() : last_underscore;
  model_name = graph_name.substr(0, name_length);
  GE_CHK_BOOL_EXEC_WARN(!model_name.empty(), return FAILED, "Get model_name failed, check params --output");
  return SUCCESS;
 }
 }  // namespace ge
--- a/src/ge/common/model_parser/base.cc
+++ b/src/ge/common/model_parser/base.cc
@@ -15,7 +15,7 @@
 */

 #include "common/model_parser/base.h"

 #include "common/helper/model_helper.h"
 #include <securec.h>
 #include <sys/sysinfo.h>
 #include <fstream>
@@ -61,7 +61,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro

  // read data as a block:
  (void)fs.read(data, len);

  ModelHelper model_helper;
  model_helper.GetBaseNameFromFileName(model_path, model_data.om_name);
  // Set the model data parameter
  model_data.model_data = data;
  model_data.model_len = len;
--- a/src/ge/common/profiling/profiling_manager.cc
+++ b/src/ge/common/profiling/profiling_manager.cc
@@ -292,6 +292,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St
      GELOGW("ProfMgrStartUp failed.");
      return FAILED;
    }
    GELOGD("StartProfiling, prof_handle: %p", prof_handle);
    prof_handle_vec_.push_back(prof_handle);
  }
 #endif
@@ -314,8 +315,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf
  for (size_t i = 0; i < prof_handle_vec_.size(); ++i) {
    int result = ProfMgrStop(prof_handle_vec_[i]);
    if (result != 0) {
      GELOGW("ProfMgr stop return fail:%d.", result);
      return;
      GELOGW("ProfMgr stop return fail:%d, handle:%p", result, prof_handle_vec_[i]);
    }
  }
  vector<void *>().swap(prof_handle_vec_);
--- a/src/ge/common/properties_manager.cc
+++ b/src/ge/common/properties_manager.cc
@@ -208,6 +208,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> Propertie
 }

 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool PropertiesManager::IsLayerNeedDump(const std::string &model,
                                                                                         const std::string &om_name,
                                                                                         const std::string &op_name) {
  std::lock_guard<std::mutex> lock(dump_mutex_);
  // if dump all
@@ -216,9 +217,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool PropertiesManager::IsLayer
  }

  // if this model need dump
  auto model_iter = model_dump_properties_map_.find(model);
  if (model_iter != model_dump_properties_map_.end()) {
  auto om_name_iter = model_dump_properties_map_.find(om_name);
  auto model_name_iter = model_dump_properties_map_.find(model);
  if (om_name_iter != model_dump_properties_map_.end() || model_name_iter != model_dump_properties_map_.end()) {
    // if no dump layer info, dump all layer in this model
    auto model_iter = om_name_iter != model_dump_properties_map_.end() ? om_name_iter : model_name_iter;
    if (model_iter->second.empty()) {
      return true;
    }
--- a/src/ge/common/properties_manager.h
+++ b/src/ge/common/properties_manager.h
@@ -84,7 +84,7 @@ class PropertiesManager {
  void AddDumpPropertyValue(const std::string &model, const std::set<std::string> &layers);
  std::set<std::string> GetAllDumpModel();
  std::set<std::string> GetDumpPropertyValue(const std::string &model);
  bool IsLayerNeedDump(const std::string &model, const std::string &op_name);
  bool IsLayerNeedDump(const std::string &model, const std::string &om_name, const std::string &op_name);
  void DeleteDumpPropertyValue(const std::string &model);
  void ClearDumpPropertyValue();
  bool QueryModelDumpStatus(const std::string &model);
--- a/src/ge/executor/ge_executor.cc
+++ b/src/ge/executor/ge_executor.cc
@@ -641,7 +641,6 @@ Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_da
      model_data.model_data = nullptr;
    }
  }

  return ret;
 }

--- a/src/ge/ge_local_engine/engine/host_cpu_engine.cc
+++ b/src/ge/ge_local_engine/engine/host_cpu_engine.cc
@@ -131,6 +131,7 @@ Status HostCpuEngine::RunInternal(const ge::OpDescPtr &op_desc, HostCpuOp &op_ke
    GELOGE(FAILED, "Failed to compute host cpu op. node = %s, ret = %u", op_desc->GetName().c_str(), ret);
    return FAILED;
  }
  op.BreakConnect();

  return SUCCESS;
 }
--- a/src/ge/ge_runner.mk
+++ b/src/ge/ge_runner.mk
@@ -407,7 +407,6 @@ LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD
 LOCAL_CFLAGS += -g -O0

 LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES)

 LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES)
 LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES)

--- a/src/ge/ge_runtime/model_runner.cc
+++ b/src/ge/ge_runtime/model_runner.cc
@@ -49,15 +49,6 @@ bool ModelRunner::LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint
  return true;
 }

 // Completes loading of the model registered under model_id.
 // Returns false (after logging an error) when no such model is registered; otherwise returns whatever the
 // model's LoadComplete() reports.
 bool ModelRunner::LoadModelComplete(uint32_t model_id) {
  const auto found = runtime_models_.find(model_id);
  if (found != runtime_models_.end()) {
    return found->second->LoadComplete();
  }

  GELOGE(PARAM_INVALID, "Model id %u not found.", model_id);
  return false;
 }

 const std::vector<uint32_t> &ModelRunner::GetTaskIdList(uint32_t model_id) const {
  auto model_iter = runtime_models_.find(model_id);
  if (model_iter == runtime_models_.end()) {
@@ -69,28 +60,6 @@ const std::vector<uint32_t> &ModelRunner::GetTaskIdList(uint32_t model_id) const
  return model_iter->second->GetTaskIdList();
 }

 // Looks up the stream id list of the model registered under model_id.
 // An unknown id is logged as an error and answered with a reference to a shared, always-empty vector.
 const std::vector<uint32_t> &ModelRunner::GetStreamIdList(uint32_t model_id) const {
  const auto found = runtime_models_.find(model_id);
  if (found != runtime_models_.end()) {
    return found->second->GetStreamIdList();
  }

  GELOGE(PARAM_INVALID, "Model id %u not found.", model_id);
  // Static storage so the returned reference stays valid after this call returns.
  static const std::vector<uint32_t> empty_ret;
  return empty_ret;
 }

 // Looks up the runtime info map of the model registered under model_id.
 // An unknown id only produces a warning and is answered with a reference to a shared, always-empty map.
 const std::map<std::string, std::shared_ptr<RuntimeInfo>> &ModelRunner::GetRuntimeInfoMap(uint32_t model_id) const {
  const auto found = runtime_models_.find(model_id);
  if (found != runtime_models_.end()) {
    return found->second->GetRuntimeInfoMap();
  }

  GELOGW("Model id %u not found.", model_id);
  // Static storage so the returned reference stays valid after this call returns.
  static const std::map<std::string, std::shared_ptr<RuntimeInfo>> empty_ret;
  return empty_ret;
 }

 bool ModelRunner::UnloadModel(uint32_t model_id) {
  auto iter = runtime_models_.find(model_id);
  if (iter != runtime_models_.end()) {
--- a/src/ge/ge_runtime/output.cc
+++ b/src/ge/ge_runtime/output.cc
@@ -76,7 +76,7 @@ bool Output::CopyRslt(OutputData *rslt, uint32_t data_begin, uint32_t &data_inde
    DataBuffer data_buf = rslt->blobs[data_begin + data_count];
    bool ret = SetDataBuf(data_buf, data_begin, data_count, i, support_mem_share);
    if (!ret) {
      GELOGE(FAILED, "Copy data to host error. index: %lu, addr: %p", i, v_input_data_addr_[i]);
      GELOGE(FAILED, "Copy data to host failed. index: %lu, addr: %p", i, v_input_data_addr_[i]);
      return ret;
    }
    data_index = data_begin + data_count;
--- a/src/ge/ge_runtime/runtime_model.cc
+++ b/src/ge/ge_runtime/runtime_model.cc
@@ -28,6 +28,7 @@

 namespace ge {
 namespace model_runner {

 RuntimeModel::~RuntimeModel() {
  GELOGI("RuntimeModel destructor start");

@@ -115,34 +116,23 @@ bool RuntimeModel::InitEvent(uint32_t event_num) {
  return true;
 }

 bool RuntimeModel::InitLabel(std::shared_ptr<DavinciModel> &davinci_model) {
  GELOGI("batch number:%u.", davinci_model->GetBatchNum());
  label_list_.resize(davinci_model->GetBatchNum());
  for (auto &task_info : davinci_model->GetTaskInfoList()) {
    if (task_info == nullptr) {
      GELOGE(PARAM_INVALID, "task_info is null.");
      continue;
    }

    if (task_info->type() != TaskInfoType::LABEL_SET) {
      continue;
    }
    auto label_set_task_info = std::static_pointer_cast<LabelSetTaskInfo>(task_info);

    if (label_set_task_info->stream_id() >= stream_list_.size()) {
      GELOGE(PARAM_INVALID, "Invalid stream id.");
 bool RuntimeModel::InitLabel(uint32_t batch_num) {
  GELOGI("batch number:%u.", batch_num);
  for (uint32_t i = 0; (batch_num != 0 && i <= batch_num); ++i) {
    rtLabel_t rt_lLabel = nullptr;
    rtError_t rt_ret = rtLabelCreate(&rt_lLabel);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt api rtLabelCreate failed, i; %u; ret: 0x%X", i, rt_ret);
      return false;
    }

    rtLabel_t rt_label = nullptr;
    rtError_t rt_ret = rtLabelCreateEx(&rt_label, stream_list_[label_set_task_info->stream_id()]);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt api rtLabelCreate failed, ret: 0x%X", rt_ret);
    if (rt_lLabel == nullptr) {
      GELOGE(RT_FAILED, "rtLabel is nullptr!");
      return false;
    }
    label_list_[label_set_task_info->label_id()] = rt_label;
  }

    label_list_.emplace_back(rt_lLabel);
  }
  return true;
 }

@@ -174,7 +164,7 @@ bool RuntimeModel::InitResource(std::shared_ptr<DavinciModel> &davinci_model) {
    return false;
  }

  if (!InitLabel(davinci_model)) {
  if (!InitLabel(davinci_model->GetBatchNum())) {
    return false;
  }

@@ -219,41 +209,20 @@ bool RuntimeModel::LoadTask() {
      return false;
    }
    task_id_list_.push_back(task_id);
    stream_id_list_.push_back(stream_id);
    if (task->Args() != nullptr) {
      std::shared_ptr<RuntimeInfo> runtime_tuple = nullptr;
      GE_MAKE_SHARED(runtime_tuple = std::make_shared<RuntimeInfo>(task_id, stream_id, task->Args()), return false);
      auto emplace_ret = runtime_info_map_.emplace(task->task_name(), runtime_tuple);
      if (!emplace_ret.second) {
        GELOGW("Task name exist:%s", task->task_name().c_str());
      }
    }
  }
  if (task_list_.empty()) {
    GELOGE(FAILED, "Task list is empty");
    return false;
  }
  GELOGI("Distribute task succ.");

  GELOGI("LoadTask succ.");
  return true;
 }

 bool RuntimeModel::LoadComplete() {
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  auto rt_ret = rtModelGetTaskId(rt_model_handle_, &task_id, &stream_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rtModelGetTaskId failed, ret:0x%X", rt_ret);
    return RT_FAILED;
  }
  task_id_list_.push_back(task_id);
  stream_id_list_.push_back(stream_id);

  rt_ret = rtModelLoadComplete(rt_model_handle_);
  auto rt_ret = rtModelLoadComplete(rt_model_handle_);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api rtModelLoadComplete failed, ret: 0x%X.", rt_ret);
    return false;
  }

  GELOGI("LoadTask succ.");
  return true;
 }

@@ -301,14 +270,10 @@ bool RuntimeModel::Run() {
    return false;
  }

  GELOGI("Run rtModelExecute success, ret = 0x%X", ret);
  GELOGI("Run rtModelExecute success");

  ret = rtStreamSynchronize(rt_model_stream_);
  if (ret != RT_ERROR_NONE) {
    if (ret == RT_ERROR_END_OF_SEQUENCE) {
      GELOGI("Model stream RT_ERROR_END_OF_SEQUENCE signal received, ret = 0x%X", ret);
      return true;
    }
    GELOGE(RT_FAILED, "Model stream sync failed, ret = 0x%X", ret);
    return false;
  }
@@ -468,7 +433,7 @@ bool RuntimeModel::InitConstantInfo(std::shared_ptr<DavinciModel> &davinci_model
    }

    if (constant->output_tensors[0].size < constant->weight_data.size()) {
      GELOGE(PARAM_INVALID, "Output size:%u less than weight data size:%zu", constant->output_tensors[0].size,
      GELOGE(PARAM_INVALID, "Output size:%u is less than weight data size:%zu", constant->output_tensors[0].size,
             constant->weight_data.size());
      return false;
    }
@@ -483,8 +448,11 @@ bool RuntimeModel::InitConstantInfo(std::shared_ptr<DavinciModel> &davinci_model
      /// The logic of GetShapeSize is wrong, the scaler tensor's GetShapeSize is zero
      /// and that of unknown shape is zero too.
      /// Unknown shape will not appear here, so we can use zero judge a tensor is scaler or not.
      int64_t elem_num =
        (constant->weight_tensors[0].GetShapeSize() == 0) ? 1 : constant->weight_tensors[0].GetShapeSize();
      int64_t elem_num = constant->weight_tensors[0].GetShapeSize();
      if (elem_num == 0 && constant->weight_tensors[0].size == 0) {
        elem_num = 1;
      }

      if (constant->weight_data.size() < sizeof(uint64_t)) {
        GELOGE(FAILED, "weight_data size is smaller than sizeof(uint64_t)");
        return false;
@@ -527,6 +495,5 @@ void RuntimeModel::CreateOutput(uint32_t index, const OpInfo &op_info, InputOutp

 // Read-only access to the task ids held in task_id_list_.
 const std::vector<uint32_t> &RuntimeModel::GetTaskIdList() const {
  return task_id_list_;
 }

 // Read-only access to the stream ids held in stream_id_list_.
 const std::vector<uint32_t> &RuntimeModel::GetStreamIdList() const {
  return stream_id_list_;
 }
 }  // namespace model_runner
 }  // namespace ge
--- a/src/ge/ge_runtime/runtime_model.h
+++ b/src/ge/ge_runtime/runtime_model.h
@@ -27,7 +27,7 @@

 namespace ge {
 namespace model_runner {
 using RuntimeInfo = std::tuple<uint32_t, uint32_t, void *>;

 class Task;
 class RuntimeModel {
 public:
@@ -35,10 +35,7 @@ class RuntimeModel {
  ~RuntimeModel();

  bool Load(uint32_t device_id, uint64_t session_id, std::shared_ptr<DavinciModel> &davinci_model);
  bool LoadComplete();
  const std::vector<uint32_t> &GetTaskIdList() const;
  const std::vector<uint32_t> &GetStreamIdList() const;
  const std::map<std::string, std::shared_ptr<RuntimeInfo>> &GetRuntimeInfoMap() const { return runtime_info_map_; }
  bool Run();
  bool CopyInputData(const InputData &input_data);
  bool GetInputOutputDescInfo(bool zero_copy, std::vector<InputOutputDescInfo> *input_desc,
@@ -51,7 +48,7 @@ class RuntimeModel {
  bool LoadTask();
  bool InitStream(std::shared_ptr<DavinciModel> &davinci_model);
  bool InitEvent(uint32_t event_num);
  bool InitLabel(std::shared_ptr<DavinciModel> &davinci_model);
  bool InitLabel(uint32_t batch_num);
  bool InitDataInfo(std::shared_ptr<DavinciModel> &davinci_model);
  bool InitOutputInfo(std::shared_ptr<DavinciModel> &davinci_model);
  bool InitConstantInfo(std::shared_ptr<DavinciModel> &davinci_model);
@@ -80,8 +77,6 @@ class RuntimeModel {
  std::vector<std::shared_ptr<OpInfo>> constant_info_list_{};

  std::vector<uint32_t> task_id_list_{};
  std::vector<uint32_t> stream_id_list_{};
  std::map<std::string, std::shared_ptr<RuntimeInfo>> runtime_info_map_;
 };

 }  // namespace model_runner
--- a/src/ge/ge_runtime/task/aicpu_task.cc
+++ b/src/ge/ge_runtime/task/aicpu_task.cc
@@ -85,15 +85,11 @@ bool AicpuTask::Distribute() {
    return false;
  }

  input_output_addr_ = reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + io_addr_offset);

  auto dump_flag = task_info_->dump_flag() ? RT_KERNEL_DUMPFLAG : RT_KERNEL_DEFAULT;
  GELOGI(
    "Distribute AicpuTask start, args_size = %u, io_addrs_num = %u, so_name = %s, kernel_name = %s, dump_flag = %d.",
    args_size, io_addrs_num, task_info_->so_name().data(), task_info_->kernel_name().data(), dump_flag);
  rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(task_info_->so_name().data()),
                                     reinterpret_cast<const void *>(task_info_->kernel_name().data()), 1, args_,
                                     args_size, nullptr, stream_, dump_flag);
  GELOGI("Distribute AicpuTask start, args_size = %u, io_addrs_num = %u, so_name = %s, kernel_name = %s.", args_size,
         io_addrs_num, task_info_->so_name().data(), task_info_->kernel_name().data());
  rt_ret = rtCpuKernelLaunch(reinterpret_cast<const void *>(task_info_->so_name().data()),
                             reinterpret_cast<const void *>(task_info_->kernel_name().data()), 1, args_, args_size,
                             nullptr, stream_);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
    return false;
--- a/src/ge/ge_runtime/task/aicpu_task.h
+++ b/src/ge/ge_runtime/task/aicpu_task.h
@@ -18,7 +18,6 @@
 #define GE_GE_RUNTIME_TASK_AICPU_TASK_H_

 #include <memory>
 #include <string>
 #include "ge_runtime/task/task.h"

 namespace ge {
@@ -31,17 +30,12 @@ class AicpuTask : public TaskRepeater<AicpuTaskInfo> {

  bool Distribute() override;

  void *Args() override { return input_output_addr_; }

  std::string task_name() const override { return task_info_->op_name(); }

 private:
  static void ReleaseRtMem(void **ptr) noexcept;

  std::shared_ptr<AicpuTaskInfo> task_info_;
  void *stream_;
  void *args_;
  void *input_output_addr_;
 };
 }  // namespace model_runner
 }  // namespace ge
--- a/src/ge/ge_runtime/task/hccl_task.cc
+++ b/src/ge/ge_runtime/task/hccl_task.cc
@@ -115,6 +115,7 @@ bool HcclTask::Distribute() {
    rt_ret = rtModelBindStream(rt_model_handle_, stream, RT_HEAD_STREAM);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
      (void)rtStreamDestroy(stream);
      return false;
    }

--- a/src/ge/ge_runtime/task/label_goto_task.cc
+++ b/src/ge/ge_runtime/task/label_goto_task.cc
@@ -1,70 +0,0 @@
 /**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #include "ge_runtime/task/label_goto_task.h"
 #include "ge_runtime/task/task_factory.h"

 namespace ge {
 namespace model_runner {
 /// Binds the task to the stream and label that task_info selects from model_context.
 ///
 /// @param model_context supplies the stream list and label list that the ids index into.
 /// @param task_info     carries the stream id and label id; may be null.
 ///
 /// If task_info is null, or either id is out of range for its list, a warning is logged and
 /// stream_ / label_ are left as nullptr.
 LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::shared_ptr<LabelGotoTaskInfo> &task_info)
    : TaskRepeater<LabelGotoTaskInfo>(model_context, task_info),
      task_info_(task_info),
      stream_(nullptr),
      label_(nullptr) {
  if (task_info_ == nullptr) {
    GELOGW("task_info_ is null!");
    return;
  }
  // Bind by const reference: only one element of each list is read, so copying the vectors is wasted work.
  const auto &stream_list = model_context.stream_list();
  const auto &label_list = model_context.label_list();
  const uint32_t stream_id = task_info_->stream_id();
  const uint32_t label_id = task_info_->label_id();
  GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id);
  GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id);
  if (stream_id >= stream_list.size() || label_id >= label_list.size()) {
    GELOGW("Stream/Label id invalid.");
    return;
  }
  stream_ = stream_list[stream_id];
  label_ = label_list[label_id];
 }

 // Nothing to release here: the destructor body is empty.
 LabelGotoTask::~LabelGotoTask() = default;

 bool LabelGotoTask::Distribute() {
  GELOGI("LabelGotoTask Distribute start.");
  if (stream_ == nullptr) {
    GELOGE(PARAM_INVALID, "stream is null!");
    return false;
  }
  if (label_ == nullptr) {
    GELOGE(PARAM_INVALID, "label is null!");
    return false;
  }
  rtError_t rt_ret = rtLabelGotoEx(label_, stream_);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
    return false;
  }

  GELOGI("DistributeTask end.");
  return true;
 }

 REGISTER_TASK(TaskInfoType::LABEL_GOTO, LabelGotoTask, LabelGotoTaskInfo);

 }  // namespace model_runner
 }  // namespace ge
--- a/src/ge/ge_runtime/task/label_goto_task.h
+++ b/src/ge/ge_runtime/task/label_goto_task.h
@@ -1,41 +0,0 @@
 /**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #ifndef GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_
 #define GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_

 #include <memory>
 #include "ge_runtime/task/task.h"

 namespace ge {
 namespace model_runner {
 /// Task for TaskInfoType::LABEL_GOTO: its Distribute() issues rtLabelGotoEx on the label/stream pair
 /// resolved at construction.
 class LabelGotoTask : public TaskRepeater<LabelGotoTaskInfo> {
 public:
  /// Resolves the stream and label from model_context using the ids stored in task_info.
  LabelGotoTask(const ModelContext &model_context, const std::shared_ptr<LabelGotoTaskInfo> &task_info);

  ~LabelGotoTask() override;

  /// Issues the goto; returns false if the stream or label was not resolved or the runtime call fails.
  bool Distribute() override;

 private:
  std::shared_ptr<LabelGotoTaskInfo> task_info_;  // task description handed to the constructor (may be null)
  void *stream_;                                  // stream the goto is issued on; nullptr until resolved
  void *label_;                                   // label to jump to; nullptr until resolved
 };
 }  // namespace model_runner
 }  // namespace ge

 #endif  // GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_
--- a/src/ge/ge_runtime/task/label_set_task.cc
+++ b/src/ge/ge_runtime/task/label_set_task.cc
@@ -1,70 +0,0 @@
 /**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #include "ge_runtime/task/label_set_task.h"
 #include "ge_runtime/task/task_factory.h"

 namespace ge {
 namespace model_runner {
 /// Binds the task to the stream and label that task_info selects from model_context.
 ///
 /// @param model_context supplies the stream list and label list that the ids index into.
 /// @param task_info     carries the stream id and label id; may be null.
 ///
 /// If task_info is null, or either id is out of range for its list, a warning is logged and
 /// stream_ / label_ are left as nullptr.
 LabelSetTask::LabelSetTask(const ModelContext &model_context, const std::shared_ptr<LabelSetTaskInfo> &task_info)
    : TaskRepeater<LabelSetTaskInfo>(model_context, task_info),
      task_info_(task_info),
      stream_(nullptr),
      label_(nullptr) {
  if (task_info_ == nullptr) {
    GELOGW("task_info_ is null!");
    return;
  }
  // Bind by const reference: only one element of each list is read, so copying the vectors is wasted work.
  const auto &stream_list = model_context.stream_list();
  const auto &label_list = model_context.label_list();
  const uint32_t stream_id = task_info_->stream_id();
  const uint32_t label_id = task_info_->label_id();
  GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id);
  GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id);
  if (stream_id >= stream_list.size() || label_id >= label_list.size()) {
    GELOGW("Stream/Label id invalid.");
    return;
  }
  stream_ = stream_list[stream_id];
  label_ = label_list[label_id];
 }

 // Nothing to release here: the destructor body is empty.
 LabelSetTask::~LabelSetTask() = default;

 bool LabelSetTask::Distribute() {
  GELOGI("LabelSetTask Distribute start.");
  if (stream_ == nullptr) {
    GELOGE(PARAM_INVALID, "stream is null!");
    return false;
  }
  if (label_ == nullptr) {
    GELOGE(PARAM_INVALID, "label is null!");
    return false;
  }
  rtError_t rt_ret = rtLabelSet(label_, stream_);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
    return false;
  }

  GELOGI("DistributeTask end.");
  return true;
 }

 REGISTER_TASK(TaskInfoType::LABEL_SET, LabelSetTask, LabelSetTaskInfo);

 }  // namespace model_runner
 }  // namespace ge
--- a/src/ge/ge_runtime/task/label_set_task.h
+++ b/src/ge/ge_runtime/task/label_set_task.h
@@ -1,41 +0,0 @@
 /**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #ifndef GE_GE_RUNTIME_TASK_LABEL_SET_TASK_H_
 #define GE_GE_RUNTIME_TASK_LABEL_SET_TASK_H_

 #include <memory>
 #include "ge_runtime/task/task.h"

 namespace ge {
 namespace model_runner {
 /// Task for TaskInfoType::LABEL_SET: its Distribute() calls rtLabelSet on the label/stream pair
 /// resolved at construction.
 class LabelSetTask : public TaskRepeater<LabelSetTaskInfo> {
 public:
  /// Resolves the stream and label from model_context using the ids stored in task_info.
  LabelSetTask(const ModelContext &model_context, const std::shared_ptr<LabelSetTaskInfo> &task_info);

  ~LabelSetTask() override;

  /// Sets the label; returns false if the stream or label was not resolved or the runtime call fails.
  bool Distribute() override;

 private:
  std::shared_ptr<LabelSetTaskInfo> task_info_;  // task description handed to the constructor (may be null)
  void *stream_;                                 // stream the label is set on; nullptr until resolved
  void *label_;                                  // label being set; nullptr until resolved
 };
 }  // namespace model_runner
 }  // namespace ge

 #endif  // GE_GE_RUNTIME_TASK_LABEL_SET_TASK_H_
--- a/src/ge/ge_runtime/task/label_switch_task.cc
+++ b/src/ge/ge_runtime/task/label_switch_task.cc
@@ -1,131 +0,0 @@
 /**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #include "ge_runtime/task/label_switch_task.h"
 #include "ge_runtime/task/task_factory.h"

 namespace ge {
 namespace model_runner {
 /// Captures the model's label list and resolves the stream selected by task_info.
 ///
 /// @param model_context supplies the stream list and the complete label list.
 /// @param task_info     carries the stream id (and the label indexes used later by Distribute); may be null.
 ///
 /// If task_info is null or the stream id is out of range, a warning is logged and stream_ is left as nullptr.
 LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context,
                                 const std::shared_ptr<LabelSwitchTaskInfo> &task_info)
    : TaskRepeater<LabelSwitchTaskInfo>(model_context, task_info),
      task_info_(task_info),
      stream_(nullptr),
      all_label_resource_(),
      label_info_(nullptr) {
  if (task_info_ == nullptr) {
    GELOGW("task_info_ is null!");
    return;
  }

  // The label list is kept as a member copy because Distribute() indexes into it later.
  all_label_resource_ = model_context.label_list();
  // The stream list is only read here, so bind it by const reference instead of copying it.
  const auto &stream_list = model_context.stream_list();
  const uint32_t stream_id = task_info_->stream_id();
  GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id);
  if (stream_id >= stream_list.size()) {
    GELOGW("Stream id invalid.");
    return;
  }
  stream_ = stream_list[stream_id];
 }

 /// Releases label_info_ with rtFree if it was allocated; a failed free is only logged.
 LabelSwitchTask::~LabelSwitchTask() {
  if (label_info_ == nullptr) {
    return;
  }

  const rtError_t free_ret = rtFree(label_info_);
  if (free_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtFree fwkOpBuf failed! ret: 0x%X.", free_ret);
  }
  label_info_ = nullptr;
 }

 /// Builds the label table for this switch and issues rtLabelSwitchByIndex on the bound stream.
 ///
 /// Steps: validate parameters, translate each label index from the task info into the matching entry of
 /// all_label_resource_, allocate label_info_ with rtMalloc(RT_MEMORY_HBM), fill it through rtLabelListCpy,
 /// then launch the switch. label_info_ is not freed here; the destructor releases it.
 ///
 /// @return true on success; false when validation fails, a label index is out of range, or any runtime
 ///         call does not return RT_ERROR_NONE (each failure is logged).
 bool LabelSwitchTask::Distribute() {
  GELOGI("LabelSwitchTask Distribute start.");
  if (!CheckParamValid()) {
    return false;
  }

  const std::vector<uint32_t> &label_index_list = task_info_->label_list();
  // CheckParamValid() has verified that label_size() equals label_index_list.size().
  const auto label_size = task_info_->label_size();
  std::vector<void *> label_list(label_size, nullptr);

  for (size_t i = 0; i < label_size; ++i) {
    const uint32_t label_index = label_index_list[i];
    if (label_index >= all_label_resource_.size()) {
      GELOGE(PARAM_INVALID, "label %zu index is %u, but there are %zu labels in total.", i, label_index,
             all_label_resource_.size());
      return false;
    }
    label_list[i] = all_label_resource_[label_index];
    // label_index is a uint32_t, so it must be printed with %u rather than %zu.
    GELOGI("Case %zu: label id %u.", i, label_index);
  }

  // CheckParamValid() has verified that this product fits in 32 bits.
  const uint32_t label_info_size = static_cast<uint32_t>(sizeof(rtLabelDevInfo) * label_size);
  rtError_t rt_ret = rtMalloc(&label_info_, label_info_size, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
    return false;
  }

  rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info_, label_info_size);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
    return false;
  }

  rt_ret = rtLabelSwitchByIndex(task_info_->cond(), label_list.size(), label_info_, stream_);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
    return false;
  }

  GELOGI("DistributeTask end.");
  return true;
 }

 /// Pre-flight checks for Distribute(); every rejection is logged with PARAM_INVALID.
 /// Rejects, in order: a null stream, an empty label list, a label_size() that disagrees with the list
 /// length, a label_size() large enough that label_size * sizeof(rtLabelDevInfo) would not fit in 32 bits,
 /// and an already-populated label_info_.
 bool LabelSwitchTask::CheckParamValid() {
  if (stream_ == nullptr) {
    GELOGE(PARAM_INVALID, "stream is null!");
    return false;
  }

  const auto &labels = task_info_->label_list();
  if (labels.empty()) {
    GELOGE(PARAM_INVALID, "label_list is empty.");
    return false;
  }

  const auto declared_size = task_info_->label_size();
  if (declared_size != labels.size()) {
    GELOGE(PARAM_INVALID, "label_list size %zu but label_size is %u.", labels.size(), declared_size);
    return false;
  }

  if (declared_size >= UINT32_MAX / sizeof(rtLabelDevInfo)) {
    GELOGE(PARAM_INVALID, "label_size %u will overflow.", declared_size);
    return false;
  }

  if (label_info_ != nullptr) {
    GELOGE(PARAM_INVALID, "label_info_ has dirty data.");
    return false;
  }

  return true;
 }

 REGISTER_TASK(TaskInfoType::LABEL_SWITCH, LabelSwitchTask, LabelSwitchTaskInfo);

 }  // namespace model_runner
 }  // namespace ge
--- a/src/ge/ge_runtime/task/label_switch_task.h
+++ b/src/ge/ge_runtime/task/label_switch_task.h
@@ -1,44 +0,0 @@
 /**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #ifndef GE_GE_RUNTIME_TASK_LABEL_SWITCH_TASK_H_
 #define GE_GE_RUNTIME_TASK_LABEL_SWITCH_TASK_H_

 #include <memory>
 #include "ge_runtime/task/task.h"

 namespace ge {
 namespace model_runner {
 /// Task for TaskInfoType::LABEL_SWITCH: its Distribute() builds a label table and issues
 /// rtLabelSwitchByIndex on the stream resolved at construction.
 class LabelSwitchTask : public TaskRepeater<LabelSwitchTaskInfo> {
 public:
  /// Copies the label list from model_context and resolves the stream selected by task_info.
  LabelSwitchTask(const ModelContext &model_context, const std::shared_ptr<LabelSwitchTaskInfo> &task_info);

  /// Frees label_info_ if Distribute() allocated it.
  ~LabelSwitchTask() override;

  /// Issues the switch; returns false on invalid parameters or any runtime failure.
  bool Distribute() override;

 private:
  /// Validates stream, label list and label_info_ state before Distribute() does any work.
  bool CheckParamValid();

  std::shared_ptr<LabelSwitchTaskInfo> task_info_;  // task description handed to the constructor (may be null)
  void *stream_;                                    // stream the switch is issued on; nullptr until resolved
  std::vector<void *> all_label_resource_;          // copy of the model's label list, indexed by label id
  void *label_info_;                                // buffer from rtMalloc holding the label table; freed in dtor
 };
 }  // namespace model_runner
 }  // namespace ge

 #endif  // GE_GE_RUNTIME_TASK_LABEL_SWITCH_TASK_H_
--- a/src/ge/ge_runtime/task/stream_switch_task.cc
+++ b/src/ge/ge_runtime/task/stream_switch_task.cc
@@ -51,7 +51,7 @@ bool StreamSwitchTask::Distribute() {
  }

  if (static_cast<uint64_t>(task_info_->true_stream_id()) >= stream_list_.size()) {
    GELOGE(PARAM_INVALID, "true_stream_id %ld must less than stream_list_ size %zu!", task_info_->true_stream_id(),
    GELOGE(PARAM_INVALID, "true_stream_id %ld must be less than stream_list_ size %zu!", task_info_->true_stream_id(),
           stream_list_.size());
    return false;
  }
--- a/src/ge/ge_runtime/task/task.h
+++ b/src/ge/ge_runtime/task/task.h
@@ -18,9 +18,7 @@
 #define GE_GE_RUNTIME_TASK_TASK_H_

 #include <memory>
 #include <utility>
 #include <vector>
 #include <string>
 #include "runtime/rt_model.h"
 #include "ge_runtime/model_context.h"
 #include "ge_runtime/task_info.h"
@@ -34,10 +32,6 @@ class Task {
  virtual ~Task() {}

  virtual bool Distribute() = 0;

  virtual void *Args() { return nullptr; }

  virtual std::string task_name() const { return ""; }
 };

 template <class T>
--- a/src/ge/ge_runtime/task/tbe_task.cc
+++ b/src/ge/ge_runtime/task/tbe_task.cc
@@ -95,14 +95,15 @@ bool TbeTask::Distribute() {
    return false;
  }

  GELOGI("InitTbeTask end.");
  GELOGI("DistributeTbeTask start.");
  auto dump_flag = task_info_->dump_flag() ? RT_KERNEL_DUMPFLAG : RT_KERNEL_DEFAULT;
  rt_ret = rtKernelLaunchWithFlag(stub_func_, task_info_->block_dim(), args_, args_size, nullptr, stream_, dump_flag);
  rt_ret = rtKernelLaunch(stub_func_, task_info_->block_dim(), args_, args_size, nullptr, stream_);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api rtKernelLaunch failed, ret: 0x%X", rt_ret);
    return false;
  }
  GELOGI("[DataDump] task name:%s, dump_flag:%d", task_info_->op_name().c_str(), dump_flag);

  GELOGI("DistributeTbeTask end.");
  return true;
 }

--- a/src/ge/ge_runtime/task/tbe_task.h
+++ b/src/ge/ge_runtime/task/tbe_task.h
@@ -30,10 +30,6 @@ class TbeTask : public TaskRepeater<TbeTaskInfo> {

  bool Distribute() override;

  void *Args() override { return args_; }

  std::string task_name() const override { return task_info_->op_name(); }

 private:
  std::shared_ptr<TbeTaskInfo> task_info_;
  void *stream_;
--- a/src/ge/generator/ge_generator.cc
+++ b/src/ge/generator/ge_generator.cc
@@ -20,6 +20,7 @@
 #include "common/helper/model_helper.h"
 #include "common/helper/om_file_helper.h"
 #include "common/util.h"
 #include "common/util/error_manager/error_manager.h"
 #include "framework/common/debug/ge_log.h"
 #include "ge/ge_api.h"
 #include "graph/ge_context.h"
@@ -125,17 +126,7 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen
  if (data_op == nullptr) {
    return FAILED;
  }
  auto op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID);
  auto input_desc = op_desc->MutableInputDesc(index);
  GE_CHECK_NOTNULL_EXEC(input_desc, return PARAM_INVALID);
  ge::Format old_format = input_desc->GetFormat();
  if (old_format == FORMAT_FRACTAL_NZ || old_format == FORMAT_FRACTAL_Z) {
    input_desc->SetFormat(FORMAT_ND);
    input_desc->SetOriginFormat(FORMAT_ND);
    (void)AttrUtils::SetStr(data_op, "_single_input_format", TypeUtils::FormatToSerialString(old_format));
    (void)AttrUtils::SetBool(data_op, "_is_single_op", true);
  }
  (void)AttrUtils::SetBool(data_op, "_is_single_op", true);

  GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail.");
  GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail.");
@@ -157,17 +148,7 @@ static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, cons
  if (op_desc == nullptr) {
    return FAILED;
  }
  auto single_op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL_EXEC(single_op_desc, return PARAM_INVALID);
  auto output_desc = single_op_desc->MutableOutputDesc(0);
  GE_CHECK_NOTNULL_EXEC(output_desc, return PARAM_INVALID);
  ge::Format old_format = output_desc->GetFormat();
  if (old_format == FORMAT_FRACTAL_NZ || old_format == FORMAT_FRACTAL_Z) {
    output_desc->SetFormat(FORMAT_ND);
    output_desc->SetOriginFormat(FORMAT_ND);
    (void)AttrUtils::SetStr(op_desc, "_single_output_format", TypeUtils::FormatToSerialString(old_format));
    (void)AttrUtils::SetBool(op_desc, "_is_single_op", true);
  }
  (void)AttrUtils::SetBool(op_desc, "_is_single_op", true);
  int32_t count = 0;
  for (const auto &out_desc : outputs) {
    GeTensorDesc tensor = out_desc.GetTensorDesc();
@@ -212,19 +193,6 @@ static void GetOpsProtoPath(string &opsproto_path) {
  opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
 }

 /// Derives the model name from an output file name.
 ///
 /// Everything up to and including the last '/' is dropped, then the trailing 3 characters (the ".om"
 /// suffix) are removed from what remains.
 ///
 /// @param file_name_prefix output file name, optionally with a directory part.
 /// @return the bare model name, or an empty string when the file name part is not longer than the suffix.
 static string GetModelNameFromFileName(const string &file_name_prefix) {
  // using output as model_name (ignore ".om")
  const size_t filename_suffixes = 3;
  const size_t last_slash = file_name_prefix.find_last_of('/');
  // Start right after the last path separator so that no directory component leaks into the name.
  const size_t start_position = (last_slash == string::npos) ? 0 : last_slash + 1;
  const size_t base_length = file_name_prefix.length() - start_position;

  string model_name;
  // Guard the subtraction: a base name of 3 characters or fewer has nothing left once the suffix is removed.
  if (base_length > filename_suffixes) {
    model_name = file_name_prefix.substr(start_position, base_length - filename_suffixes);
  }
  GELOGI("Get model_name from file, model_name:%s", model_name.c_str());
  return model_name;
 }

 class GeGenerator::Impl {
 public:
  Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GraphId &graph_id, GeRootModelPtr &ge_models);
@@ -332,8 +300,6 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
  GraphId graph_id;
  GeRootModelPtr ge_root_model = nullptr;
  GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);
  const string model_name = GetModelNameFromFileName(file_name_prefix);
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(model_name.empty(), return PARAM_INVALID, "om name is not valid!");
  impl_->is_offline_ = is_offline;
  Status ret = impl_->BuildModel(graph, inputs, graph_id, ge_root_model);
  if (ret != SUCCESS) {
@@ -345,9 +311,15 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
  }
  GE_CHECK_NOTNULL(ge_root_model);
  GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
  ModelHelper model_helper;
  string model_name = "";
  Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), model_name);
  if (name_ret != SUCCESS) {
    GELOGE(FAILED, "Get model_name failed. Param --output is invalid");
    return PARAM_INVALID;
  }
  map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
  GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];

  GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model can not be null");
  ge_model->SetName(model_name);
  ret = impl_->SaveModel(file_name_prefix, ge_model, model);
--- a/src/ge/graph/build/memory/block_mem_assigner.cc
+++ b/src/ge/graph/build/memory/block_mem_assigner.cc
@@ -38,6 +38,7 @@
 namespace {
 const char *const kAttrNameWorkspaceReuseFlag = "workspace_reuse_flag";
 const char *const kL2FusionDynamicConvergeOp = "l2fusion_dynamic_converge_op";
 const char *const kOpNoReuseMem = "no_reuse_mem_flag";
 const char *const kDisableReuseMemory = "ge.exec.disableReuseMemory";
 const char *const OP_NO_REUSE_MEM = "OP_NO_REUSE_MEM";
 const int kReuseMaxCount = 10;
@@ -624,8 +625,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
  (void)ge::GetContext().GetOption(kDisableReuseMemory, ge_disable_reuse_mem_env);
  if (ge_disable_reuse_mem_env != "1") {
    bool reuse_mem_flag = !((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]);
    is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && reuse_mem_flag && is_op_reuse_mem &&
                      (IsPreReuse(n, out_index));
    is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !node_op_desc->HasAttr(kOpNoReuseMem) &&
                      reuse_mem_flag && is_op_reuse_mem && (IsPreReuse(n, out_index));
    auto stream_id = node_op_desc->GetStreamId();
    auto map_iter = reusable_streams_map_.find(stream_id);
    if (is_reuse_memory && map_iter != reusable_streams_map_.end()) {
@@ -1182,6 +1183,9 @@ void ReAssignContinuousBlocks(const std::vector<MemoryBlock *> &org_blocks,

    GELOGI("Block continuous input index:%d", memory_block->input_index_);
    count++;
    if (count == 1) {
      memory_block->first_continuous_block_ = true;
    }
    if (count == continuous_blocks.size()) {
      memory_block->last_continuous_block_ = true;
    }
@@ -1242,6 +1246,10 @@ void BlockMemAssigner::ResizeMemoryBlocks() {
    if (memory_block == nullptr || memory_block->deleted_block_ || memory_block->is_zero_copy_) {
      continue;
    }
    if (memory_block->first_continuous_block_) {
      mem_offset_ += MEM_ALIGN_SIZE;
    }

    memory_block->Resize();
    memory_block->SetHeadOffset(mem_offset_);
    mem_offset_ += memory_block->Size();
--- a/src/ge/graph/build/memory/block_mem_assigner.h
+++ b/src/ge/graph/build/memory/block_mem_assigner.h
@@ -64,6 +64,7 @@ class MemoryBlock {
        reuse_mem_(reuse_mem),
        input_index_(0),
        continuous_block_(false),
        first_continuous_block_(false),
        last_continuous_block_(false),
        is_zero_copy_(false),
        block_size_(block_size),
@@ -129,6 +130,7 @@ class MemoryBlock {
  bool reuse_mem_;
  uint32_t input_index_;
  bool continuous_block_;
  bool first_continuous_block_;
  bool last_continuous_block_;
  bool is_zero_copy_;
  std::map<int64_t, size_t> depend_stream_life_;
--- a/src/ge/graph/build/memory/graph_mem_assigner.cc
+++ b/src/ge/graph/build/memory/graph_mem_assigner.cc
@@ -446,6 +446,7 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node
    return ge::FAILED;
  }

  memory_offset_[0].mem_offset_ += MEM_ALIGN_SIZE;
  for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    output_list[out_data_anchor->GetIdx()] = memory_offset_[0].mem_offset_;
    size_t pre_mem_offset = memory_offset_[0].mem_offset_;
--- a/src/ge/graph/load/new_model_manager/data_dumper.cc
+++ b/src/ge/graph/load/new_model_manager/data_dumper.cc
@@ -21,6 +21,7 @@
 #include <utility>
 #include <vector>

 #include "common/debug/log.h"
 #include "common/properties_manager.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/util.h"
@@ -28,6 +29,7 @@
 #include "graph/debug/ge_attr_define.h"
 #include "graph/load/new_model_manager/model_utils.h"
 #include "graph/utils/attr_utils.h"
 #include "graph/utils/tensor_utils.h"
 #include "proto/ge_ir.pb.h"
 #include "proto/op_mapping_info.pb.h"
 #include "runtime/mem.h"
@@ -106,6 +108,7 @@ void DataDumper::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_
 }

 void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) {
  GELOGI("Start to save data %s message", node->GetName().c_str());
  if (node != nullptr) {
    auto input_op_desc = node->GetOpDesc();
    if (input_op_desc == nullptr) {
@@ -126,6 +129,7 @@ void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) {
          {op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}});
      }
    }
    GELOGI("Save data message successfully");
  }
 }

@@ -159,30 +163,39 @@ void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::s
      return;
    }

    GELOGI("Save input dump task %s, id: %u.", data_op->GetName().c_str(), task_id);
    int64_t data_size = 0;
    if (AttrUtils::GetInt(input_tensor, ATTR_NAME_INPUT_ORIGIN_SIZE, data_size)) {
      GELOGI("Get aipp data size according to attr is %ld", data_size);
    } else if (TensorUtils::GetTensorSizeInBytes(*input_tensor, data_size) != SUCCESS) {
      GELOGE(PARAM_INVALID, "Get input size filed");
      return;
    }

    GELOGI("Save input dump task %s, id: %u,stream id :%u,data size :%ld", data_op->GetName().c_str(), task_id,
           stream_id, data_size);
    op_list_.push_back({task_id, stream_id, data_op, args, false, inner_input_mapping.input_anchor_index,
                        inner_input_mapping.output_anchor_index, input_tensor->GetShape().GetDims()});
                        inner_input_mapping.output_anchor_index, input_tensor->GetShape().GetDims(), data_size});
  }
 }

 static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uintptr_t loop_cond,
                                 aicpu::dump::OpMappingInfo &op_mapping_info) {
  if (step_id != 0) {
    GELOGI("step_id exist.");
    GELOGI("step_id exists.");
    op_mapping_info.set_step_id_addr(static_cast<uint64_t>(step_id));
  } else {
    GELOGI("step_id is null.");
  }

  if (loop_per_iter != 0) {
    GELOGI("loop_per_iter exist.");
    GELOGI("loop_per_iter exists.");
    op_mapping_info.set_iterations_per_loop_addr(static_cast<uint64_t>(loop_per_iter));
  } else {
    GELOGI("loop_per_iter is null.");
  }

  if (loop_cond != 0) {
    GELOGI("loop_cond exist.");
    GELOGI("loop_cond exists.");
    op_mapping_info.set_loop_cond_addr(static_cast<uint64_t>(loop_cond));
  } else {
    GELOGI("loop_cond is null.");
@@ -211,10 +224,19 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump:
        output.mutable_shape()->add_dim(dim);
      }

      int64_t output_size = 0;
      if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
        GELOGE(PARAM_INVALID, "Get output size filed");
        return PARAM_INVALID;
      }
      GELOGI("Get output size in dump is %ld", output_size);
      std::string origin_name;
      int32_t origin_output_index = -1;
      (void)AttrUtils::GetStr(&output_descs.at(i), ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name);
      (void)AttrUtils::GetInt(&output_descs.at(i), ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index);
      GE_IF_BOOL_EXEC(output_size <= 0, GELOGE(PARAM_INVALID, "Output size %ld is less than zero", output_size);
                      return PARAM_INVALID)
      output.set_size(output_size);
      output.set_original_name(origin_name);
      output.set_original_output_index(origin_output_index);
      output.set_original_output_format(static_cast<int32_t>(output_descs.at(i).GetOriginFormat()));
@@ -247,6 +269,10 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump:
  int32_t origin_output_index = -1;
  (void)AttrUtils::GetStr(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name);
  (void)AttrUtils::GetInt(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index);
  GE_IF_BOOL_EXEC(inner_dump_info.data_size <= 0,
                  GELOGE(PARAM_INVALID, "The size of data %ld is less than zero", inner_dump_info.data_size);
                  return PARAM_INVALID)
  output.set_size(inner_dump_info.data_size);
  output.set_original_name(origin_name);
  output.set_original_output_index(origin_output_index);
  output.set_original_output_format(static_cast<int32_t>(output_tensor->GetOriginFormat()));
@@ -283,6 +309,17 @@ Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::
      input.mutable_shape()->add_dim(dim);
    }

    int64_t input_size = 0;
    if (AttrUtils::GetInt(&input_descs.at(i), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
      GELOGI("Get aipp input size according to attr is %ld", input_size);
    } else if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
      GELOGE(PARAM_INVALID, "Get input size filed");
      return PARAM_INVALID;
    }
    GELOGI("Get input size in dump is %ld", input_size);
    GE_IF_BOOL_EXEC(input_size <= 0, GELOGE(PARAM_INVALID, "Input size %ld is less than zero", input_size);
                    return PARAM_INVALID;)
    input.set_size(input_size);
    input.set_address(static_cast<uint64_t>(inner_dump_info.args + sizeof(void *) * i));
    task.mutable_input()->Add(std::move(input));
  }
@@ -323,7 +360,7 @@ Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_in
  }

  load_flag_ = true;
  GELOGI("LoadDumpInfo success, proto size: %zu.", proto_size);
  GELOGI("LoadDumpInfo success, proto size is: %zu.", proto_size);
  return SUCCESS;
 }

@@ -360,11 +397,12 @@ Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_
    return RT_FAILED;
  }
  load_flag_ = false;
  GELOGI("UnloadDumpInfo success, proto size: %zu.", proto_size);
  GELOGI("UnloadDumpInfo success, proto size is: %zu.", proto_size);
  return SUCCESS;
 }
 Status DataDumper::LoadDumpInfo() {
  PrintCheckLog();
  std::string dump_list_key;
  PrintCheckLog(dump_list_key);

  if (op_list_.empty()) {
    return SUCCESS;
@@ -374,12 +412,13 @@ Status DataDumper::LoadDumpInfo() {

  auto dump_path = PropertiesManager::Instance().GetDumpOutputPath();
  op_mapping_info.set_dump_path(PropertiesManager::Instance().GetDumpOutputPath() + std::to_string(device_id_) + "/");
  op_mapping_info.set_model_name(model_name_);
  op_mapping_info.set_model_name(dump_list_key);
  op_mapping_info.set_model_id(model_id_);
  op_mapping_info.set_flag(kAicpuLoadFlag);
  op_mapping_info.set_dump_step(PropertiesManager::Instance().GetDumpStep());
  SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
  GELOGD("Dump step in load dump info is %s", PropertiesManager::Instance().GetDumpStep().c_str());
  GELOGI("Dump step is %s and dump path  is %s in load dump info", PropertiesManager::Instance().GetDumpStep().c_str(),
         dump_path.c_str());

  for (const auto &op_iter : op_list_) {
    aicpu::dump::Task task;
@@ -441,7 +480,7 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id,
  if (PropertiesManager::Instance().GetDumpMode() == kDumpOutput ||
      PropertiesManager::Instance().GetDumpMode() == kDumpInput ||
      PropertiesManager::Instance().GetDumpMode() == kDumpAll) {
    GELOGI("add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_);
    GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_);
    aicpu::dump::Task task;
    task.set_end_graph(true);
    task.set_task_id(end_graph_task_id_);
@@ -477,7 +516,7 @@ Status DataDumper::UnloadDumpInfo() {
  return SUCCESS;
 }

 void DataDumper::PrintCheckLog() {
 void DataDumper::PrintCheckLog(string &dump_list_key) {
  std::set<std::string> model_list = PropertiesManager::Instance().GetAllDumpModel();
  if (model_list.empty()) {
    GELOGI("No model need dump.");
@@ -485,19 +524,21 @@ void DataDumper::PrintCheckLog() {
  }

  GELOGI("%zu op need dump in %s.", op_list_.size(), model_name_.c_str());
  if (model_list.find(ge::DUMP_ALL_MODEL) == model_list.end()) {
    if (model_list.find(model_name_) == model_list.end()) {
  bool not_find_by_omname = model_list.find(om_name_) == model_list.end();
  bool not_find_by_modelname = model_list.find(model_name_) == model_list.end();
  if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) {
    if (not_find_by_omname && not_find_by_modelname) {
      std::string model_list_str;
      for (auto &model : model_list) {
        model_list_str += "[" + model + "].";
      }

      GELOGW("Model %s not be set to dump, dump list: %s", model_name_.c_str(), model_list_str.c_str());
      GELOGW("Model %s will not be set to dump, dump list: %s", model_name_.c_str(), model_list_str.c_str());
      return;
    }
  }

  std::set<std::string> config_dump_op_list = PropertiesManager::Instance().GetDumpPropertyValue(model_name_);
  dump_list_key = not_find_by_omname ? model_name_ : om_name_;
  std::set<std::string> config_dump_op_list = PropertiesManager::Instance().GetDumpPropertyValue(dump_list_key);
  std::set<std::string> dump_op_list;
  for (auto &inner_dump_info : op_list_) {
    // oplist value OpDescPtr is not nullptr
@@ -506,7 +547,7 @@ void DataDumper::PrintCheckLog() {

  for (auto &dump_op : config_dump_op_list) {
    if (dump_op_list.find(dump_op) == dump_op_list.end()) {
      GELOGW("Op %s set to dump but not exist in model %s or not a valid op.", dump_op.c_str(), model_name_.c_str());
      GELOGW("Op %s set to dump but not exist in model %s or not a valid op.", dump_op.c_str(), dump_list_key.c_str());
    }
  }
 }
--- a/src/ge/graph/load/new_model_manager/data_dumper.h
+++ b/src/ge/graph/load/new_model_manager/data_dumper.h
@@ -64,6 +64,8 @@ class DataDumper {
  void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, uintptr_t args);
  void SaveEndGraphId(uint32_t task_id, uint32_t stream_id);

  // Stores the om file name in om_name_.
  void SetOmName(const std::string &om_name) {
    om_name_ = om_name;
  }

  Status LoadDumpInfo();

  Status UnloadDumpInfo();
@@ -71,9 +73,13 @@ class DataDumper {
 private:
  void ReleaseDevMem(void **ptr) noexcept;

  void PrintCheckLog();
  void PrintCheckLog(string &dump_list_key);

  std::string model_name_;

  // for inference data dump
  std::string om_name_;

  uint32_t model_id_;
  RuntimeParam runtime_param_;
  void *dev_mem_load_;
@@ -107,6 +113,7 @@ struct DataDumper::InnerDumpInfo {
  int input_anchor_index;
  int output_anchor_index;
  std::vector<int64_t> dims;
  int64_t data_size;
 };

 struct DataDumper::InnerInputMapping {
--- a/src/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/src/ge/graph/load/new_model_manager/davinci_model.cc
@@ -536,7 +536,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
  compute_graph_ = GraphUtils::GetComputeGraph(graph);
  GE_CHK_BOOL_RET_STATUS(compute_graph_ != nullptr, INTERNAL_ERROR, "Get compute graph is nullptr.");

  runtime_param_.graph_id = GetGraphID(compute_graph_->GetName());
  runtime_param_.graph_id = compute_graph_->GetGraphID();

  GE_TIMESTAMP_START(TransAllVarData);
  GE_CHK_STATUS_RET(TransAllVarData(compute_graph_, runtime_param_.graph_id), "TransAllVarData failed.");
@@ -1535,7 +1535,10 @@ Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc,
                             "construct output_name failed.");
      // forward compatibility: if an old om has no out_node_name, the output name is built the original way
      if (out_size == out_node_name.size()) {
        output_name = out_node_name[index] + ":" + std::to_string(src_index[index]);
        // newest plan: the index is already added to the name when the model is generated.
        bool contains_colon = out_node_name[index].find(":") != std::string::npos;
        output_name =
          contains_colon ? out_node_name[index] : out_node_name[index] + ":" + std::to_string(src_index[index]);
      } else {
        output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" +
                      std::to_string(src_index[index]);
@@ -2510,51 +2513,19 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec
 }

 Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) {
  GELOGI("InitTaskInfo in,task size %zu", model_task_def.task().size());
  GELOGI("InitTaskInfo in,task size %d", model_task_def.task().size());
  task_list_.resize(model_task_def.task_size());
  std::vector<std::future<Status>> futures(model_task_def.task_size());
  ThreadPool executor(kThreadNum);
  rtContext_t ctx = nullptr;
  rtError_t rt_ret = rtCtxGetCurrent(&ctx);
  if (rt_ret != RT_ERROR_NONE || ctx == nullptr) {
    GELOGE(RT_FAILED, "Failed to get current context from rt, error-code 0x%X.", rt_ret);
    return RT_FAILED;
  }

  for (int32_t i = 0; i < model_task_def.task_size(); ++i) {
    std::future<Status> f = executor.commit(
      [](const domi::TaskDef &task, DavinciModel *model, rtContext_t ctx, int32_t idx) -> Status {
        rtError_t rt_ret = rtCtxSetCurrent(ctx);
        if (rt_ret != RT_ERROR_NONE) {
          GELOGE(RT_FAILED, "Failed to set context from rt, error-code 0x%X.", rt_ret);
          return RT_FAILED;
        }
        Status ret = FAILED;
        // dynamic shape will create task_list_ before
        if (model->task_list_[idx] == nullptr) {
          model->task_list_[idx] = TaskInfoFactory::Instance().Create(static_cast<rtModelTaskType_t>(task.type()));
          GE_CHECK_NOTNULL(model->task_list_[idx]);
        }
        ret = model->task_list_[idx]->Init(task, model);
        return ret;
      },
      model_task_def.task(i), this, ctx, i);
    if (!f.valid()) {
      GELOGE(FAILED, "Future is invalid");
      return FAILED;
    }
    futures[i] = std::move(f);
  }

  Status ret;
  for (size_t i = 0; i < futures.size(); ++i) {
    ret = futures[i].get();
  for (int i = 0; i < model_task_def.task_size(); ++i) {
    // dynamic shape will create task_list_ before
    const domi::TaskDef &task = model_task_def.task(i);
    task_list_[i] = TaskInfoFactory::Instance().Create(static_cast<rtModelTaskType_t>(task.type()));
    GE_CHECK_NOTNULL(task_list_[i]);
    Status ret = task_list_[i]->Init(task, this);
    if (ret != SUCCESS) {
      GELOGE(ret, "Task index %zu init failed.", i);
      GELOGE(ret, "Task index %d init failed.", i);
      return ret;
    }
  }

  GELOGI("InitTaskInfo out");
  return SUCCESS;
 }
@@ -2623,7 +2594,7 @@ Status DavinciModel::DistributeTask() {
        return PARAM_INVALID;
      }

      if (PropertiesManager::Instance().IsLayerNeedDump(name_, op->GetName())) {
      if (PropertiesManager::Instance().IsLayerNeedDump(name_, om_name_, op->GetName())) {
        SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs());
      }
    }
@@ -2661,8 +2632,9 @@ Status DavinciModel::DistributeTask() {

 void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) {
  auto all_dump_model = PropertiesManager::Instance().GetAllDumpModel();
  if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
      all_dump_model.find(name_) != all_dump_model.end()) {
  bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end();
  bool findByModelName = all_dump_model.find(name_) != all_dump_model.end();
  if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || findByOmName || findByModelName) {
    GELOGI("start save end_graph_info to dumper, task_id is %u, stream_id is %u", task_id, stream_id);
    data_dumper_.SaveEndGraphId(task_id, stream_id);
  }
@@ -3344,17 +3316,6 @@ void DavinciModel::FreeWeightsMem() {
  }
 }

 // Extracts the graph id from a string of the form "<prefix>_<graph id>".
 // Everything after the first "_" is parsed with std::strtol using base kDecimal.
 // Returns 0 when the string contains no "_".
 uint32_t DavinciModel::GetGraphID(const std::string &session_graph_id) {
  const std::string separator = "_";
  const auto separator_pos = session_graph_id.find(separator);
  if (separator_pos == std::string::npos) {
    return 0;
  }

  // The graph id text runs from just past the separator to the end of the string.
  const size_t id_begin = separator_pos + separator.length();
  const size_t id_length = session_graph_id.length() - id_begin;
  const std::string graph_id_text = session_graph_id.substr(id_begin, id_length);
  return static_cast<uint32_t>(std::strtol(graph_id_text.c_str(), nullptr, kDecimal));
 }

 Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) {
  GELOGI("TransAllVarData start: session_id:%lu, graph_id: %u.", session_id_, graph_id);
  rtContext_t ctx = nullptr;
@@ -3387,6 +3348,7 @@ void DavinciModel::SetDataDumperArgs() {
  data_dumper_.SetModelName(name_);
  data_dumper_.SetModelId(model_id_);
  data_dumper_.SetMemory(runtime_param_);
  data_dumper_.SetOmName(om_name_);

  int32_t device_id = 0;
  rtError_t rt_ret = rtGetDevice(&device_id);
--- a/src/ge/graph/load/new_model_manager/davinci_model.h
+++ b/src/ge/graph/load/new_model_manager/davinci_model.h
@@ -187,6 +187,8 @@ class DavinciModel {
  // model name
  string Name() const { return name_; }

  // om file name (used for inference data dump)
  string OmName() const { return om_name_; }

  // version
  uint32_t Version() const { return version_; }

@@ -471,6 +473,8 @@ class DavinciModel {
  Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
  Status GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims,
                                   std::vector<InputOutputDims> &output_dims);
  // om file name
  void SetOmName(string om_name) { om_name_ = om_name; }

 private:
  // memory address of weights
@@ -752,8 +756,6 @@ class DavinciModel {

  void CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result);

  uint32_t GetGraphID(const std::string &session_graph_id);

  Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id);
  Status CopyVarData(ComputeGraphPtr &graph);

@@ -771,6 +773,10 @@ class DavinciModel {
  uint32_t model_id_;
  uint32_t runtime_model_id_;
  string name_;

  // used for inference data dump
  string om_name_;

  uint32_t version_;
  GeModelPtr ge_model_;

--- a/src/ge/graph/load/new_model_manager/model_manager.cc
+++ b/src/ge/graph/load/new_model_manager/model_manager.cc
@@ -820,6 +820,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
      return FAILED;
    }
    davinci_model->SetDeviceId(device_id);
    davinci_model->SetOmName(model.om_name);

    /// In multi-threaded inference,  using the same session_id among multiple threads may cause some threads to fail.
    /// These session_ids come from the same model, so the values of session_id are the same.
--- a/src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc
+++ b/src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc
@@ -47,7 +47,8 @@ Status EndGraphTaskInfo::Distribute() {
  GE_CHECK_NOTNULL(davinci_model_);
  auto all_dump_model = PropertiesManager::Instance().GetAllDumpModel();
  if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
      all_dump_model.find(davinci_model_->Name()) != all_dump_model.end()) {
      all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() ||
      all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) {
    GELOGI("Start to call rtEndGraphEx");
    rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag);
    if (rt_ret != RT_ERROR_NONE) {
--- a/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc
+++ b/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc
@@ -153,7 +153,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
    GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret);
                    return FAILED;)

    if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), op_desc->GetName())) {
    if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
                                                      op_desc->GetName())) {
      dump_flag_ = RT_KERNEL_DUMPFLAG;
      dump_args_ = input_output_addr_;
    }
--- a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
+++ b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
@@ -63,7 +63,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
    return ret;
  }

  domi::KernelDef kernel_def = task_def.kernel();
  const domi::KernelDef &kernel_def = task_def.kernel();
  block_dim_ = kernel_def.block_dim();
  args_size_ = kernel_def.args_size();
  // get opcontext stored in model
@@ -549,7 +549,8 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
    return FAILED;
  }

  if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), op_desc->GetName())) {
  if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
                                                    op_desc->GetName())) {
    dump_flag_ = RT_KERNEL_DUMPFLAG;
    dump_args_ = static_cast<char *>(args_) + offset;
  }
@@ -818,7 +819,8 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
    return RT_FAILED;
  }

  if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), op_desc->GetName())) {
  if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
                                                    op_desc->GetName())) {
    dump_flag_ = RT_KERNEL_DUMPFLAG;
    dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead);
  }
--- a/src/ge/graph/partition/graph_partition.cc
+++ b/src/ge/graph/partition/graph_partition.cc
@@ -105,9 +105,8 @@ void ge::GraphPartitioner::SetMergedGraphId(ge::ComputeGraphPtr &output_merged_c

 Status ge::GraphPartitioner::RemoveNodeAndEdgeBetweenEndPld(ge::ComputeGraphPtr &output_merged_compute_graph,
                                                            const std::vector<SubGraphInfoPtr> &sub_graph_list) {
  ComputeGraphPtr new_sub_graph = MakeShared<ComputeGraph>("mergedGraph");
  output_merged_compute_graph = new_sub_graph;
  if ((new_sub_graph == nullptr) || (MergeAllSubGraph(output_merged_compute_graph, sub_graph_list) != SUCCESS)) {
  if ((output_merged_compute_graph == nullptr) ||
      (MergeAllSubGraph(output_merged_compute_graph, sub_graph_list) != SUCCESS)) {
    GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: MergeAllSubGraph failed.");
    return FAILED;
  }
@@ -229,6 +228,9 @@ Status ge::GraphPartitioner::MergeSubGraph(ge::ComputeGraphPtr &output_merged_co
      return FAILED;
    }
  }
  ComputeGraphPtr new_sub_graph = MakeShared<ComputeGraph>(original_compute_graph->GetName());
  GE_CHECK_NOTNULL(new_sub_graph);
  output_merged_compute_graph = new_sub_graph;
  GE_TIMESTAMP_START(MergeGraphRemoveNode);
  if (RemoveNodeAndEdgeBetweenEndPld(output_merged_compute_graph, sub_graph_list) != ge::SUCCESS) {
    GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: merging sub-graphs failed");
--- a/src/ge/graph/passes/same_transdata_breadth_fusion_pass.cc
+++ b/src/ge/graph/passes/same_transdata_breadth_fusion_pass.cc
@@ -70,6 +70,7 @@ OpDescPtr SameTransdataBreadthFusionPass::GetCastOp(const GeTensorDesc &in_desc,
  cast_op_name << "fusion_cast_" << fusion_cast_op_count++;
  auto node_op = ge::OperatorFactory::CreateOperator(cast_op_name.str(), CAST);
  auto cast_op = ge::OpDescUtils::GetOpDescFromOperator(node_op);
  node_op.BreakConnect();
  if (cast_op == nullptr) {
    GELOGE(INTERNAL_ERROR, "new fusion cast op failed!");
    return nullptr;
--- a/src/ge/graph/passes/transop_without_reshape_fusion_pass.cc
+++ b/src/ge/graph/passes/transop_without_reshape_fusion_pass.cc
@@ -501,6 +501,7 @@ OpDescPtr TransOpWithoutReshapeFusionPass::GetCastOp(const GeTensorDesc &cast_in
  cast_op_name << "fusion_cast_op_" << fusion_cast_op_count++;
  auto node_op = ge::OperatorFactory::CreateOperator(cast_op_name.str(), CAST);
  auto cast_op = ge::OpDescUtils::GetOpDescFromOperator(node_op);
  node_op.BreakConnect();
  if (cast_op == nullptr) {
    GELOGE(INTERNAL_ERROR, "new cast op failed!");
    return nullptr;
--- a/src/ge/graph/preprocess/graph_preprocess.cc
+++ b/src/ge/graph/preprocess/graph_preprocess.cc
@@ -19,8 +19,6 @@
 #include <set>
 #include <string>
 #include <utility>
 #include "common/formats/format_transfers/format_transfer_fractal_nz.h"
 #include "common/formats/format_transfers/format_transfer_fractal_z.h"
 #include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h"
 #include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h"
 #include "common/formats/format_transfers/format_transfer_transpose.h"
@@ -123,9 +121,6 @@ static std::map<std::string, ge::DataType> output_type_str_to_datatype = {
  {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}};

 const char *const kMbatchSwitchnName = "mbatch-switch-name";
 const int64_t kGemmNdShapeSize = 2;
 const int64_t kGemmAlignSize32 = 32;
 const int64_t kGemmAlignSize16 = 16;

 OpDescPtr CreateTensorShape(const GeTensorDesc &data_tensor) {
  GeTensorPtr tensor = MakeShared<GeTensor>();
@@ -1135,114 +1130,9 @@ Status ProcessInputNC1HWC0DynShape(NodePtr &node_ptr, bool &is_dynamic_batch, No
  return SUCCESS;
 }

 // Builds the 4-D destination shape used for a GEMM input in FRACTAL_Z layout.
 //
 // src_shape:     source shape; must have exactly kGemmNdShapeSize (2) dims.
 // dst_shape_vec: output; always cleared first, then filled on success with
 //                {Ceil(dim0, 32), Ceil(dim1, 16), 16, 32}.
 // Returns SUCCESS, or FAILED (with dst_shape_vec left empty) when the source rank is not 2.
 Status ProcessGemmFractalZ(GeShape &src_shape, std::vector<int64_t> &dst_shape_vec) {
  dst_shape_vec.clear();
  const bool rank_matches = (src_shape.GetDims().size() == kGemmNdShapeSize);
  if (!rank_matches) {
    GELOGE(INTERNAL_ERROR, "gemm shape size must be 2");
    return FAILED;
  }
  // Leading two entries come from the source dims divided (rounded up) by the alignment sizes;
  // the trailing two entries are the alignment sizes themselves.
  const int64_t aligned_dim0 = formats::Ceil(src_shape.GetDim(0), kGemmAlignSize32);
  const int64_t aligned_dim1 = formats::Ceil(src_shape.GetDim(1), kGemmAlignSize16);
  dst_shape_vec = {aligned_dim0, aligned_dim1, kGemmAlignSize16, kGemmAlignSize32};
  return SUCCESS;
 }
 // Applies the same shape and format to an input and an output tensor descriptor, then
 // recomputes and stores the size of each descriptor.
 //
 // input/output: descriptors to update in place; both must be non-null.
 // shape/format: values written to both descriptors.
 // Returns SUCCESS when both size queries succeed; FAILED if a descriptor is null or if
 // either size query fails (in which case no size is written).
 Status SetInOutForGemm(GeTensorDescPtr &input, GeTensorDescPtr &output, GeShape shape, Format format) {
  if ((input == nullptr) || (output == nullptr)) {
    GELOGE(INTERNAL_ERROR, "Input or output tensor desc is null.");
    return FAILED;
  }
  input->SetShape(shape);
  input->SetFormat(format);
  output->SetShape(shape);
  output->SetFormat(format);
  int64_t input_shape_size = 0;
  int64_t output_shape_size = 0;
  ge::graphStatus input_graph_status = ge::TensorUtils::GetTensorSizeInBytes(*input, input_shape_size);
  ge::graphStatus output_graph_status = ge::TensorUtils::GetTensorMemorySizeInBytes(*output, output_shape_size);
  // Either query failing leaves the corresponding size at its initial 0, so bail out if
  // any one of them failed rather than only when both did.
  if ((input_graph_status != ge::GRAPH_SUCCESS) || (output_graph_status != ge::GRAPH_SUCCESS)) {
    GELOGE(GRAPH_FAILED, "GetTensorSize failed!");
    return FAILED;
  }
  ge::TensorUtils::SetSize(*input, input_shape_size);
  ge::TensorUtils::SetSize(*output, output_shape_size);
  return SUCCESS;
 }

 // Rewrites the shape/format of a single-op data node according to the requested input format
 // and propagates the result to the input descriptors of every directly connected peer node.
 //
 // node_ptr:               data node whose input 0 / output 0 descriptors are updated.
 // single_op_input_format: serialized format name; only FRACTAL_NZ and FRACTAL_Z trigger a
 //                         shape rewrite, any other format only gets propagated to the peers.
 // Returns SUCCESS on success, FAILED when a shape transfer or descriptor update fails, or the
 // status produced by GE_CHECK_NOTNULL when a required object is null.
 Status ProcessSingleOpInput(NodePtr &node_ptr, string &single_op_input_format) {
  GE_CHECK_NOTNULL(node_ptr);
  ge::Format input_format = TypeUtils::SerialStringToFormat(single_op_input_format);
  auto op_desc = node_ptr->GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);
  auto data_input = op_desc->MutableInputDesc(0);
  GE_CHECK_NOTNULL(data_input);
  auto data_output = op_desc->MutableOutputDesc(0);
  GE_CHECK_NOTNULL(data_output);
  ge::Format src_format = data_input->GetFormat();
  ge::DataType src_dt = data_input->GetDataType();
  ge::GeShape src_shape = data_input->GetShape();
  std::vector<int64_t> dst_shape_vec;
  if (input_format == FORMAT_FRACTAL_NZ) {
    formats::FormatTransferFractalNz transfer;
    if (transfer.TransShape(src_format, src_shape.GetDims(), src_dt, FORMAT_FRACTAL_NZ, dst_shape_vec) != SUCCESS) {
      GELOGE(INTERNAL_ERROR, "Op [%s] trans FRACTAL_NZ Shape failed.", op_desc->GetName().c_str());
      return FAILED;
    }
    ge::GeShape dst_shape(dst_shape_vec);
    if (SetInOutForGemm(data_input, data_output, dst_shape, FORMAT_FRACTAL_NZ) != SUCCESS) {
      GELOGE(INTERNAL_ERROR, "Op [%s] set FRACTAL_NZ desc failed.", op_desc->GetName().c_str());
      return FAILED;
    }
  } else if (input_format == FORMAT_FRACTAL_Z) {
    if (ProcessGemmFractalZ(src_shape, dst_shape_vec) != SUCCESS) {
      GELOGE(INTERNAL_ERROR, "Op [%s] trans FRACTAL_Z Shape failed.", op_desc->GetName().c_str());
      return FAILED;
    }
    ge::GeShape dst_shape(dst_shape_vec);
    if (SetInOutForGemm(data_input, data_output, dst_shape, FORMAT_FRACTAL_Z) != SUCCESS) {
      GELOGE(INTERNAL_ERROR, "Op [%s] set FRACTAL_Z desc failed.", op_desc->GetName().c_str());
      return FAILED;
    }
  }
  // Gemm shape and format should be set at this stage, temporary solution.
  auto out_anchor = node_ptr->GetOutDataAnchor(0);
  GE_CHECK_NOTNULL(out_anchor);
  for (auto &in_anchor : out_anchor->GetPeerInDataAnchors()) {
    GE_CHECK_NOTNULL(in_anchor);
    auto index = static_cast<uint32_t>(in_anchor->GetIdx());
    ge::NodePtr next_node = in_anchor->GetOwnerNode();
    GE_CHECK_NOTNULL(next_node);
    auto next_op_desc = next_node->GetOpDesc();
    GE_CHECK_NOTNULL(next_op_desc);
    auto input_desc = next_op_desc->MutableInputDesc(index);
    GE_CHECK_NOTNULL(input_desc);
    // Mirror the data node's (possibly rewritten) output shape and the requested format
    // onto the consumer's matching input descriptor.
    input_desc->SetFormat(input_format);
    input_desc->SetShape(data_output->GetShape());
  }
  return SUCCESS;
 }

 // Rewrites input descriptor 0 of a single-op output node to FRACTAL_NZ when that is the
 // requested output format; any other requested format leaves the descriptor untouched.
 //
 // op_desc:                 op description whose input 0 descriptor may be updated.
 // single_op_output_format: serialized format name.
 // Returns SUCCESS on success, FAILED when the shape transfer fails, or the status produced
 // by GE_CHECK_NOTNULL when op_desc or its input 0 descriptor is null.
 Status ProcessSingleOpOutput(OpDescPtr &op_desc, string &single_op_output_format) {
  GE_CHECK_NOTNULL(op_desc);
  ge::Format output_format = TypeUtils::SerialStringToFormat(single_op_output_format);
  auto data_input = op_desc->MutableInputDesc(0);
  GE_CHECK_NOTNULL(data_input);
  ge::Format src_format = data_input->GetFormat();
  ge::DataType src_dt = data_input->GetDataType();
  ge::GeShape src_shape = data_input->GetShape();
  std::vector<int64_t> dst_shape_vec;
  if (output_format == FORMAT_FRACTAL_NZ) {
    formats::FormatTransferFractalNz transfer;
    if (transfer.TransShape(src_format, src_shape.GetDims(), src_dt, FORMAT_FRACTAL_NZ, dst_shape_vec) != SUCCESS) {
      GELOGE(INTERNAL_ERROR, "Op [%s] trans FRACTAL_NZ Shape failed.", op_desc->GetName().c_str());
      return FAILED;
    }
    ge::GeShape dst_shape(dst_shape_vec);
    data_input->SetShape(dst_shape);
    data_input->SetFormat(FORMAT_FRACTAL_NZ);
  }
  return SUCCESS;
 }

 Status ProcessDataNodeDynShape(NodePtr &node_ptr, bool &is_single_op) {
 Status ProcessDataNodeDynShape(NodePtr &node_ptr) {
  auto op_desc = node_ptr->GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);
  std::string single_op_input_format;
  if (is_single_op && (ge::AttrUtils::GetStr(op_desc, "_single_input_format", single_op_input_format))) {
    if (ProcessSingleOpInput(node_ptr, single_op_input_format) != SUCCESS) {
      GELOGE(INTERNAL_ERROR, "Process single op input [%s] failed.", node_ptr->GetName().c_str());
      return FAILED;
    }
  }
  bool set_fp16 = false;
  if (!ge::AttrUtils::GetBool(node_ptr->GetOpDesc(), "input_fp16", set_fp16) || !set_fp16) {
    return SUCCESS;
@@ -1375,16 +1265,9 @@ bool NeedUpdateOutputByOutputTypeParm(std::string &output_type, NodePtr &src_nod
  return false;
 }

 Status ProcessNetoutputNodeDynShape(NodePtr &node, std::string &output_type, bool &is_single_op) {
 Status ProcessNetoutputNodeDynShape(NodePtr &node, std::string &output_type) {
  auto op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);
  std::string single_op_output_format;
  if (is_single_op && (ge::AttrUtils::GetStr(op_desc, "_single_output_format", single_op_output_format))) {
    if (ProcessSingleOpOutput(op_desc, single_op_output_format) != SUCCESS) {
      GELOGE(INTERNAL_ERROR, "Process single op output [%s] failed.", node->GetName().c_str());
      return FAILED;
    }
  }
  ge::DataType output_data_type = ge::DT_FLOAT;

  for (const auto &in_anchor : node->GetAllInDataAnchors()) {
@@ -1717,7 +1600,8 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input) {
      auto format = desc.GetFormat();
      auto origin_format = desc.GetOriginFormat();
      bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format);
      if (is_internal) {
      bool need_check_internal_format = (!options_.is_single_op) && is_internal;
      if (need_check_internal_format) {
        GELOGE(PARAM_INVALID, "Input format %s or origin_format %s is not support.",
               TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::FormatToSerialString(origin_format).c_str());
        return FAILED;
@@ -2821,14 +2705,14 @@ Status GraphPrepare::UpdateInputOutputByOptions() {
    }

    if (node_ptr->GetType() == DATA) {
      if (ProcessDataNodeDynShape(node_ptr, options_.is_single_op) != SUCCESS) {
      if (ProcessDataNodeDynShape(node_ptr) != SUCCESS) {
        GELOGE(INTERNAL_ERROR, "Process data node failed");
        return FAILED;
      }
    }

    if (node_ptr->GetType() == ge::NETOUTPUT) {
      if (ProcessNetoutputNodeDynShape(node_ptr, options_.output_datatype, options_.is_single_op) != SUCCESS) {
      if (ProcessNetoutputNodeDynShape(node_ptr, options_.output_datatype) != SUCCESS) {
        GELOGE(INTERNAL_ERROR, "Process netoutput node failed");
        return FAILED;
      }
--- a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc
+++ b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc
@@ -40,6 +40,23 @@ namespace ge {
 namespace {
 const char *const kMbatchSwitchnName = "mbatch-switch-name";
 }  // namespace
 // Reorders a 4-D NCHW dimension list into NHWC order, in place.
 //
 // format:    format the dims in shape_vec are currently expressed in.
 // shape_vec: dimension list; left untouched unless it has NORMAL_TENSOR_SIZE entries and
 //            format is FORMAT_NCHW. A format other than NHWC/NCHW only produces a warning.
 static void ConvertShape2Nhwc(Format &format, vector<int64_t> &shape_vec) {
  const bool has_normal_rank = (shape_vec.size() == static_cast<size_t>(NORMAL_TENSOR_SIZE));
  if (!has_normal_rank || (format == FORMAT_NHWC)) {
    return;
  }
  if (format == FORMAT_NCHW) {
    // Snapshot the NCHW dims, then rebuild the vector as N, H, W, C.
    const vector<int64_t> nchw_dims(shape_vec);
    shape_vec = {nchw_dims[NCHW_DIM_N], nchw_dims[NCHW_DIM_H], nchw_dims[NCHW_DIM_W], nchw_dims[NCHW_DIM_C]};
    return;
  }
  GELOGW("The format is not NCHW, current format is %s", TypeUtils::FormatToSerialString(format).c_str());
 }

 Status InsertNewOpUtil::Init() {
  insert_op_conf_.reset((new (std::nothrow) domi::InsertNewOps()));
  GE_CHECK_NOTNULL(insert_op_conf_);
@@ -223,11 +240,13 @@ Status InsertNewOpUtil::UpdatePrevNodeByAipp(NodePtr &node, std::set<NodePtr> &s
    GELOGE(FAILED, "UpdateOutputDesc fail, graph_ret:%d", graph_ret);
    return FAILED;
  }
  GELOGI("Get size [%ld] from aipp [%s].", size, aipp_op_desc->GetName().c_str());
  GELOGI("Get input size [%ld] from aipp [%s].", size, aipp_op_desc->GetName().c_str());
  if (size == 0) {
    GELOGE(FAILED, "Can not get size from aipp [%s]", aipp_op_desc->GetName().c_str());
    return FAILED;
  }
  // Save the input size of aipp node, which will be used in dumping aipp node or fused aipp node
  (void)AttrUtils::SetInt(aipp_input, ATTR_NAME_INPUT_ORIGIN_SIZE, size);

  auto in_data_anchor = node->GetInDataAnchor(0);
  GE_CHECK_NOTNULL(in_data_anchor);
@@ -305,6 +324,7 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt

  auto data_opdesc = data->GetOpDesc();
  GE_CHECK_NOTNULL(data_opdesc);
  Format old_format = output_desc->GetFormat();
  auto ret = data_opdesc->UpdateOutputDesc(0, *input_desc);
  if (ret != GRAPH_SUCCESS) {
    GELOGE(INTERNAL_ERROR, "Failed to update data %s output using switchn %s", data->GetName().c_str(),
@@ -317,9 +337,34 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt
           switchn->GetName().c_str());
    return INTERNAL_ERROR;
  }
  // Update attr _mbatch_origin_input_dims for data when it is linked to aipp
  UpdateMultiBatchInputDims(data_opdesc, old_format);
  return SUCCESS;
 }

 void InsertNewOpUtil::UpdateMultiBatchInputDims(const OpDescPtr &data_opdesc, Format &old_format) {
  if (!data_opdesc->HasAttr(ATTR_MBATCH_ORIGIN_INPUT_DIMS)) {
    GELOGW("Failed to acquire _mbatch_origin_input_dims attr from node [%s]", data_opdesc->GetName().c_str());
    return;
  }
  auto new_data_dims = data_opdesc->GetOutputDesc(0).GetShape().GetDims();
  vector<int64_t> origin_input_dims;
  (void)AttrUtils::GetListInt(data_opdesc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims);
  // Convert origin_input_dims to NHWC because data format is set to NHWC when it is linked to aipp.
  ConvertShape2Nhwc(old_format, origin_input_dims);
  if (new_data_dims.size() != origin_input_dims.size()) {
    return;
  }
  for (size_t i = 0; i < origin_input_dims.size(); ++i) {
    // Need to update shape when aipp has crop function because H,W is different, ignore -1.
    if (origin_input_dims[i] > 0) {
      origin_input_dims[i] = new_data_dims[i];
    }
  }
  (void)AttrUtils::SetListInt(data_opdesc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims);
  return;
 }

 Status InsertNewOpUtil::GetDataRelatedNode(NodePtr &node, std::map<NodePtr, std::set<NodePtr>> &data_next_node_map) {
  GELOGI("Start to get data and next node %s.", node->GetName().c_str());
  OpDescPtr data_op = node->GetOpDesc();
--- a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h
+++ b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h
@@ -61,6 +61,7 @@ class InsertNewOpUtil {

  std::unique_ptr<domi::InsertNewOps> insert_op_conf_;

  void UpdateMultiBatchInputDims(const OpDescPtr &data_opdesc, Format &old_format);
  Status UpdatePrevNodeByAipp(NodePtr &node, std::set<NodePtr> &switchns);
  Status UpdateDataBySwitchN(const NodePtr &switchn, const NodePtr &data);
  Status GetDataRelatedNode(NodePtr &node, std::map<NodePtr, std::set<NodePtr>> &data_next_node_map);
--- a/src/ge/host_kernels/concat_v2_kernel.cc
+++ b/src/ge/host_kernels/concat_v2_kernel.cc
@@ -31,6 +31,7 @@
 namespace ge {
 namespace {
 const size_t kConcatV2InputNum = 3;
 const int kSupportEmptyTensorRank = 1;
 const std::set<DataType> concatv2_supported_type = {DT_INT32, DT_FLOAT};

 template <typename T>
@@ -39,7 +40,12 @@ void GetOutputData(std::vector<T> &y_data, int64_t loop, size_t &input_size,
  for (int64_t i = 0; i < loop; i++) {
    for (size_t k = 0; k < input_size; k++) {
      GeShape datak_shape = input.at(k)->GetTensorDesc().GetShape();
      const T *datak = reinterpret_cast<const T *>(input.at(k)->GetData().data());
      auto buffer = input.at(k)->GetData();
      const T *datak = reinterpret_cast<const T *>(buffer.data());
      if (datak == nullptr || buffer.size() == 0) {
        GELOGW("input[%zu] is with no data", k);
        continue;
      }
      int64_t gapk = datak_shape.GetShapeSize() / loop;  // [2,3] is 6/loop
      for (int64_t j = 0; j < gapk; j++) {
        y_data.push_back(datak[j + gapk * i]);
@@ -63,7 +69,8 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector<ge:
    return PARAM_INVALID;
  }
  int tidx = -1;
  Status ret = ConcatV2PreCompute(input, tidx);
  ConstGeTensorPtr tensor = nullptr;
  Status ret = ConcatV2PreCompute(input, tidx, tensor);
  if (ret != SUCCESS) {
    return ret;
  }
@@ -71,9 +78,8 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector<ge:
  size_t input_size = input.size();  // N + 1
  input_size--;                      // N

  ConstGeTensorPtr tensor0 = input.at(0);
  GE_CHECK_NOTNULL(tensor0);
  DataType data_type = tensor0->GetTensorDesc().GetDataType();
  GE_CHECK_NOTNULL(tensor);
  DataType data_type = tensor->GetTensorDesc().GetDataType();
  uint32_t length = 0;
  if (!TypeUtils::GetDataTypeLength(data_type, length)) {
    GELOGW("Can't GetDataTypeLength of data_type: %s", TypeUtils::DataTypeToSerialString(data_type).c_str());
@@ -91,7 +97,7 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector<ge:
    return MEMALLOC_FAILED;
  }

  GeShape data0_shape = tensor0->GetTensorDesc().GetShape();
  GeShape data0_shape = tensor->GetTensorDesc().GetShape();
  int64_t loop = 1;
  for (int i = 0; i < tidx; i++) {
    loop *= data0_shape.GetDim(i);
@@ -110,29 +116,33 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector<ge:
  return SUCCESS;
 }

 Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &input, int &tidx) {
 Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &input, int &tidx,
                                          ConstGeTensorPtr &tensor) {
  size_t input_size = input.size();
  // N >= 2 and N + 1 >= 3
  if (input_size < kConcatV2InputNum) {
    GELOGI("The number of input for ConcatV2 must not be less than %zu.", kConcatV2InputNum);
    return NOT_CHANGED;
  }

  bool has_empty_tensor = false;
  input_size--;
  for (size_t i = 0; i < input_size; i++) {
    if (input[i] == nullptr) {
      GELOGI("Input%zu must not be null.", i);
      return NOT_CHANGED;
    }
    if (input.at(i)->GetData().size() == 0) {
      GELOGI("Check data size fail. input%zu size is 0.", i);
      return NOT_CHANGED;
      GELOGW("input[%zu] is with no data.", i);
      has_empty_tensor = true;
      continue;
    }
    if (tensor == nullptr) {
      tensor = input.at(i);  // get first valid tensor with data
    }
  }

  input_size--;
  ConstGeTensorPtr tensor0 = input.at(0);
  GE_CHECK_NOTNULL(tensor0);
  DataType data_type = tensor0->GetTensorDesc().GetDataType();
  GE_CHECK_NOTNULL(tensor);
  DataType data_type = tensor->GetTensorDesc().GetDataType();
  for (size_t i = 1; i < input_size; i++) {
    if (data_type != input.at(i)->GetTensorDesc().GetDataType()) {
      GELOGI("Data type of N inputs for ConcatV2 not the same, check input %zu failed.", i);
@@ -149,13 +159,18 @@ Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &i
  ConstGeTensorPtr tensor_axis = input.at(input_size);
  GE_CHECK_NOTNULL(tensor_axis);
  const int *axis = reinterpret_cast<const int *>(tensor_axis->GetData().data());
  tidx = axis[0];                                                                // [-rank(values), rank(values))
  int dims = static_cast<int>(tensor0->GetTensorDesc().GetShape().GetDimNum());  // rank
  GE_CHECK_NOTNULL(axis);
  tidx = axis[0];                                                               // [-rank(values), rank(values))
  int rank = static_cast<int>(tensor->GetTensorDesc().GetShape().GetDimNum());  // rank
  if (tidx < 0) {
    tidx += dims;
    tidx += rank;
  }
  if (tidx < 0 || tidx > dims) {
    GELOGI("ConcatV2 tidx not legal.");
  // 1. tidx should in range [0,rank)
  // 2. empty tensor only support case: [n],[m],[]
  // case: [[],[]] ,[[],[]] ,[] or other case when rank >=2 is not supported
  if (tidx < 0 || tidx >= rank || (has_empty_tensor && rank > kSupportEmptyTensorRank)) {
    GELOGW("ConcatV2 info: tidx[%d]_rank[%d]_has_empty_tensor[bool:%d] cannot be supported, skip fold.", tidx, rank,
           has_empty_tensor);
    return NOT_CHANGED;
  }

--- a/src/ge/host_kernels/concat_v2_kernel.h
+++ b/src/ge/host_kernels/concat_v2_kernel.h
@@ -28,7 +28,7 @@ class ConcatV2Kernel : public Kernel {
                 std::vector<GeTensorPtr> &v_output) override;

 private:
  Status ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &input, int &tidx);
  Status ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &input, int &tidx, ConstGeTensorPtr &tensor);
 };
 }  // namespace ge

--- a/src/ge/offline/main.cc
+++ b/src/ge/offline/main.cc
@@ -39,6 +39,7 @@
 #include "ir_build/atc_ir_common.h"
 #include "omg/omg.h"
 #include "omg/parser/parser_factory.h"
 #include "omg/parser/parser_inner_ctx.h"
 #include "parser/common/register_tbe.h"
 #include "register/op_registry.h"
 #include "single_op_parser.h"
@@ -178,8 +179,6 @@ DEFINE_string(compress_weight_conf, "", "Optional; the config file to compress w

 DEFINE_string(enable_single_stream, "", "Optional; enable single stream. true: enable; false(default): disable");

 DEFINE_string(quant_optimize, "true", "Optional; enable quant optimize. true: enable; false(default): disable");

 DEFINE_string(log, "default", "Optional; generate atc log. Support debug, info, warning, error, null");

 DEFINE_string(dump_mode, "0", "Optional; generate infershape json,only support 1 , 0.");
@@ -253,6 +252,9 @@ class GFlagUtils {
      "  --op_select_implmode    Set op select implmode. Support high_precision, high_performance."
      "default: high_performance\n"
      "disable\n"
      "  --optypelist_for_implmode    Appoint which op to use op_select_implmode, used with op_select_implmode ."
      "Separate multiple nodes with commas (,). Use double quotation marks (\") to enclose each argument."
      "E.g.: \"node_name1,node_name2\"\n"
      "  --head_stream       Add head stream. 0(default): disable; 1: enable\n"
      "  --soc_version       The soc version. E.g.: \"Ascend310\"\n"
      "  --core_type         Set core type AiCore or VectorCore. VectorCore: use vector core. "
@@ -270,8 +272,7 @@ class GFlagUtils {
      "Use double quotation marks (\") to enclose each argument."
      "E.g: \"imagesize1_height,imagesize1_width;imagesize2_height,imagesize2_width\"\n"
      "  --auto_tune_mode    Set tune mode. E.g.: \"GA,RL\", support configure multiple, spit by ,\n"
      "  --enable_single_stream    Enable single stream. true: enable; false(default): disable\n"
      "  --quant_optimize Enable quant optimize. true(default): enable; false: disable\n");
      "  --enable_single_stream    Enable single stream. true: enable; false(default): disable\n");

    gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
    // Using gflags to analyze input parameters
@@ -663,6 +664,27 @@ void LoadCustomOpLib() {
  }
 }

 // Stores the custom caffe proto directory in the parser context.
 //
 // When the ASCEND_OPP_PATH environment variable is set, the directory is
 // "<ASCEND_OPP_PATH>/framework/custom/caffe/". Otherwise it is derived from
 // ge::GELib::GetPath(): the text after the last '/' is dropped, then everything after the
 // (new) last '/' is dropped, and "ops/framework/custom/caffe/" is appended.
 void SaveCustomCaffeProtoPath() {
  GELOGI("Enter save custom caffe proto path.");

  const char *opp_root = std::getenv("ASCEND_OPP_PATH");
  if (opp_root == nullptr) {
    std::string lib_dir = ge::GELib::GetPath();
    GELOGI("path_base is %s", lib_dir.c_str());
    // substr (rather than erase) is used on purpose: it tolerates rfind returning npos.
    lib_dir = lib_dir.substr(0, lib_dir.rfind('/'));
    lib_dir = lib_dir.substr(0, lib_dir.rfind('/') + 1);
    ge::GetParserContext().custom_proto_path = lib_dir + "ops/framework/custom/caffe/";
    return;
  }

  GELOGI("Get custom proto path from env : %s", opp_root);
  ge::GetParserContext().custom_proto_path = std::string(opp_root) + "/framework/custom/caffe/";
 }

 #endif

 Status CreateInputsForInference(const ge::Graph &graph, vector<ge::GeTensor> &inputs) {
@@ -850,6 +872,7 @@ domi::Status GenerateModel(std::map<string, string> &options, std::string output
    atc_params.insert(std::pair<string, string>("is_output_adjust_hw_layout", FLAGS_is_output_adjust_hw_layout));
    atc_params.insert(std::pair<string, string>("compress_weight_conf", FLAGS_compress_weight_conf));
    atc_params.insert(std::pair<string, string>(string(ge::OUTPUT_DATATYPE), FLAGS_output_type));
    atc_params.insert(std::pair<string, string>("output", output));

    Status ret =
      ParseGraph(graph, atc_params, FLAGS_model.c_str(), FLAGS_weight.c_str(), (domi::FrameworkType)FLAGS_framework,
@@ -982,6 +1005,8 @@ domi::Status GenerateOmModel() {
  // Load custom operator Library
  LoadCustomOpLib();

  SaveCustomCaffeProtoPath();

  ret = ge::CheckCustomAiCpuOpLib();

  GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "check custom aicpu run so failed!");
@@ -1043,8 +1068,6 @@ domi::Status GenerateOmModel() {

  options.insert(std::pair<string, string>(string(ge::ENABLE_SINGLE_STREAM), FLAGS_enable_single_stream));

  options.insert(std::pair<string, string>(string(ge::QUANT_OPTIMIZE), FLAGS_quant_optimize));

  SetDynamicBatchSizeOrImagesizeOptions();

  if (!FLAGS_save_original_model.empty()) {
--- a/src/ge/offline/single_op_parser.cc
+++ b/src/ge/offline/single_op_parser.cc
@@ -273,10 +273,6 @@ Status SingleOpParser::ConvertToBuildParam(int index, const SingleOpDesc &single
    } else {
      op_desc->AddInputDesc(desc.name, ge_tensor_desc);
    }
    if (desc.format == FORMAT_FRACTAL_NZ || desc.format == FORMAT_FRACTAL_Z) {
      ge_tensor_desc.SetFormat(FORMAT_ND);
      ge_tensor_desc.SetOriginFormat(FORMAT_ND);
    }
    build_param.inputs.emplace_back(ge_tensor_desc);
  }

@@ -292,10 +288,6 @@ Status SingleOpParser::ConvertToBuildParam(int index, const SingleOpDesc &single
    TensorUtils::SetInputTensor(ge_tensor_desc, false);
    TensorUtils::SetOutputTensor(ge_tensor_desc, true);
    op_desc->AddOutputDesc(ge_tensor_desc);
    if (desc.format == FORMAT_FRACTAL_NZ || desc.format == FORMAT_FRACTAL_Z) {
      ge_tensor_desc.SetFormat(FORMAT_ND);
      ge_tensor_desc.SetOriginFormat(FORMAT_ND);
    }
    build_param.outputs.emplace_back(ge_tensor_desc);
  }

--- a/src/ge/session/omg.cc
+++ b/src/ge/session/omg.cc
@@ -29,6 +29,8 @@
 #include "common/types.h"
 #include "common/util.h"
 #include "common/util/error_manager/error_manager.h"
 #include "common/helper/model_helper.h"
 #include "common/ge/ge_util.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/omg/parser/parser_inner_ctx.h"
 #include "google/protobuf/io/zero_copy_stream_impl.h"
@@ -419,10 +421,6 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const
      GELOGE(domi::FAILED, "Can not find src node (%s) in graph.", user_out_nodes[i].first.c_str());
      return domi::FAILED;
    }
    if (out_node->GetType() == DATA) {
      GELOGE(domi::FAILED, "out_nodes [%s] can not be set input data, please check", user_out_nodes[i].first.c_str());
      return domi::FAILED;
    }
    auto op_desc = out_node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    if (i < output_formats.size()) {
@@ -441,7 +439,7 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const
      (void)ge::AttrUtils::SetListInt(op_desc, "_output_dt_index", it_index->second);
    }
    output_nodes_info.push_back(std::make_pair(out_node, user_out_nodes[i].second));
    output_nodes_name.push_back(out_node->GetName());
    output_nodes_name.push_back(out_node->GetName() + ":" + std::to_string(user_out_nodes[i].second));
  }
  // default output node (leaf)
  if (user_out_nodes.empty()) {
@@ -468,7 +466,7 @@ Status GetOutputLeaf(NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>>
  if (node->GetType() != NETOUTPUT) {
    for (size_t index = 0; index < size; ++index) {
      output_nodes_info.push_back(std::make_pair(node, index));
      output_nodes_name.push_back(node->GetName());
      output_nodes_name.push_back(node->GetName() + ":" + std::to_string(index));
    }
  } else {
    const auto in_anchors = node->GetAllInDataAnchors();
@@ -480,7 +478,7 @@ Status GetOutputLeaf(NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>>
      }
      auto out_node = out_anchor->GetOwnerNode();
      output_nodes_info.push_back(std::make_pair(out_node, out_anchor->GetIdx()));
      output_nodes_name.push_back(out_node->GetName());
      output_nodes_name.push_back(out_node->GetName() + ":" + std::to_string(out_anchor->GetIdx()));
    }
  }
  return SUCCESS;
@@ -612,9 +610,16 @@ FMK_FUNC_HOST_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<stri
  Params::Instance()->SetTarget(target);

  // Create an empty computegraph
  ComputeGraphPtr compute_graph = nullptr;
  GE_MAKE_SHARED(compute_graph = std::make_shared<ComputeGraph>(kGraphDefaultName + "_" + CurrentTimeInStr()),
                 return FAILED);
  std::string om_name;
  ParseAtcParms(atc_params, "output", om_name);
  ModelHelper model_helper;
  string graph_name = "";
  Status name_ret = model_helper.GetBaseNameFromFileName(om_name, graph_name);
  if (name_ret != SUCCESS) {
    graph_name = kGraphDefaultName + "_" + CurrentTimeInStr();
  }
  ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>(graph_name);
  GE_CHECK_NOTNULL(compute_graph);
  graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph);

  // initialize omgContext
@@ -664,8 +669,6 @@ FMK_FUNC_HOST_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<stri
    GELOGI("The pre-checking report has been saved to %s.", check_report.c_str());
  }

  // Prevent data residue in multiple calls
  PreChecker::Instance().Clear();
  GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC model parse ret fail.");

  std::string input_fp16_nodes;
@@ -693,12 +696,19 @@ FMK_FUNC_HOST_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<stri
  graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph);
  auto weights_parser = WeightsParserFactory::Instance()->CreateWeightsParser(type);
  ret = weights_parser->Parse(weights_file, graph);
  GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC weights parse ret fail.");

  // IN ONLY_PRE_CHECK mode, generate pre inspection report only.
  if (run_mode == ONLY_PRE_CHECK) {
  if (PreChecker::Instance().HasError() || run_mode == ONLY_PRE_CHECK) {
    std::string check_report;
    ParseAtcParms(atc_params, "check_report", check_report);
    GE_RETURN_WITH_LOG_IF_ERROR(PreChecker::Instance().Save(check_report), "Generate pre-checking report failed.");
    GEEVENT("The pre-checking report has been saved to %s.", check_report.c_str());
    return SUCCESS;
  }
  // Prevent data residue in multiple calls
  PreChecker::Instance().Clear();

  GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC weights parse ret fail.");

  GELOGI("ATC parser success.");

--- a/src/proto/fusion_model.proto
+++ b/src/proto/fusion_model.proto
@@ -17,9 +17,10 @@
 syntax = "proto3";

 import "om.proto";

 package domi;

 message FusionModelDef {
    string version = 1;
    repeated OpDef fusion_op = 2;
 }
 }
--- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
@@ -1029,9 +1029,9 @@ REG_OP(BesselI1e)
 * y: A Tensor of type UnaryDataType.

 * @attention Constraints:
 * @li "base" is supposed to be greater than 0. Retaining the default \n
 * @li "base" is supposed to be greater than 0. Retaining the default
 * value "-1" sets "base" to "e".
 * @li If the input value of operator Log is within the range (0, 0.01] or \n
 * @li If the input value of operator Log is within the range (0, 0.01] or
 * [0.95, 1.05], the output accuracy is subject to change.

 * @par Third-party framework compatibility
@@ -1047,11 +1047,11 @@ REG_OP(Log)
    .OP_END_FACTORY_REG(Log)

 /**
 * @brief Returns x1 * x2 element-wise.\n
 * @brief Returns x1 * x2 element-wise.
 * y = x1 * x2

 * @par Inputs:
 * @li x1: A Tensor. Must be one of the following types: float16, float32,\n
 * @li x1: A Tensor. Must be one of the following types: float16, float32,
 * float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128.
 * @li x2: A Tensor. Must be one of the following types: float16, float32,
 * float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128.
@@ -1079,7 +1079,7 @@ REG_OP(Mul)
    .OP_END_FACTORY_REG(Mul)

 /**
 * @brief Computes the gradient of the square root of "x" with regard to its\n
 * @brief Computes the gradient of the square root of "x" with regard to its
 * input. grad = dy * 0.5/y, where y = sqrt(x), and "dy" is the corresponding
 * input gradient.

@@ -3022,6 +3022,7 @@ REG_OP(CosineEmbeddingLoss)
 *@brief Kullback-Leibler divergence.

 *@par Inputs:
 * Two inputs, including:
 *@li x: Tensor of arbitrary shape.
 *@li target: Tensor of the same shape and dtype as x.

--- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
@@ -93,31 +93,49 @@ REG_OP(MatMulV2)

 *@par Inputs:
 *Five inputs, including:
 *@li a: A matrix Tensor. 4D. Must be one of the following types:\n float16, int8. Has format [FRACTAL_NZ].
 *@li b: A matrix Tensor. 4D. Must be one of the following types:\n float16, int8. When type is int8, has format [FRACTAL_Z], \n otherwise has format [FRACTAL_NZ].
 *@li c: A matrix Tensor. 2D or higher. Must be one of the following types: \n float16, int32, float32. When type is int32, has format [ND], \n otherwise has format [FRACTAL_NZ].
 *@li alpha: A 1D Tensor. The shape of alpha is [1].\n Must be one of the following types: float16, int32, float32. Has format [ND].
 *@li beta: A 1D Tensor. The shape of beta is [1].\n Must be one of the following types: float16, int32, float32. Has format [ND].
 *@li a: A matrix Tensor. Must be one of the following types: float16, int8.
 * Has format [ND, FRACTAL_NZ]. 2D(ND) or 4D(FRACTAL_NZ).
 *@li b: A matrix Tensor. Must be one of the following types: float16, int8.
 * Has format [ND, FRACTAL_NZ, FRACTAL_Z]. 2D(ND) or 4D(FRACTAL_NZ, FRACTAL_Z).
 *@li c: A matrix Tensor. Must be one of the following types: float16, int32,
 * float32. Has format [ND, FRACTAL_NZ]. 2D(ND) or 4D(FRACTAL_NZ).
 *@li alpha: A 1D Tensor. The shape of alpha is [1]. Must be one of the following
 * types: float16, int32, float32. Has format [ND].
 *@li beta: A 1D Tensor. The shape of beta is [1]. Must be one of the following
 * types: float16, int32, float32. Has format [ND].
 * The format of a, b, c has restriction:\n
 * When type of a is int8 and type of c is int32, the format of a, b, c should
 * all be ND, or a is FRACTAL_NZ and b is FRACTAL_Z and c is ND.\n
 * When type of a is int8 and type of c is float32, the format of a, b, c should
 * all be ND or a is FRACTAL_NZ and b is FRACTAL_Z and c is FRACTAL_NZ.\n
 * When type of a is float16 and type of c is float16, the format of a, b, c
 * should all be ND or FRACTAL_NZ.\n
 * When type of a is float16 and type of c is float32, the format of a, b, c
 * should all be ND or FRACTAL_NZ.

 *@par Attributes:
 *Two attributes, including:
 *@li transpose_a: Optional. A bool.\n If True, changes the shape of "a" from [M, K] to [K, M].\n Reserved parameters, not used for now.
 *@li transpose_b: Optional. A bool.\n If True, changes the shape of "b" from [M, K] to [K, M].\n Reserved parameters, not used for now.
 *@li transpose_a: Optional. A bool. If True, changes the shape of "a" from
 * [M, K] to [K, M].
 *@li transpose_b: Optional. A bool. If True, changes the shape of "b" from
 * [K, N] to [N, K].

 *@par Outputs:
 *@out: The result matrix Tensor. 4D. Must be one of the following types:\n float16, float32, int32. Has format [FRACTAL_NZ].
 *y: The result matrix Tensor. Must be one of the following types: float16,
 * float32, int32. Has format [ND, FRACTAL_NZ], the format should be equal to a.
 * 2D(ND) or 4D(FRACTAL_NZ).
 */

 REG_OP(Gemm)
 REG_OP(GEMM)
    .INPUT(a, TensorType({DT_FLOAT16, DT_INT8}))
    .INPUT(b, TensorType({DT_FLOAT16, DT_INT8}))
    .INPUT(c, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
    .INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
    .INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
    .OUTPUT(out, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
    .ATTR(transpose_a, Bool, false)
    .ATTR(transpose_b, Bool, false)
    .OP_END_FACTORY_REG(Gemm)
    .OP_END_FACTORY_REG(GEMM)

 /**
 *@brief Multiplies matrix "a" by matrix "b", producing "a * b".
--- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
@@ -361,14 +361,14 @@ REG_OP(BatchNormGradExt2)
 *@par Inputs:
 *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
 *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x"  Specifies the mean used for inference.
 *@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x"  Specifies the variance used for inference.
 *@li momentum: An optional string, input x's Scale factor
 *@li variance: A Tensor of type float32 or float16. Must be 1D if input "x"  Specifies the variance used for inference.
 *@li momentum:  A Tensor of type float32 or float16, represents the mean and the variance's scale factor
 *@li scale: An optional tensor of type float16 or float32. Not used.
 *@li offset: An optional tensor of type float16 or float32. Not used.
 *@par Attributes:
 *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
 *@li use_global_stats: Mean inference mode. Can only be "True".
 *@li mode: An optional input. Not used.
 *@li mode: An optional attribute. Not used.
 *@par Outputs:\n
 *@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x"
 */
@@ -391,7 +391,7 @@ REG_OP(BNInference)

 *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x"  Specifies the mean used for inference.
 *@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x"  Specifies the variance used for inference.
 *@li momentum: An optional float, input x's Scale factor
 *@li momentum: A Tensor of type float32 or float16, the mean and the variance's Scale factor
 *@par Attributes:
 *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
 *@li use_global_stats: mean inference mode , only can be "True".
@@ -420,13 +420,13 @@ REG_OP(BnHost)
 *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
 *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x"  Specifies the mean used for inference.
 *@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x"  Specifies the variance used for inference.
 *@li momentum: An optional float, input x's Scale factor
 *@li scale: An optional tensor of type float16 or float32. Not used.
 *@li offset: An optional tensor of type float16 or float32. Not used.
 *@par Attributes:
 *@li momentum: An optional float32 num, represents the mean and the variance's scale factor
 *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
 *@li use_global_stats: Mean inference mode. Can only be "True".
 *@li mode: An optional input. Not used.
 *@li mode: An optional attribute. Not used.
 *@par Outputs:\n
 *@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x"
 */
--- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
@@ -62,7 +62,7 @@ namespace ge {
 * data is 5D with shape [N, C1, Ho, Wo, C0],
 * where C is the same as that of the feature map and C0 is 16.\n
 * Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 *
 * stride_h + 32 * filter_h) * ceil(Wi, 16) ≤ l1_size and Hf*Wf ≤ l0b_size/512.\n
 * stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512.

 * @par Third-party framework compatibility
 * @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter.
@@ -119,7 +119,7 @@ REG_OP(DepthwiseConv2DBackpropFilter)
 * data is 5D with shape [N, C1, Ho, Wo, C0],
 * where C is the same as that of the feature map and C0 is 16.\n
 * Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 *
 * stride_h + 32 * filter_h) * ceil(Wi, 16) ≤ l1_size and Hf*Wf ≤ l0b_size/512.\n
 * stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512.

 * @par Third-party framework compatibility
 * @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter.
@@ -178,7 +178,7 @@ REG_OP(DepthwiseConv2DBackpropFilterD)
 * Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the
 * data is 5D with shape [N, C1, Ho, Wo, C0],
 * where C is the same as that of the feature map and C0 is 16.\n
 * Limited by Tiling: max_h_in_l1 ≥ C0, where max_h_in_l1 = (l1_size - Hf *
 * Limited by Tiling: max_h_in_l1 >= C0, where max_h_in_l1 = (l1_size - Hf *
 * Wf * C0 * C0 * 2) / (2 * Wo *C0).\n

 * @par Third-party framework compatibility
@@ -235,7 +235,7 @@ REG_OP(DepthwiseConv2DBackpropInput)
 * Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the
 * data is 5D with shape [N, C1, Ho, Wo, C0],
 * where C is the same as that of the feature map and C0 is 16.\n
 * Limited by Tiling: max_h_in_l1 ≥ C0, where max_h_in_l1 = (l1_size - Hf *
 * Limited by Tiling: max_h_in_l1 >= C0, where max_h_in_l1 = (l1_size - Hf *
 * Wf * C0 * C0 * 2) / (2 * Wo *C0).\n

 * @par Third-party framework compatibility
@@ -460,13 +460,10 @@ REG_OP(Conv2DBackpropInputD)
 *@par Inputs:
 * Three inputs:
 * @li x: A Tensor. Must have the same type as "filter". 4D with shape
 * [batch, out_height, out_width, out_channels]
 * or [batch, out_channels, out_height, out_width]. Gradients with respect
 * [batch, out_channels, out_height, out_width]. Gradients with respect
 * to the output of the convolution.
 * @li filter: A Tensor of type float16.
 * 4D with shape [filter_height, filter_width, in_channels, out_channels],
 * or [out_channels, filter_height, filter_width, in_channels],
 * or [out_channels, in_channel, filter_height, filter_width].
 * 4D with shape [out_channels, in_channel, filter_height, filter_width].\n
 * Two optional inputs:
 * @li bias: An optional tensor of type float16
 * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved.\n
@@ -478,14 +475,14 @@ REG_OP(Conv2DBackpropInputD)
 * padding on the feature map
 * @li dilations: A tuple or list of 4 integers. The dilation factor for each
 * dimension of input. Must be [1, 1, 1, 1].
 * @li groups: Number of blocked connections from input channels to \n
 output channels.
 * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC".\n
 * @li groups: Number of blocked connections from input channels to
 * output channels.
 * @li data_format: An optional string from: "NCHW". Defaults to "NCHW".\n
 * Specifies the data format of the input and output data.
 * @li offset_x: An optional integer for quantized deconvolution.
 *@par Outputs:
 * y: A Tensor. Has the same type as "filter". 4D tensor with shape
 * [batch, height, width, channels] or [batch, channels, height, width].
 * [batch, channels, height, width].
 */
 REG_OP(Deconvolution)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8}))
@@ -493,11 +490,11 @@ REG_OP(Deconvolution)
    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32}))
    .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32}))
    .ATTR(strides, ListInt, {1, 1, 1, 1})
    .ATTR(strides, ListInt, {1, 1})
    .ATTR(pads, ListInt, {0, 0, 0, 0})
    .ATTR(dilations, ListInt, {1, 1, 1, 1})
    .ATTR(groups, Int, 1)
    .ATTR(data_format, String, "NHWC")
    .ATTR(data_format, String, "NCHW")
    .ATTR(offset_x, Int, 0)
    .OP_END_FACTORY_REG(Deconvolution)
 /**
@@ -642,7 +639,7 @@ REG_OP(Conv2DBackpropFilterD)
 * @verbatim
     Output           | Restrictions
    ------------------|----------------------------------------------
     W dimension == 1 | HxW(input) == HxW(filter) == 1x1,2x2...11x11.
     W dimension == 1 | HxW(input) == HxW(filter)
     H dimension == 1 |
    ------------------|----------------------------------------------
     W dimension == 1 | Not supported
--- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
@@ -186,7 +186,7 @@ REG_OP(ROIAlignGrad)
 * Three inputs, including: \n
 *@li features: A 5HD Tensor of type float32 or float16.
 *@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, the value "5" indicates the indexes of images where the ROIs are located, 
 * "x0", "x1", "y0", and "y1".
 * "x0", "y0", "x1", and "y1".
 *@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved.

 *@par Attributes:
--- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
@@ -219,7 +219,7 @@ REG_OP(MaxPool3D)
 * @attention Constraints:
 * @li Computing gradients of global pooling is not supported, which means
 * "ksize < x1".
 * @li "ksiez" is in the range [1, 255]. "strides" is in the range [1, 63]
 * @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]

 * @par Third-party framework compatibility
 * Compatible with the TensorFlow operator MaxPoolGrad.
@@ -239,10 +239,9 @@ REG_OP(MaxPoolGrad)
 * @brief Computes second-order gradients of the maxpooling function.

 * @par Inputs:
 * @li x1: Original forward input tensor. Supported type:float, double, int32,
 * uint8, int16, int8, int64, uint16, half, uint32, uint64.
 * @li x2: Has the same type and format as input "x1".
 * @li grad:Has the same type and format as input "x1".
 * @li x1: Original forward input tensor of type RealNumberType
 * @li x2: Original forward output tensor of type RealNumberType
 * @li grad: Gradient tensor of type RealNumberType

 * @par Attributes:
 * @li ksize: A required list or tuple,
@@ -258,9 +257,12 @@ REG_OP(MaxPoolGrad)
 * @li "x1" and "grads" must have the same shape.
 * @li "x2" and "y" must have the same shape. Otherwise, an error is reported.
 * @li "x1", "x2", "grads", and "y" must be 5D tensors.
 * @li ksize[H] and ksize[W] is in the range [1, 255].
 * @li strides[H] and strides[W] is in the range [1, 63].
 * @li Other dimensions of ksize and strides is 1.

 * @par Outputs:
 * @li y: Has the same type and format as input "x1".
 * @li y: Result tensor of type RealNumberType

 * @par Third-party framework compatibility
 * @li Compatible with the TensorFlow operator MaxPoolGradGrad.
@@ -399,18 +401,15 @@ REG_OP(MaxPoolGradWithArgmax)
 * @brief Computes second-order gradients of the maxpooling function.

 * @par Inputs:
 * @li x: Original forward input tensor. Supported type: float, double, int32,
 * uint8, int16, int8, int64, uint16, half, uint32, uint64.
 * @li grad: Gradient tensor. Supported type: float, double, int32,
 * uint8, int16, int8, int64, uint16, half, uint32, uint64.
 * @li argmax: An tensor of type int32 or int64.
 * @li x: Original forward input tensor of type RealNumberType
 * @li grad: Gradient tensor of type RealNumberType
 * @li argmax: A tensor of type IndexNumberType
 * @par Attributes:
 * @li ksize: A required list, specifying the size of the sliding window.
 * @li strides: A required list, specifying the stride of the sliding window.
 * @li padding: A required string, window sliding mode. Either SAME or VALID.
 * @par Outputs:
 * @li y:Result tensor. Supported type: float, double, int32,
 * uint8, int16, int8, int64, uint16, half, uint32, uint64
 * @li y:Result tensor of type RealNumberType

 * @attention Constraints:
 * @li Only the cloud platform is supported.
--- a/third_party/fwkacllib/inc/ops/nn_training_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h
@@ -41,7 +41,7 @@ namespace ge {
 *@li beta1: A scalar. Has the same type as "var".
 *@li beta2: A scalar. Has the same type as "var".
 *@li epsilon: A scalar. Has the same type as "var".
 *@li grad: A tensor for the gradient. Has the same type as "var". 
 *@li grad: A tensor for the gradient. Has the same type as "var".
 *
 *@par Attributes:
 * use_locking: An optional bool. Defaults to "False".
@@ -465,7 +465,7 @@ REG_OP(ApplyKerasMomentumD)


 /**
 *@brief Updates '*var' according to the Adam algorithm..
 *@brief Updates '*var' according to the Adam algorithm.
 *   lr_t := {learning_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t)
 *   m_t := beta_1 * m_{t-1} + (1 - beta_1) * g
 *   v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g
@@ -866,7 +866,7 @@ REG_OP(ApplyCenteredRMSProp)
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyCenteredRMSProp)
 	

 /**
 *@brief Updates "var" according to the centered RMSProp algorithm.
 *  The centered RMSProp algorithm uses an estimate of the centered second moment
@@ -1262,7 +1262,7 @@ REG_OP(DataFormatDimMap)
    .OP_END_FACTORY_REG(DataFormatDimMap)

 /**
 * @brief Implements stochastic gradient descent (optionally with momentum).\n
 * @brief Implements stochastic gradient descent (optionally with momentum).
 * Nesterov momentum is based on the formula from
 * On the importance of initialization and momentum in deep learning.\n

@@ -1508,7 +1508,7 @@ REG_OP(ApplyProximalAdagradD)
 *@par Attributes:
 *use_locking: An optional bool. Defaults to "False".\n
 *     If "True", updating of the var and accum tensors will be protected by a lock; \n
 *     If "False", the behavior is undefined, but may exhibit less contention. 
 *     If "False", the behavior is undefined, but may exhibit less contention.

 *@par Outputs:
 *var: A mutable Tensor. Has the same type as "var".
@@ -2172,13 +2172,13 @@ REG_OP(SparseApplyFtrl)
 * Should be a Variable Tensor.
 * @li grad: A Tensor of the same type as "var", for the gradient.
 * @li indices: A vector of indices into the first dimension of var and accum.

 * @par Attributes:
 * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
 * @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
 * @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
 * @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.

 * @par Attributes:
 * use_locking: An optional bool. Defaults to "False".
 * @li use_locking: An optional bool. Defaults to "False".
 * If "True", updating of the "var" and "accum" tensors will be
 * protected by a lock; otherwise the behavior is undefined,
 * but may exhibit less contention.
@@ -2314,6 +2314,7 @@ REG_OP(SparseApplyFtrlV2D)
 *    var <- var - mom\n
 *
 * @par Inputs:
 * Nine inputs, including:
 * @li var: A mutable tensor. Must be one of the data types defined in\n
 * TensorType::NumberType(). Should be from a Variable().
 * @li ms: A mutable tensor. Must have the same type as "var". Should be from a
@@ -2367,6 +2368,7 @@ REG_OP(SparseApplyRMSProp)
 *     var <- var - mom
 *
 * @par Inputs:
 * Six inputs, including:
 * @li var: A mutable tensor. Must be one of the data types defined in
 * TensorType::NumberType(). Should be from a Variable().
 * @li ms: A mutable tensor. Must have the same type as "var". Should be from a
@@ -2418,6 +2420,7 @@ REG_OP(SparseApplyRMSPropD)
 *    accum_update <- rho() * accum_update + (1 - rho()) * update.square()\n
 *
 * @par Inputs:
 * Eight inputs, including:
 * @li var: A mutable tensor. Must be one of the data types defined in\n
 * TensorType::NumberType(). Should be from a Variable().
 * @li accum: A mutable tensor. Must have the same type as "var". Should be from a
@@ -2468,6 +2471,7 @@ REG_OP(SparseApplyAdadelta)
 *    accum_update <- rho() * accum_update + (1 - rho()) * update.square()\n
 *
 * @par Inputs:
 * Seven inputs, including:
 * @li var: A mutable tensor. Must be one of the data types defined in
 * TensorType::NumberType(). Should be from a Variable().
 * @li accum: A mutable tensor. Must have the same type as "var". Should be from a
--- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
+++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
@@ -203,11 +203,11 @@ REG_OP(Sigmoid)
 * @brief Computes z = (y - y*y)*dy.

 * @par Inputs:
 * @li y: the input is tensor , dtype is UnaryDataType.
 * @li dy the input is tensor , dtype is UnaryDataType.
 * @li y: The input is Tensor, dtype is UnaryDataType.
 * @li dy: The input is Tensor, dtype is UnaryDataType.

 * @par Outputs:
 * z: the shape of output, dtype is UnaryDataType.
 * z: The shape of output, dtype is UnaryDataType.
 */
 REG_OP(SigmoidGrad)
    .INPUT(y, TensorType(UnaryDataType))
--- a/third_party/fwkacllib/inc/ops/quantize_ops.h
+++ b/third_party/fwkacllib/inc/ops/quantize_ops.h
@@ -21,17 +21,17 @@
 namespace ge {

 /**
 * @brief Dequantizes the input tensor into a float tensor.\n
 * [input_min_range, input_max_range] are scalar floats that specify the range
 * for "output_data". \n
 * @brief Dequantizes the input tensor into a float tensor.
 * [min_range, max_range] are float32 tensors that specify the range
 * for "y". \n
 * The "mode" attribute controls exactly which calculations are used to convert\n
 * the float values to their quantized equivalents.
 * @par Inputs:
 * @li input_data: A Tensor. Must be one of the following types: int8, uint8,
 * @li x: A Tensor. Must be one of the following types: int8, uint8,
 * int32.
 * @li input_min_range: A Tensor of type float32.
 * @li min_range: A Tensor of type float32.
 * Specifies the minimum scalar value possibly produced for the input.
 * @li input_max_range: A Tensor of type float32.
 * @li max_range: A Tensor of type float32.
 * Specifies the maximum scalar value possibly produced for the input.

 * @par Attributes:
@@ -39,11 +39,11 @@ namespace ge {
 * Defaults to "MIN_COMBINED".

 * @par Outputs:
 * output_data: A dictionary of type float32.
 * y: A dictionary of type float32.

 * @attention Constraints:
 * @li "input_min_range" and "input_max_range" have the same shapes.
 * @li "input_data" and "output_data" have the same shapes.
 * @li "min_range" and "max_range" have the same shapes.
 * @li "x" and "y" have the same shapes.

 * @par Third-party framework compatibility
 * Compatible with the TensorFlow operator Dequantize.
--- a/third_party/fwkacllib/inc/ops/selection_ops.h
+++ b/third_party/fwkacllib/inc/ops/selection_ops.h
@@ -149,7 +149,7 @@ REG_OP(TileD)
 * @li indices: A Tensor of type IndexNumberType.

 * @par Outputs:
 * output: A Tensor of type BasicType.
 * y: A Tensor of type BasicType.
 * @see GatherNd()

 * @attention Constraints:
@@ -767,6 +767,7 @@ REG_OP(SliceD)
 * dimension.

 * @par Inputs:
 * Two inputs, including:
 * @li x: A 1D or higher tensor of type float16, with the last dimension at
 * least "k".
 * Specifies the data to sort.
@@ -789,7 +790,7 @@ REG_OP(SliceD)
 * @li indices: A Tensor of type int32, specifying the indices of sorted data.

 * @attention Constraints:
 * @li k =< 4096
 * @li k <= 5120
 * @li Size of the last dimension <= 65500
 * @li sorted = true
 * @li Don't support to get score on the platform of Ascend310
@@ -813,6 +814,7 @@ REG_OP(TopKD)
 * dimension.

 * @par Inputs:
 * Two inputs, including:
 * @li x: A 1D or higher tensor of type BasicType, with the last dimension
 * at least "k".
 * @li k: A 0D Tensor of type int32.\n
@@ -902,8 +904,8 @@ REG_OP(ScatterNdD)
 * @li x2: A 1D Tensor of type int32. A batch_size tensor of class ids.

 * @par Attributes:
 * @li k: A required int32, specifying the number of top elements to look at for
 * computing precision.
 * @li k: A required IndexNumberType, specifying the number of top elements to
 * look at for computing precision.

 * @par Outputs:
 * y: A Tensor of type bool.
@@ -1000,6 +1002,7 @@ REG_OP(StridedSliceAssign)
 * "strides", etc. work exactly as in "StridedSlice".

 * @par Inputs:
 * Two inputs, including:
 * @li var: A mutable ND Tensor of type BasicType.
 * @li input_value: A mutable ND "Tensor" of type BasicType.

@@ -1335,7 +1338,7 @@ REG_OP(InplaceSubD)
    .OP_END_FACTORY_REG(InplaceSubD)

 /**
 * @brief Applies sparse addition to input "x" using individual values or slices\n
 * @brief Applies sparse addition to input "x" using individual values or slices
 * from "updates" according to "indices". The updates are non-aliasing: "x" is\n
 * only modified in-place if no other operations will use it. Otherwise, a copy\n
 * of "x" is made. This operation has a gradient with respect to both "x" and
@@ -1372,7 +1375,7 @@ REG_OP(ScatterNonAliasingAdd)
 * @li x: A Tensor of type RealNumberType.
 * @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix
 * of "x.shape".
 * @li k: A Tensor.
 * @li num_segments: A Tensor of type IndexNumberType.

 * @par Outputs:
 * y: A Tensor of type RealNumberType.
@@ -1419,13 +1422,13 @@ REG_OP(UnsortedSegmentMinD)

 * @par Inputs:
 * Three inputs, including:
 * @li x: A Tensor of type RealNumberType.
 * @li x: A Tensor of type NumberType.
 * @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix
 * of "x.shape".
 * @li k: A Tensor.
 * @li num_segments: A Tensor of type IndexNumberType.

 * @par Outputs:
 * y: A Tensor of type RealNumberType.
 * y: A Tensor of type NumberType.

 * @see UnsortedSegmentSum(), UnsortedSegmentMin(),

--- a/third_party/fwkacllib/inc/ops/transformation_ops.h
+++ b/third_party/fwkacllib/inc/ops/transformation_ops.h
@@ -20,19 +20,38 @@
 #include "graph/operator_reg.h"

 namespace ge {
 /**
 *@brief Converts the tensor format from HWCN to C1HWNCoC0.

 *@par Inputs:
 *x: A 4D Tensor with format HWCN. Must be one of the following types:
 * float16, float32, int32, uint16.

 *@par Outputs:
 *y: A 6D Tensor. Has the same type as "x", with format C1HWNCoC0.
 */
 REG_OP(DepthwiseWeight4DTo6D)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16}))
    .OP_END_FACTORY_REG(DepthwiseWeight4DTo6D)

 /**
 *@brief Converts the tensor format from C1HWNCoC0 to HWCN.

 *@par Inputs:
 *x: A 6D Tensor with format C1HWNCoC0. Must be one of the following types:
 * float16, float32, int32, uint16.

 *@par Attributes:
 *channel_size: An optional int, specifying the channel size of the 4D Tensor
 * with format HWCN. Defaults to "16".

 *@par Outputs:
 *y: A 4D Tensor. Has the same type as "x", with format HWCN.
 */
 REG_OP(DepthwiseWeight6DTo4D)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16}))
    .ATTR(channel_size, Int, 16)
    .OP_END_FACTORY_REG(DepthwiseWeight6DTo4D)



 /**
 *@brief Permutes the dimensions according to perm.\n
        The returned tensor's dimension i will correspond to the input dimension perm[i].
@@ -390,20 +409,20 @@ REG_OP(SpaceToBatchD)
    .OP_END_FACTORY_REG(SpaceToBatchD)

 /**
 * @brief Unpacks the given dimension of a rank-R tensor "x" into rank-(R-1)
 * @brief Unpacks the given dimension of a rank-R Tensor "x" into rank-(R-1)
 * tensors.

 * @par Inputs:
 * x: A rank-R tensor (R > 0) of type BasicType, with format ND or NC1HWC0.

 * @par Attributes:
 * @li num: An optional int, specifying the number of tensors to be unpacked to.
 * @li num: A required int, specifying the number of tensors to be unpacked to.
 * Defaults to "None".
 * @li axis: A required int, specifying the axis to unpack along. The value range
 * @li axis: An optional int, specifying the axis to unpack along. The value range
 * is [-R, R).

 * @par Outputs:
 * y: The list of Tensor objects unpacked from "x", of type BasicType.
 * y: Dynamic output. The list of Tensor objects unpacked from "x", of type BasicType.

 * @attention Constraints:
 * @li If "num" is not specified, it is inferred from the shape of "x".
@@ -434,11 +453,11 @@ REG_OP(Unpack)
 * dimension of images.
 * @li strides: A required list or tuple. How far the centers of two consecutive
 * patches are in the images. Must be: [1, stride_rows, stride_cols, 1].
 * @li rates: A required list or tuple. Must be: [1, rate_rows, rate_cols, 1]. \n
 * This is the input stride, specifying how far two consecutive patch  \n
 * @li rates: A required list or tuple. Must be: [1, rate_rows, rate_cols, 1].\n
 * This is the input stride, specifying how far two consecutive patch\n
 * samples are in the input. Equivalent to extracting patches
 * with patch_sizes_eff = patch_sizes + (patch_sizes - 1) *\n
 * (rates - 1), followed by subsampling them spatially by a factor of rates. \n
 * (rates - 1), followed by subsampling them spatially by a factor of rates.\n
 * This is equivalent to rate in dilated (a.k.a. Atrous) convolutions.
 * @li padding: A required string. The type of padding algorithm to use.

--- a/third_party/fwkacllib/inc/register/op_registry.h
+++ b/third_party/fwkacllib/inc/register/op_registry.h
@@ -59,6 +59,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry {

  domi::ParseParamFunc GetParseParamFunc(const std::string &op_type);

  domi::ParseParamByOpFunc GetParseParamByOperatorFunc(const std::string &op_type);

  domi::FusionParseParamFunc GetFusionParseParamFunc(const std::string &op_type);

  domi::ParseSubgraphFunc GetParseSubgraphPostFunc(const std::string &op_type);
@@ -73,6 +75,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry {
  std::unordered_map<std::string, std::set<std::string>> op_ori_optype_map_;
  std::unordered_map<std::string, domi::ImplyType> op_run_mode_map_;
  std::unordered_map<std::string, ParseParamFunc> opParseParamsFnMap_;
  std::unordered_map<std::string, ParseParamByOpFunc> parse_params_by_op_func_map_;
  std::unordered_map<std::string, FusionParseParamFunc> fusionOpParseParamsFnMap_;
  std::unordered_map<std::string, ParseSubgraphFunc> op_types_to_parse_subgraph_post_func_;
  std::unordered_map<std::string, std::vector<RemoveInputConfigure>> remove_input_configure_map_;