From e0a5b21daa071812c2a87111a7c24c457090a8fb Mon Sep 17 00:00:00 2001
From: lwx897429
Date: Wed, 30 Dec 2020 16:05:37 +0800
Subject: [PATCH 01/54] Memory optimization during model loading

---
 .../load/new_model_manager/data_dumper.cc     |  7 ++++
 ge/graph/load/new_model_manager/data_dumper.h | 10 +++---
 .../load/new_model_manager/davinci_model.cc   | 32 ++++++++++++-------
 .../load/new_model_manager/davinci_model.h    | 12 +++++--
 ...ew_model_manager_davinci_model_unittest.cc | 11 +++++++
 5 files changed, 54 insertions(+), 18 deletions(-)

diff --git a/ge/graph/load/new_model_manager/data_dumper.cc b/ge/graph/load/new_model_manager/data_dumper.cc
index b33a062d..f7f23dc1 100644
--- a/ge/graph/load/new_model_manager/data_dumper.cc
+++ b/ge/graph/load/new_model_manager/data_dumper.cc
@@ -830,6 +830,13 @@ Status DataDumper::UnloadDumpInfo() {
   return SUCCESS;
 }
 
+void DataDumper::DumpShrink() {
+  compute_graph_.reset();
+  input_map_.clear();
+  ref_info_.clear();
+  op_list_.clear();
+}
+
 void DataDumper::PrintCheckLog(string &dump_list_key) {
   std::set<std::string> model_list = dump_properties_.GetAllDumpModel();
   if (model_list.empty()) {
diff --git a/ge/graph/load/new_model_manager/data_dumper.h b/ge/graph/load/new_model_manager/data_dumper.h
index 46ead310..8e612688 100755
--- a/ge/graph/load/new_model_manager/data_dumper.h
+++ b/ge/graph/load/new_model_manager/data_dumper.h
@@ -83,6 +83,8 @@ class DataDumper {
 
   Status UnloadDumpInfo();
 
+  void DumpShrink();
+
   void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; }
   const DumpProperties &GetDumpProperties() const { return dump_properties_; }
   bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const;
@@ -112,18 +114,18 @@ class DataDumper {
   struct InnerInputMapping;
 
   std::vector<OpDescInfo> op_desc_info_;
-  std::vector<InnerDumpInfo> op_list_;
+  std::vector<InnerDumpInfo> op_list_;  // release after DavinciModel::Init
   uint32_t end_graph_task_id_ = 0;
   uint32_t end_graph_stream_id_ = 0;
   bool is_end_graph_ = false;
-  std::multimap<std::string, InnerInputMapping> input_map_;
+  std::multimap<std::string, InnerInputMapping> input_map_;  // release after DavinciModel::Init
   bool load_flag_;
   uint32_t device_id_;
   uintptr_t global_step_;
   uintptr_t loop_per_iter_;
   uintptr_t loop_cond_;
-  ComputeGraphPtr compute_graph_;
-  std::map<OpDescPtr, void *> ref_info_;
+  ComputeGraphPtr compute_graph_;  // release after DavinciModel::Init
+  std::map<OpDescPtr, void *> ref_info_;  // release after DavinciModel::Init
 
   void *l1_fusion_addr_ = nullptr;
 
diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc
index 7721739b..7427489b 100755
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -150,14 +150,7 @@ DavinciModel::~DavinciModel() {
     GELOGW("UnloadDumpInfo failed, ret: %u.", ret);
   }
 
-  for (const auto &op_and_addr : saved_task_addrs_) {
-    auto addr = op_and_addr.second;
-    if (addr != nullptr) {
-      GE_CHK_RT(rtFree(addr));
-    }
-    addr = nullptr;
-  }
-  saved_task_addrs_.clear();
+  ClearTaskAddrs();
 
   GE_CHK_STATUS(ModelRunStop());
 
@@ -221,6 +214,17 @@ DavinciModel::~DavinciModel() {
   }
 }
 
+void DavinciModel::ClearTaskAddrs() {
+  for (const auto &op_and_addr : saved_task_addrs_) {
+    auto addr = op_and_addr.second;
+    if (addr != nullptr) {
+      GE_CHK_RT(rtFree(addr));
+    }
+    addr = nullptr;
+  }
+  saved_task_addrs_.clear();
+}
+
 void DavinciModel::UnbindHcomStream() {
   if (!all_hccl_stream_list_.empty()) {
     for (size_t i = 0; i < all_hccl_stream_list_.size(); i++) {
@@ -263,7 +267,10 @@ Status DavinciModel::Assign(const GeModelPtr &ge_model) {
 ///
 void DavinciModel::Shrink() {
   skt_info_ = {0, 0, 0, 0, nullptr, nullptr, {}, {}, {}, {}, {}, RT_KERNEL_DEFAULT, -1, 0, nullptr};
+  DumperShrink();
   ge_model_.reset();  // delete object.
+  op_list_.clear();
+  ClearTaskAddrs();
 }
 
 Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size) {
@@ -738,7 +745,6 @@ Status DavinciModel::ReportProfilingData() {
   }
   ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info);
   GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed.");
-  op_list_.clear();
 
   return SUCCESS;
 }
@@ -963,7 +969,9 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod
   }
 
   data_by_index[data_index] = op_desc;
-  data_op_list_.push_back(op_desc);
+  auto data_op = AttrUtils::CopyOpDesc(op_desc);
+  GE_CHECK_NOTNULL(data_op);
+  data_op_list_.push_back(data_op);
   if (known_node_) {
     return SUCCESS;
   }
@@ -1019,7 +1027,9 @@ Status DavinciModel::OptInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_
   }
 
   data_op_list_.clear();
   for (auto &item : data_by_index) {
-    data_op_list_.emplace_back(item.second);
+    auto data_op = AttrUtils::CopyOpDesc(item.second);
+    GE_CHECK_NOTNULL(data_op);
+    data_op_list_.emplace_back(data_op);
     auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second);
     GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size());
     input_addrs_list_.emplace_back(output_addrs);
diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h
index 906c0548..10cda88d 100755
--- a/ge/graph/load/new_model_manager/davinci_model.h
+++ b/ge/graph/load/new_model_manager/davinci_model.h
@@ -481,6 +481,10 @@ class DavinciModel {
     data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args);
   }
 
+  void DumperShrink() {
+    data_dumper_.DumpShrink();
+  }
+
   void SetEndGraphId(uint32_t task_id, uint32_t stream_id);
 
   DavinciModel &operator=(const DavinciModel &model) = delete;
@@ -644,6 +648,8 @@ class DavinciModel {
 
   void ReleaseTask();
 
+  void ClearTaskAddrs();
+
   void UnbindTaskSinkStream();
 
   bool IsAicpuKernelConnectSpecifiedLayer();
@@ -875,12 +881,12 @@ class DavinciModel {
 
   string om_name_;
   uint32_t version_;
-  GeModelPtr ge_model_;
+  GeModelPtr ge_model_;  // release after DavinciModel::Init
 
   bool need_destroy_aicpu_kernel_{false};
   vector<string> out_node_name_;
 
-  map<uint32_t, OpDescPtr> op_list_;
+  map<uint32_t, OpDescPtr> op_list_;  // release after DavinciModel::Init
 
   // data op_desc
   vector<OpDescPtr> data_op_list_;
@@ -975,7 +981,7 @@ class DavinciModel {
   DataDumper data_dumper_;
   uint64_t iterator_count_;
   bool is_l1_fusion_enable_;
-  map<OpDescPtr, void *> saved_task_addrs_;
+  map<OpDescPtr, void *> saved_task_addrs_;  // release after DavinciModel::Init
   void *l1_fusion_addr_ = nullptr;
 
   bool known_node_ = false;
diff --git a/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc
index 44642f93..00069930 100644
--- a/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc
+++ b/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc
@@ -254,6 +254,17 @@ TEST_F(UtestModelManagerDavinciModel, eventlist_success) {
   delete model;
 }
 
+// test Shrink
+TEST_F(UtestModelManagerDavinciModel, shrink_success) {
+  DavinciModel model(0, g_label_call_back);
+  OpDescPtr op_desc_ptr = make_shared<OpDesc>("Cast", "Cast");
+  void *addr = nullptr;
+  rtMalloc(&addr, 128, RT_MEMORY_HBM);
+  model.saved_task_addrs_.emplace(op_desc_ptr, addr);
+  model.Shrink();
+  EXPECT_EQ(model.saved_task_addrs_.empty(), true);
+}
+
 // test rtMalloc
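//
// Editor's sketch (not part of the original patch): shrink_success above calls
// rtMalloc without checking its status and relies on Shrink() to release the
// buffer. When a test needs the allocation freed on every exit path, a small
// RAII wrapper around the rtMalloc/rtFree pair already used in this file is
// enough; the DeviceBuffer name below is hypothetical:
struct DeviceBuffer {
  void *ptr = nullptr;
  explicit DeviceBuffer(uint64_t size) {
    // Keep the handle empty if the runtime allocation fails.
    if (rtMalloc(&ptr, size, RT_MEMORY_HBM) != RT_ERROR_NONE) {
      ptr = nullptr;
    }
  }
  ~DeviceBuffer() {
    // Always hand device memory back on scope exit.
    if (ptr != nullptr) {
      (void)rtFree(ptr);
    }
  }
};
// Usage in a test body: DeviceBuffer buf(128); EXPECT_NE(buf.ptr, nullptr);
//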
TEST_F(UtestModelManagerDavinciModel, failed_reset_device) { DavinciModel model(0, g_label_call_back); From 7d336c66a6088af8681f1c52da7a1ca12a139a3f Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Tue, 29 Dec 2020 21:43:50 +0800 Subject: [PATCH 02/54] Free memory before return --- ge/common/profiling/profiling_manager.cc | 6 +- ge/graph/build/model_builder.cc | 12 ++-- .../load/new_model_manager/model_manager.cc | 36 ++++++---- tests/ut/ge/CMakeLists.txt | 1 + ...el_manager_model_manager_aicpu_unittest.cc | 66 +++++++++++++++++++ 5 files changed, 101 insertions(+), 20 deletions(-) create mode 100644 tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index abc4a6df..92417286 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -94,7 +94,7 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt if (options.profiling_mode == "1" && !options.profiling_options.empty()) { // enable profiling by ge option if (strncpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(), - MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) { + MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) { GELOGE(INTERNAL_ERROR, "copy profiling_options failed."); return INTERNAL_ERROR; } @@ -124,8 +124,8 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt return ge::PARAM_INVALID; } - if (strncpy_s(prof_conf.jobId, MSPROF_OPTIONS_DEF_LEN_MAX, options.job_id.c_str(), - MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) { + if (strncpy_s(prof_conf.jobId, MSPROF_OPTIONS_DEF_LEN_MAX, options.job_id.c_str(), MSPROF_OPTIONS_DEF_LEN_MAX - 1) != + EOK) { GELOGE(INTERNAL_ERROR, "copy job_id failed."); return INTERNAL_ERROR; } diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index 77f8f237..de586275 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -805,7 +805,7 @@ Status ModelBuilder::CompileSingleOp() { } void ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set &aicpu_op_types, - std::set &aicpu_tf_op_types) { + std::set &aicpu_tf_op_types) { std::string aicpu_optype; bool has_attr_check_cpu = ge::AttrUtils::GetStr(op_desc, "needCheckCpu", aicpu_optype); std::vector tf_optypes; @@ -822,7 +822,7 @@ void ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set &aicpu_op_types, - std::set &aicpu_tf_op_types) { + std::set &aicpu_tf_op_types) { std::vector aicpu_optype_list; std::vector aicpu_tf_optype_list; if (ge::AttrUtils::GetListStr(&model, "needCheckCpu", aicpu_optype_list)) { @@ -839,10 +839,10 @@ void ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::setGetName().c_str(), aicpu_op_types.size(), aicpu_optype_list.size(), aicpu_tf_op_types.size(), - aicpu_tf_optype_list.size()); + "Check Aicpu op types ComputeGraph: %s aicpu_op_types: %zu, aicpu_optype_list: %zu, aicpu_tf_op_types: %zu, " + "aicpu_tf_optype_list:%zu.", + compute_graph_->GetName().c_str(), aicpu_op_types.size(), aicpu_optype_list.size(), aicpu_tf_op_types.size(), + aicpu_tf_optype_list.size()); GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", aicpu_optype_list), return, "Set attr needCheckCpu fail."); diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 4b0dbee0..01075255 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ 
b/ge/graph/load/new_model_manager/model_manager.cc @@ -1563,6 +1563,11 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op size_t aicpu_op_nums = aicpu_optype_list.size(); size_t tf_op_nums = aicpu_tf_optype_list.size(); size_t op_nums = aicpu_op_nums + tf_op_nums; + std::function callback = [&]() { + for (auto mem : allocated_mem) { + GE_CHK_RT(rtFree(mem)); + } + }; // malloc sysOpInfoList in SysOpCheckInfo status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { @@ -1575,6 +1580,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&d_res_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_res_op_list); @@ -1583,6 +1589,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&d_ret_code_list, op_nums * sizeof(ReturnCode), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_ret_code_list); @@ -1594,6 +1601,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&d_op_type_name, op_type.length(), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_op_type_name); @@ -1611,6 +1619,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&d_op_type_name, op_type.size(), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_op_type_name); @@ -1639,37 +1648,39 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&args, args_size, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(args); - GE_CHK_RT( - rtMemcpy(args, sizeof(SysOpCheckInfo), reinterpret_cast(&op_check_info_req), sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE)); - GE_CHK_RT(rtMemcpy(reinterpret_cast(static_cast(static_cast(reinterpret_cast(args)) + op_check_info_req.offSetLen)), - sizeof(SysOpCheckResp), reinterpret_cast(&op_check_info_res), sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtMemcpy(args, sizeof(SysOpCheckInfo), reinterpret_cast(&op_check_info_req), sizeof(SysOpCheckInfo), + RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtMemcpy( + reinterpret_cast(static_cast(static_cast(reinterpret_cast(args)) + + op_check_info_req.offSetLen)), sizeof(SysOpCheckResp), reinterpret_cast(&op_check_info_res), + sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE)); GE_CHK_RT(rtStreamCreate(&stream, 0)); GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream)); status = rtStreamSynchronize(stream); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); + GE_MAKE_GUARD(release, callback); + GE_CHK_RT(rtStreamDestroy(stream)); return RT_ERROR_TO_GE_STATUS(status); } // Check the response - 
SysOpCheckResp *d_op_check_info_res = reinterpret_cast(reinterpret_cast(static_cast(static_cast(reinterpret_cast(args)) + op_check_info_req.offSetLen))); + SysOpCheckResp *d_op_check_info_res = + reinterpret_cast(reinterpret_cast(static_cast(static_cast( + reinterpret_cast(args)) + op_check_info_req.offSetLen))); (void)memset_s(&op_check_info_res, sizeof(SysOpCheckResp), 0, sizeof(SysOpCheckResp)); GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_DEVICE_TO_HOST)); - std::function callback = [&]() { - for (auto mem : allocated_mem) { - GE_CHK_RT(rtFree(mem)); - } - GE_CHK_RT(rtStreamDestroy(stream)); - }; if (op_check_info_res.isWithoutJson) { GELOGI("No need to check aicpu in this scenoria."); GE_MAKE_GUARD(release, callback); + GE_CHK_RT(rtStreamDestroy(stream)); return SUCCESS; } uint64_t res_op_nums = op_check_info_res.opListNum; @@ -1688,6 +1699,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) { GELOGE(FAILED, "Number of retcode is not equal to number of op type."); GE_MAKE_GUARD(release, callback); + GE_CHK_RT(rtStreamDestroy(stream)); return FAILED; } std::string fail_reason; @@ -1711,10 +1723,12 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op fail_reason += "not support."; GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str()); GE_MAKE_GUARD(release, callback); + GE_CHK_RT(rtStreamDestroy(stream)); return FAILED; } GE_MAKE_GUARD(release, callback); + GE_CHK_RT(rtStreamDestroy(stream)); GELOGI("Cpu kernel launch check optype task success."); return SUCCESS; } diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 175774bb..fbeb9867 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -562,6 +562,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES #"graph/load/new_model_manager_davinci_model_unittest.cc" #"graph/load/new_model_manager_model_manager_unittest.cc" #"graph/load/new_model_manager_task_build_unittest.cc" + "graph/load/new_model_manager_model_manager_aicpu_unittest.cc" "graph/load/end_graph_task_unittest.cc" "graph/load/new_model_manager_event_manager_unittest.cc" #"graph/load/output_net_output_unittest.cc" diff --git a/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc new file mode 100644 index 00000000..0539bcdb --- /dev/null +++ b/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc @@ -0,0 +1,66 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include "common/debug/log.h" +#include "common/model_parser/base.h" +#include "common/properties_manager.h" +#include "common/types.h" +#include "common/l2_cache_optimize.h" + +#define private public +#define protected public +#include "graph/load/new_model_manager/model_manager.h" + +#include "common/helper/om_file_helper.h" +#include "common/op/ge_op_utils.h" +#include "graph/load/graph_loader.h" +#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/new_model_manager/davinci_model_parser.h" +//#include "new_op_test_utils.h" +#undef private +#undef protected + +using namespace std; +using namespace testing; + +namespace ge { + +const static std::string ENC_KEY = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"; + +class UtestModelManagerModelManagerAicpu : public testing::Test { + protected: + void SetUp() {} + + void TearDown() {} +}; + +TEST_F(UtestModelManagerModelManagerAicpu, checkAicpuOptype) { + ModelManager model_manager; + uint32_t model_id = 0; + std::vector aicpu_op_list; + std::vector aicpu_tf_list; + aicpu_tf_list.emplace_back("FrameworkOp"); + aicpu_tf_list.emplace_back("Unique"); + + model_manager.LaunchKernelCheckAicpuOp(aicpu_op_list, aicpu_tf_list); + // Load allow listener is null + //EXPECT_EQ(ge::FAILED, mm.LoadModelOffline(model_id, data, nullptr, nullptr)); +} + +} // namespace ge From 0074b0b48f295ede76586cc187e2b12c26121a5d Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Wed, 30 Dec 2020 16:29:29 +0800 Subject: [PATCH 03/54] revert broadcast in train graph related --- ge/graph/build/memory/block_mem_assigner.cc | 95 +---- ge/graph/build/memory/block_mem_assigner.h | 6 +- .../load/new_model_manager/davinci_model.cc | 12 + ge/graph/manager/graph_manager.cc | 3 - ge/graph/passes/hccl_memcpy_pass.cc | 333 ++---------------- ge/graph/passes/hccl_memcpy_pass.h | 17 - ge/graph/preprocess/graph_preprocess.cc | 3 + 7 files changed, 61 insertions(+), 408 deletions(-) diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 3acd4a7f..9dc0cf73 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -551,31 +551,11 @@ void GetMaxBatchAllMemorySize(std::map> &batch_all_ } } -void BlockMemAssigner::MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node) { - auto node_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(node_op_desc == nullptr, return); - // if input size just one and from variable, no need to reassign continuous memory - bool is_input_continuous = false; - (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); - if (is_input_continuous && (node_op_desc->GetInputsSize() == 1)) { - auto peer_out_anchor = node->GetInDataAnchor(0)->GetPeerOutAnchor(); - GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, return); - auto in_node = peer_out_anchor->GetOwnerNode(); - GE_IF_BOOL_EXEC(in_node == nullptr, return); - if (in_node->GetType() == VARIABLE || in_node->GetType() == CONSTANT) { - GELOGI("node only one input and from variable, set continuous alloced. 
node_name:%s", node->GetName().c_str()); - (void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); - } - } -} - void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { vector temp; std::map> batch_all_memory_size; std::map batch_total_size; for (const NodePtr &n : compute_graph_->GetAllNodes()) { - MarkContinuousAllocedForOneInputFromVariable(n); - auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); @@ -1081,53 +1061,18 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, return block; } -void BlockMemAssigner::ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, - const NodePtr &n) { - const auto node_op_desc = n->GetOpDesc(); - for (uint32_t index = 0; index < static_cast(node_op_desc->GetOutputsSize()); index++) { - int32_t reuse_in_index = -1; - if (!GraphUtils::IsRefFromInput(n->GetOutDataAnchor(index), reuse_in_index)) { - isAllOutputRef = false; - break; - } else { - zero_memory_list_.emplace_back(n, kOutput, index); - isOutputHasRef = true; - } - } -} - - -Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector &ranges, +MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector &ranges, const bool is_op_reuse_mem) { - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return INTERNAL_ERROR, "input node is null."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); auto node_op_desc = n->GetOpDesc(); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return INTERNAL_ERROR, "node_op_desc is null."); - - // continuous output support ref only when all output ref input - bool isAllOutputRef = true; - bool isOutputHasRef = false; - - ContinuousOutRefCheck(isAllOutputRef, isOutputHasRef, n); - - if (isAllOutputRef) { - GELOGI("continuous output node ref all input, skip continuous alloc, node_name:%s", n->GetName().c_str()); - return SUCCESS; - } - - if (!isAllOutputRef && isOutputHasRef) { - GELOGE(INTERNAL_ERROR, "continuous output node ref part input, not support this situation, node_name:%s", - n->GetName().c_str()); - return INTERNAL_ERROR; - } - + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); MemoryBlock *block = nullptr; int64_t total_size = 0; int64_t memory_type = RT_MEMORY_HBM; for (uint32_t index = 0; index < static_cast(node_op_desc->GetOutputsSize()); index++) { auto output_op_desc = node_op_desc->GetOutputDescPtr(index); if (output_op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "Get output desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); - return INTERNAL_ERROR; + return nullptr; } if (CheckIsZeroMemNodeType(n->GetType())) { @@ -1137,8 +1082,8 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vectorGetName().c_str(), index); - return INTERNAL_ERROR; + GELOGI("Get size failed"); + return nullptr; } size_t align_size = static_cast(size); AlignMemOffset(align_size); @@ -1161,7 +1106,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vectorfirst_continuous_block_ = true; block->last_continuous_block_ = true; - } else { - GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. 
node_name:%s", n->GetName().c_str()); - return INTERNAL_ERROR; } - return SUCCESS; + return block; } MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector &ranges, @@ -1191,8 +1133,9 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, NodeIndexIO node_index_io(n, index, kOut); int64_t size = 0; auto output_op_desc = node_op_desc->GetOutputDescPtr(index); - GE_IF_BOOL_EXEC(output_op_desc == nullptr, return nullptr); - GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); + if (output_op_desc != nullptr) { + GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); + } size_t no_align_size = 0; GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, return nullptr, "Get no align size failed"); @@ -1203,14 +1146,6 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); block->ref_count_++; } else { - // if ref input is variable, can not find symbol, must judge alone - int32_t reuse_in_index = -1; - if (GraphUtils::IsRefFromInput(n->GetOutDataAnchor(index), reuse_in_index)) { - zero_memory_list_.emplace_back(n, kOutput, index, false); - GELOGI("ref mode skip out block assign. node_name: %s, index:%d", n->GetName().c_str(), index); - return nullptr; - } - int64_t max_size = size; int64_t memory_type = RT_MEMORY_HBM; auto iter1 = anchor_to_symbol_.find(node_index_io.ToString()); @@ -1458,7 +1393,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); }); if (IsContinuousOutput(node)) { - return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); + (void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); + return SUCCESS; } for (uint32_t i = 0; i < static_cast(op_desc->GetOutputsSize()); i++) { int64_t size = 0; @@ -1952,8 +1888,9 @@ Status BlockMemAssigner::Assign() { bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || - (node_type == CONSTANTOP) || (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || - (node_type == ASSIGN) || (node_type == HVDWAIT); + (node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) || + (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || + (node_type == HVDCALLBACKBROADCAST); } bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index 7e76081d..d514ca34 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -420,11 +420,7 @@ class BlockMemAssigner : public MemAssigner { bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type); - void ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, const NodePtr &n); - - Status ApplyContinuousMemory(const NodePtr &n, const vector &ranges, const bool is_op_reuse_mem); - - void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node); + MemoryBlock *ApplyContinuousMemory(const NodePtr &n, 
const vector &ranges, const bool is_op_reuse_mem); std::unordered_map>> reusable_blocks_; diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 7721739b..b7714c4a 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2099,6 +2099,12 @@ Status DavinciModel::SyncVarData() { RT_MEMCPY_HOST_TO_DEVICE)); } + for (auto op_desc : variable_op_list_) { + ret = + VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); + GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, + op_desc->GetName().c_str()); + } return ret; } @@ -2571,6 +2577,12 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b /// Status DavinciModel::ReturnNoOutput(uint32_t data_id) { GELOGI("ReturnNoOutput model id:%u", model_id_); + for (auto op_desc : variable_op_list_) { + Status ret = VarManager::Instance(session_id_) + ->SyncBroadCastData2Var(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); + GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, + op_desc->GetName().c_str()); + } GE_CHK_BOOL_EXEC(listener_ != nullptr, return PARAM_INVALID, "listener_ is null!"); std::vector outputs; diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 030b864e..beb7cd42 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -93,7 +93,6 @@ #include "graph/passes/unused_args_clean_pass.h" #include "graph/passes/global_step_insert_pass.h" #include "graph/passes/memcpy_addr_async_pass.h" -#include "graph/passes/hccl_memcpy_pass.h" #include "graph/build/label_allocator.h" #include "graph/utils/tensor_adapter.h" #include "inc/pass_manager.h" @@ -2122,8 +2121,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { new (std::nothrow) TransOpWithoutReshapeFusionPass)) GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass", new (std::nothrow) TransOpBreadthFusionPass)) - GE_CHK_STATUS_RET( - after_merge_passes.AddPass("OptimizeStage1_1::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass)); GE_TIMESTAMP_START(after_merge_passes); auto ret = after_merge_passes.Run(compute_graph); diff --git a/ge/graph/passes/hccl_memcpy_pass.cc b/ge/graph/passes/hccl_memcpy_pass.cc index a67b917f..21747f42 100755 --- a/ge/graph/passes/hccl_memcpy_pass.cc +++ b/ge/graph/passes/hccl_memcpy_pass.cc @@ -28,157 +28,50 @@ namespace { const int32_t kAnchorSize = 1; const int kAnchorNum = 0; -const int32_t kAnchorAssignRefIndex = 0; -const int32_t kAnchorAssignValueIndex = 1; const char *const kInputMutable = "_input_mutable"; } // namespace namespace ge { Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { - Status ret = SUCCESS; GE_IF_BOOL_EXEC(graph == nullptr, GELOGE(PARAM_INVALID, "param [graph] must not be null."); return PARAM_INVALID); for (const auto &node : graph->GetDirectNode()) { auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "node has no op_desc, node_name : %s.", node->GetName().c_str()); - return INTERNAL_ERROR; - } - - ret = ContinuousInputProcess(graph, node); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "failed ProcessBroadcastMemcpy, node_name:%s.", node->GetName().c_str()); - return ret; - } - - ret = MutableInputProcess(graph, node); - if (ret != 
SUCCESS) { - GELOGE(INTERNAL_ERROR, "failed MutableInputProcess, node_name:%s.", node->GetName().c_str()); - return ret; - } - - ret = P2pmemInputProcess(graph, node); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "failed P2pmemInputProcess, node_name:%s.", node->GetName().c_str()); - return ret; - } - - } - return ret; -} - -// If node has _input_mutable attr, means input mem may be modified when op execute. -// In order to avoid to affect another op execute with same input when data modified, -// need to inset memcpy node between. -// also works on situation that input is variable or const. -Status HcclMemcpyPass::MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { - auto op_desc = node->GetOpDesc(); + GE_IF_BOOL_EXEC(op_desc == nullptr, continue); - bool node_input_mutable = false; - if (!AttrUtils::HasAttr(op_desc, kInputMutable)) { - return SUCCESS; - } - - if (!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable)) { - GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); - return FAILED; - } - if (!node_input_mutable) { - return SUCCESS; - } - - GELOGI("input mutable hcom op is:%s.", op_desc->GetName().c_str()); - for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { - if (hccl_in_anchor == nullptr) { + bool node_input_mutable = false; + if (!AttrUtils::HasAttr(op_desc, kInputMutable)) { continue; } - auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(src_out_anchor); - int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size(); - if (src_out_anchor_size == kAnchorSize) { - // Identity needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. - if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { - Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); - return ret; - } - } + GE_IF_BOOL_EXEC(!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable), + GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); return FAILED); + if (!node_input_mutable) { continue; } - Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); - return ret; - } - } - return SUCCESS; -} - -// If broadcast input size is bigger than 1, and input from variable, -// cause by broadcast input memory should be continuous, -// another featuremap mem will be allocated for broadcast input. -// In this condition, move data from variable mem to broadcast input featuremap mem will be executed each step. -// In order to avoid move action out of model, use memcpy node instead of move action code. 
-Status HcclMemcpyPass::ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { - auto op_desc = node->GetOpDesc(); - - bool is_input_continuous = false; - (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); - - if (is_input_continuous && op_desc->GetInputsSize() > 1) { - GELOGI("continuous input op is:%s.", op_desc->GetName().c_str()); - // if input size bigger than one, insert memcpy between var data for support continous mem alloc + GELOGI("hcom op is:%s.", op_desc->GetName().c_str()); for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { if (hccl_in_anchor == nullptr) { continue; } auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); - if (src_out_anchor == nullptr) { - GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str()); - return INTERNAL_ERROR; - } - - if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { - Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); - return ret; + GE_CHECK_NOTNULL(src_out_anchor); + + int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size(); + if (src_out_anchor_size == kAnchorSize) { + // Memcpyasync needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. + NodePtr src_node = src_out_anchor->GetOwnerNode(); + std::string src_type = src_node->GetType(); + bool check_src_type = (src_type == CONSTANTOP) || (src_type == DATA) || (src_type == CONSTANT); + if (check_src_type) { + Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); + return ret; + } } + continue; } - } - } - return SUCCESS; -} - -// if input is var type, and node input need p2p mem, then memcpy should be insert between the two -Status HcclMemcpyPass::P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { - auto op_desc = node->GetOpDesc(); - - vector input_memory_types; - (void) ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, input_memory_types); - if (input_memory_types.empty()) { - return SUCCESS; - } - - for (uint32_t index = 0; index < input_memory_types.size() && index < op_desc->GetInputsSize(); index++) { - if (input_memory_types[index] != RT_MEMORY_P2P_DDR) { - continue; - } - - GELOGD("p2p input op is:%s.", op_desc->GetName().c_str()); - auto hccl_in_anchor = node->GetInDataAnchor(index); - if (hccl_in_anchor == nullptr) { - continue; - } - auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); - if (src_out_anchor == nullptr) { - GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str()); - return INTERNAL_ERROR; - } - - if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); if (ret != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); @@ -189,12 +82,8 @@ Status HcclMemcpyPass::P2pmemInputProcess(const ComputeGraphPtr &graph, const No return SUCCESS; } -bool HcclMemcpyPass::IsDataNode(const std::string& node_type) { - return (node_type == CONSTANTOP) || (node_type == VARIABLE) || (node_type == DATA) || (node_type == CONSTANT); -} - /// -/// @brief Add Identity Node +/// @brief Add MemcpyAsync Node /// @param [in] ge::ComputeGraphPtr graph /// @param [in] ge::OutDataAnchorPtr in_node /// @return ge::NodePtr @@ -212,20 +101,20 @@ 
NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O node_name = CheckDuplicateName(node_name); OpDescPtr op_desc = MakeShared(node_name.c_str(), IDENTITY); if (op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "Create Identity op: MakeShared op_desc fail."); + GELOGE(INTERNAL_ERROR, "Create identity op: MakeShared op_desc fail."); return nullptr; } - GELOGI("Create Identity op:%s.", op_desc->GetName().c_str()); + GELOGI("Create identity op:%s.", op_desc->GetName().c_str()); graphStatus ret = op_desc->AddInputDesc("x", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create Identity op: add input desc fail."); + GELOGE(INTERNAL_ERROR, "Create identity op: add input desc fail."); return nullptr; } ret = op_desc->AddOutputDesc("y", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create Identity op: add output desc fail."); + GELOGE(INTERNAL_ERROR, "Create identity op: add output desc fail."); return nullptr; } // because history reason ,this pass can not do work after constant fold so mark it @@ -233,7 +122,7 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O NodePtr memcpy_node = graph->AddNode(op_desc); if (memcpy_node == nullptr) { - GELOGE(INTERNAL_ERROR, "Insert Identity node fail."); + GELOGE(INTERNAL_ERROR, "Insert identity node fail."); return nullptr; } @@ -266,38 +155,7 @@ std::string HcclMemcpyPass::CheckDuplicateName(const std::string &node_name) { /// Status HcclMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, const InDataAnchorPtr &hccl_in_anchor) { - GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode()); - GE_CHECK_NOTNULL(hccl_in_anchor->GetOwnerNode()); - - Status ret = InsertIdentityBeforeHccl(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "add identity failed, var_node:%s, hccl_node:%s.", - src_out_anchor->GetOwnerNode()->GetName().c_str(), - hccl_in_anchor->GetOwnerNode()->GetName().c_str()); - return ret; - } - - ret = InsertAssignAfterBroadcastIfNeed(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "add assign failed, var_node:%s, hccl_node:%s.", - src_out_anchor->GetOwnerNode()->GetName().c_str(), - hccl_in_anchor->GetOwnerNode()->GetName().c_str()); - return ret; - } - return SUCCESS; -} - -/// -/// @brief Insert Identity node Between Hccl node and variable -/// @param [in] ComputeGraphPtr graph -/// @param [in] OutDataAnchorPtr src_out_anchor -/// @param [in] InDataAnchorPtr hccl_in_anchor -/// @return status -/// -Status HcclMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, - const InDataAnchorPtr &hccl_in_anchor) { - GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(), - hccl_in_anchor->GetOwnerNode()->GetName().c_str()); + GELOGI("The op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str()); NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor); GE_CHECK_NOTNULL(memcpy_node); @@ -324,139 +182,6 @@ Status HcclMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, co } return SUCCESS; } - -/// -/// @brief Insert assign node after broadcast node and variable to refresh variable data -/// @param [in] ComputeGraphPtr graph -/// @param [in] OutDataAnchorPtr var_out_anchor -/// @param [in] 
InDataAnchorPtr hccl_in_anchor -/// @return status -/// -Status HcclMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph, - const OutDataAnchorPtr &var_out_anchor, - const InDataAnchorPtr &hccl_in_anchor) { - if (hccl_in_anchor->GetOwnerNode()->GetType() != HCOMBROADCAST) { - GELOGD("%s not broadcast, no need to insert assign node", hccl_in_anchor->GetOwnerNode()->GetName().c_str()); - return SUCCESS; - } - - if (var_out_anchor->GetOwnerNode()->GetType() != VARIABLE) { - GELOGD("%s not variable, no need to insert assign node", var_out_anchor->GetOwnerNode()->GetName().c_str()); - return SUCCESS; - } - - GELOGI("after op %s and op %s need insert assign op.", var_out_anchor->GetOwnerNode()->GetName().c_str(), - hccl_in_anchor->GetOwnerNode()->GetName().c_str()); - - for (auto peer_in_anchor : var_out_anchor->GetPeerInDataAnchors()) { - if (peer_in_anchor->GetOwnerNode()->GetType() == ASSIGN) { - GELOGD("variable %s out assign node is exist.", var_out_anchor->GetOwnerNode()->GetName().c_str()); - return SUCCESS; - } - } - - NodePtr assign_node = CreateAssignNode(graph, var_out_anchor); - GE_CHECK_NOTNULL(assign_node); - - OutDataAnchorPtr hccl_out_anchor = hccl_in_anchor->GetOwnerNode()->GetOutDataAnchor(hccl_in_anchor->GetIdx()); - GE_CHECK_NOTNULL(hccl_out_anchor); - - Status ret = hccl_out_anchor->LinkTo(assign_node->GetInDataAnchor(kAnchorAssignValueIndex)); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "The op %s link anchor %s fail.", hccl_out_anchor->GetOwnerNode()->GetName().c_str(), - assign_node->GetName().c_str()); - return FAILED; - } - - ret = var_out_anchor->LinkTo(assign_node->GetInDataAnchor(kAnchorAssignRefIndex)); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "The op %s link anchor %s fail.", var_out_anchor->GetOwnerNode()->GetName().c_str(), - assign_node->GetName().c_str()); - return FAILED; - } - - // add control edge between assign node and node after broadcast node - OutControlAnchorPtr assign_out_control_anchor = assign_node->GetOutControlAnchor(); - GE_CHECK_NOTNULL(assign_out_control_anchor); - - for (auto in_data_anchor : hccl_out_anchor->GetPeerInDataAnchors()) { - if (in_data_anchor->GetOwnerNode()->GetName() == assign_node->GetName()) { - continue; - } - ret = assign_out_control_anchor->LinkTo(in_data_anchor->GetOwnerNode()->GetInControlAnchor()); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "The op %s link control anchor %s fail.", assign_out_control_anchor->GetOwnerNode()->GetName().c_str(), - in_data_anchor->GetOwnerNode()->GetName().c_str()); - return FAILED; - } - } - - for (auto in_control_anchor : hccl_out_anchor->GetOwnerNode()->GetOutControlAnchor()->GetPeerInControlAnchors()) { - if (in_control_anchor->GetOwnerNode()->GetName() == assign_node->GetName()) { - continue; - } - ret = assign_out_control_anchor->LinkTo(in_control_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "The op %s link control anchor %s fail.", assign_out_control_anchor->GetOwnerNode()->GetName().c_str(), - in_control_anchor->GetOwnerNode()->GetName().c_str()); - return FAILED; - } - } - return SUCCESS; -} - -/// -/// @brief create assign Node, add to graph -/// @param [in] ge::ComputeGraphPtr graph -/// @param [in] ge::OutDataAnchorPtr variable node out anchor -/// @return ge::NodePtr -/// -NodePtr HcclMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { - GE_IF_BOOL_EXEC(graph == nullptr, return nullptr); - NodePtr pre_node = out_data_anchor->GetOwnerNode(); - OpDescPtr pre_op_desc = 
pre_node->GetOpDesc();
-  if (pre_op_desc == nullptr) {
-    GELOGE(INTERNAL_ERROR, "OpDesc of pre node is invalid.");
-    return nullptr;
-  }
-
-  std::string node_name = pre_node->GetName() + "_" + ASSIGN;
-  node_name = CheckDuplicateName(node_name);
-  OpDescPtr op_desc = MakeShared<OpDesc>(node_name.c_str(), ASSIGN);
-  if (op_desc == nullptr) {
-    GELOGE(INTERNAL_ERROR, "Create Assign op: MakeShared op_desc fail.");
-    return nullptr;
-  }
-  GELOGI("Create Assign op:%s.", op_desc->GetName().c_str());
-
-  graphStatus ret = op_desc->AddInputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx()));
-  if (ret != GRAPH_SUCCESS) {
-    GELOGE(INTERNAL_ERROR, "Create Assign op: add ref input desc fail.");
-    return nullptr;
-  }
-
-  ret = op_desc->AddInputDesc("value", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx()));
-  if (ret != GRAPH_SUCCESS) {
-    GELOGE(INTERNAL_ERROR, "Create Assign op: add value input desc fail.");
-    return nullptr;
-  }
-
-  ret = op_desc->AddOutputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx()));
-  if (ret != GRAPH_SUCCESS) {
-    GELOGE(INTERNAL_ERROR, "Create Assign op: add output desc fail.");
-    return nullptr;
-  }
-
-  NodePtr assign_node = graph->AddNode(op_desc);
-  if (assign_node == nullptr) {
-    GELOGE(INTERNAL_ERROR, "Insert Identity node fail.");
-    return nullptr;
-  }
-
-  return assign_node;
-}
-
-
 ///
 /// @brief Clear Status, used for subgraph pass
 /// @return SUCCESS
diff --git a/ge/graph/passes/hccl_memcpy_pass.h b/ge/graph/passes/hccl_memcpy_pass.h
index 7e52708a..e73a5483 100755
--- a/ge/graph/passes/hccl_memcpy_pass.h
+++ b/ge/graph/passes/hccl_memcpy_pass.h
@@ -32,28 +32,11 @@ class HcclMemcpyPass : public GraphPass {
  private:
   NodePtr CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor);
 
-  NodePtr CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor);
-
   std::string CheckDuplicateName(const std::string &node_name);
 
   Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor,
                               const InDataAnchorPtr &hccl_in_anchor);
 
-  Status InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor,
-                                  const InDataAnchorPtr &hccl_in_anchor);
-
-  Status InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph,
-                                          const OutDataAnchorPtr &src_out_anchor,
-                                          const InDataAnchorPtr &hccl_in_anchor);
-
-  Status ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node);
-
-  Status MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node);
-
-  Status P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node);
-
-  bool IsDataNode(const std::string& node_type);
-
   std::unordered_map<std::string, uint32_t> node_num_map_;
 };
 }  // namespace ge
diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc
index 32f877cf..6bb3105c 100644
--- a/ge/graph/preprocess/graph_preprocess.cc
+++ b/ge/graph/preprocess/graph_preprocess.cc
@@ -51,6 +51,7 @@
 #include "graph/passes/for_pass.h"
 #include "graph/passes/guarantee_const_pass.h"
 #include "graph/passes/hccl_group_pass.h"
+#include "graph/passes/hccl_memcpy_pass.h"
 #include "graph/passes/identity_pass.h"
 #include "graph/passes/infershape_pass.h"
 #include "graph/passes/net_output_pass.h"
@@ -1732,6 +1733,8 @@ Status GraphPrepare::PrepareOptimize() {
   PassManager graph_pass;
   try {
     (void)graph_pass.AddPass("PrepareOptimize::PrunePass", new PrunePass);
+    // todo: temporarily insert the HCCL memcpy pass in graph preparation, to keep it from inserting redundant memcpy nodes
+
(void)graph_pass.AddPass("PrepareOptimize::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass); } catch (std::bad_alloc &e) { GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs."); return INTERNAL_ERROR; From daefed5c5c8adda0215320fce522e658973e8ff9 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Wed, 30 Dec 2020 17:02:10 +0800 Subject: [PATCH 04/54] fix unknown shape aicpu --- ge/graph/load/new_model_manager/davinci_model.cc | 6 +++--- ge/graph/load/new_model_manager/davinci_model.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 7721739b..a367d334 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2859,8 +2859,8 @@ void DavinciModel::SetTotalIOAddrs(const vector &io_addrs) { } } -Status DavinciModel::UpdateKnownZeroCopyAddr(vector &total_io_addrs) { - if (fixed_mem_base_ != reinterpret_cast(mem_base_)) { +Status DavinciModel::UpdateKnownZeroCopyAddr(vector &total_io_addrs, bool update_args) { + if (fixed_mem_base_ != reinterpret_cast(mem_base_) && update_args) { for (size_t i = 0; i < total_io_addrs.size(); ++i) { total_io_addrs[i] = GetRunAddress(total_io_addrs[i]); } @@ -2904,7 +2904,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector &inputs, const vec } else { total_io_addrs_ = orig_total_io_addrs_; } - GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_), "DavinciModel::UpdateKnownZeroCopyAddr failed."); + GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_, false), "DavinciModel::UpdateKnownZeroCopyAddr failed."); if (total_args_size_ == 0) { GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 906c0548..76c5c8f0 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -531,7 +531,7 @@ class DavinciModel { Status MallocKnownArgs(); Status UpdateKnownNodeArgs(const vector &inputs, const vector &outputs); Status CreateKnownZeroCopyMap(const vector &inputs, const vector &outputs); - Status UpdateKnownZeroCopyAddr(vector &total_io_addrs); + Status UpdateKnownZeroCopyAddr(vector &total_io_addrs, bool update_args = true); void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); From 610828561cbd1c0a37b8c2ca505f22884d7b0533 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Wed, 30 Dec 2020 17:42:38 +0800 Subject: [PATCH 05/54] fill ut --- .../ge/graph/load/davinci_model_unittest.cc | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 3cd0455d..34d45269 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -282,4 +282,42 @@ TEST_F(UtestDavinciModel, init_unknown) { const vector outputs = { &virtual_addr }; EXPECT_EQ(model.UpdateKnownNodeArgs(inputs, outputs), SUCCESS); } + +TEST_F(UtestDavinciModel, ReturnNoOutput_test) { + DavinciModel model(0, nullptr); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr var1 = CreateOpDesc("var1", VARIABLE); + 
var1->AddInputDesc(tensor); + var1->AddOutputDesc(tensor); + var1->SetInputOffset({1024}); + var1->SetOutputOffset({1024}); + + model.variable_op_list_.push_back(var1); + + + EXPECT_EQ(model.ReturnNoOutput(model), PARAM_INVALID); +} + +TEST_F(UtestDavinciModel, SyncVarData_test) { + DavinciModel model(0, nullptr); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr var1 = CreateOpDesc("var1", VARIABLE); + var1->AddInputDesc(tensor); + var1->AddOutputDesc(tensor); + var1->SetInputOffset({1024}); + var1->SetOutputOffset({1024}); + + model.variable_op_list_.push_back(var1); + + EXPECT_NE(model.SyncVarData(model), SUCCESS); + +} + + } // namespace ge From 6d94878eaf69bfe43e28385ef373d381ccd06d6e Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Wed, 30 Dec 2020 17:56:28 +0800 Subject: [PATCH 06/54] fix ut --- tests/ut/ge/graph/load/davinci_model_unittest.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 34d45269..eda3cb15 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -298,7 +298,7 @@ TEST_F(UtestDavinciModel, ReturnNoOutput_test) { model.variable_op_list_.push_back(var1); - EXPECT_EQ(model.ReturnNoOutput(model), PARAM_INVALID); + EXPECT_EQ(model.ReturnNoOutput(1), PARAM_INVALID); } TEST_F(UtestDavinciModel, SyncVarData_test) { @@ -315,8 +315,7 @@ TEST_F(UtestDavinciModel, SyncVarData_test) { model.variable_op_list_.push_back(var1); - EXPECT_NE(model.SyncVarData(model), SUCCESS); - + EXPECT_NE(model.SyncVarData(), SUCCESS); } From 2f4351652426af191eb6c718a8923f69bee6289f Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 30 Dec 2020 19:44:50 +0800 Subject: [PATCH 07/54] For dynamic shape compile. 
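[Editor's note, not part of the original commit message: the one-line subject is
vague. Judging from the diff below, this patch routes unknown-shape graphs
through PreBuildModel() and MergeWeights() before task generation, and adds
GraphBuilder::SetConstantInputOffset() so each op records where the weight of a
directly connected Const input will live once weights are merged. A hedged
sketch of that bookkeeping, using only calls visible in the diff
(valid_input_index counts the connected inputs seen so far):

    std::vector<int64_t> input_offsets(op_desc->GetInputsSize(), 0);
    std::vector<GeTensorPtr> weights = OpDescUtils::MutableWeights(peer_node);
    if (!weights.empty() && weights[0] != nullptr) {
      int64_t input_offset = 0;
      (void)TensorUtils::GetDataOffset(weights[0]->MutableTensorDesc(), input_offset);
      // Consumed after MergeWeights() relocates the constant's data.
      input_offsets[valid_input_index] = input_offset;
    }
    op_desc->SetInputOffset(input_offsets);]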
--- ge/graph/build/graph_builder.cc | 84 +++++++++++++++++++++++++++++---- ge/graph/build/graph_builder.h | 1 + ge/graph/build/model_builder.h | 4 +- ge/single_op/single_op_model.cc | 1 + 4 files changed, 79 insertions(+), 11 deletions(-) diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index 87d2a206..ee9be124 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -15,6 +15,7 @@ */ #include "graph/build/graph_builder.h" +#include "graph/build/memory/graph_mem_assigner.h" #include "common/ge/ge_util.h" #include "common/helper/model_helper.h" #include "graph/build/logical_stream_allocator.h" @@ -197,10 +198,8 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vectorGetGraphUnknownFlag()) { GE_CHK_STATUS_RET( BuildForDynamicShapeGraph(comp_graph, subgraph_ptr_list, ge_root_model_ptr, ge_model_ptr, session_id), "Build for dynamic shape graph failed."); @@ -270,16 +269,78 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::v return SUCCESS; } +Status GraphBuilder::SetConstantInputOffset(ComputeGraphPtr &comp_graph) { + for (auto &node : comp_graph->GetDirectNode()) { + GE_CHECK_NOTNULL(node); + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + auto num_inputs = op_desc->GetInputsSize(); + std::vector input_offsets(num_inputs, 0); + int valid_input_index = -1; + for (uint32_t i = 0; i < node->GetAllInDataAnchorsSize(); ++i) { + auto in_anchor = node->GetInDataAnchor(i); + auto peer_out_anchor = in_anchor->GetPeerOutAnchor(); + if (peer_out_anchor == nullptr) { + continue; + } + + ++valid_input_index; + auto peer_node = peer_out_anchor->GetOwnerNode(); + if (peer_node == nullptr) { + continue; + } + + if (peer_node->GetType() != CONSTANT) { + continue; + } + + std::vector weights = OpDescUtils::MutableWeights(peer_node); + if (weights.empty()) { + GELOGE(FAILED, "weights size of node %s is empty", node->GetName().c_str()); + return FAILED; + } + GeTensorPtr weight = weights[0]; + GE_CHECK_NOTNULL(weight); + int64_t input_offset = 0; + (void) TensorUtils::GetDataOffset(weight->MutableTensorDesc(), input_offset); + // valid_input_index must smaller than num_inputs + input_offsets[valid_input_index] = input_offset; + GELOGD("[%s] input[%u] is const, offset = %ld", node->GetName().c_str(), valid_input_index, input_offset); + } + + op_desc->SetInputOffset(input_offsets); + std::vector output_offsets(op_desc->GetOutputsSize(), 0); + op_desc->SetOutputOffset(output_offsets); + } + return SUCCESS; +} + Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id) { GELOGI("Begin to build unknown shape graph[%s].", comp_graph->GetName().c_str()); + Graph2SubGraphInfoList subgraph_map; + ge::ModelBuilder builder(session_id, comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_); + GE_DUMP(comp_graph, "BeforePreBuildModel"); + GE_TIMESTAMP_START(PreBuildModel); + GE_CHK_STATUS_RET(builder.PreBuildModel(), "Graph[%s] builder PreBuildModel() return fail.", + comp_graph->GetName().c_str()); + GE_TIMESTAMP_END(PreBuildModel, "GraphBuilder::PreBuildModel"); + GE_DUMP(comp_graph, "AfterPreBuildModel"); + GE_TIMESTAMP_START(CalcOpParam); GE_CHK_STATUS_RET(CalcOpParam(comp_graph), "Graph[%s] builder CalcOpParam() return fail.", comp_graph->GetName().c_str()); GE_TIMESTAMP_END(CalcOpParam, "GraphBuilder::CalcOpParam"); GE_DUMP(comp_graph, "AfterCalcOpParam"); - Graph2SubGraphInfoList subgraph_map; - 
ge::ModelBuilder builder(session_id, comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_); + + GE_TIMESTAMP_START(SetConstantInputOffset); + GE_CHK_STATUS_RET(SetConstantInputOffset(comp_graph), + "Graph[%s] failed to set constant input offset.", comp_graph->GetName().c_str()); + GE_TIMESTAMP_END(SetConstantInputOffset); + GE_TIMESTAMP_START(MergeWeights); + GE_CHK_STATUS_RET(MergeWeights(), "Graph[%s] failed to merge weights.", comp_graph->GetName().c_str()); + GE_TIMESTAMP_END(MergeWeights, "GraphBuilder::MergeWeights"); + ModelPtr model_ptr = MakeShared(); if (model_ptr == nullptr) { return MEMALLOC_FAILED; @@ -375,10 +436,15 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, op_desc->GetName().c_str()); } } - // - for (auto &sub_graph : comp_graph->GetAllSubgraphs()) { + + auto all_graphs = comp_graph->GetAllSubgraphs(); + if (all_graphs.empty()) { + all_graphs.push_back(comp_graph); + } + for (auto &sub_graph : all_graphs) { // exclude functional subgraph in known subgraph - if (sub_graph->GetParentGraph() != comp_graph && !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { + if (sub_graph->GetParentGraph() != nullptr && sub_graph->GetParentGraph() != comp_graph && + !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { continue; } diff --git a/ge/graph/build/graph_builder.h b/ge/graph/build/graph_builder.h index 329f3ebc..b828a80d 100644 --- a/ge/graph/build/graph_builder.h +++ b/ge/graph/build/graph_builder.h @@ -67,6 +67,7 @@ class GraphBuilder { GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); + Status SetConstantInputOffset(ComputeGraphPtr &comp_graph); Status AddOutputMemTypeForNode(const NodePtr &node); Status BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h index de079768..12420614 100644 --- a/ge/graph/build/model_builder.h +++ b/ge/graph/build/model_builder.h @@ -55,13 +55,13 @@ class ModelBuilder { ge::Buffer GetWeightBuffer() const; + Status MergeWeights(); + protected: void AddNodeInputProperty(); void ClearOriginalFormat(); - Status MergeWeights(); - private: bool SetInputConst(const OpDescPtr &op_desc, const NodePtr &src_node, size_t index, vector &is_input_const); diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 25bf6855..2a1a14e6 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -477,6 +477,7 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & single_op.num_inputs_ = data_ops_.size(); single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); + model_params_.memory_size = UINT_MAX; return BuildTaskListForDynamicOp(single_op); } } // namespace ge From 77b2d66ec7e980fb044772bee20d59133b46a74e Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 30 Dec 2020 19:49:30 +0800 Subject: [PATCH 08/54] For dynamic shape compile. 
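Fixup for the previous commit: GE_TIMESTAMP_END takes the tag plus a message string, and MergeWeights is a ModelBuilder member, so it must be called on the local builder instance rather than as a free function; also trims trailing whitespace from the PreBuildModel log line.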
--- ge/graph/build/graph_builder.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index ee9be124..dce40c3e 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -322,7 +322,7 @@ Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeMo ge::ModelBuilder builder(session_id, comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_); GE_DUMP(comp_graph, "BeforePreBuildModel"); GE_TIMESTAMP_START(PreBuildModel); - GE_CHK_STATUS_RET(builder.PreBuildModel(), "Graph[%s] builder PreBuildModel() return fail.", + GE_CHK_STATUS_RET(builder.PreBuildModel(), "Graph[%s] builder PreBuildModel() return fail.", comp_graph->GetName().c_str()); GE_TIMESTAMP_END(PreBuildModel, "GraphBuilder::PreBuildModel"); GE_DUMP(comp_graph, "AfterPreBuildModel"); @@ -336,9 +336,9 @@ Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeMo GE_TIMESTAMP_START(SetConstantInputOffset); GE_CHK_STATUS_RET(SetConstantInputOffset(comp_graph), "Graph[%s] failed to set constant input offset.", comp_graph->GetName().c_str()); - GE_TIMESTAMP_END(SetConstantInputOffset); + GE_TIMESTAMP_END(SetConstantInputOffset, "GraphBuilder::SetConstantInputOffset"); GE_TIMESTAMP_START(MergeWeights); - GE_CHK_STATUS_RET(MergeWeights(), "Graph[%s] failed to merge weights.", comp_graph->GetName().c_str()); + GE_CHK_STATUS_RET(builder.MergeWeights(), "Graph[%s] failed to merge weights.", comp_graph->GetName().c_str()); GE_TIMESTAMP_END(MergeWeights, "GraphBuilder::MergeWeights"); ModelPtr model_ptr = MakeShared(); From 7b87558f2fc8d6eff511d9745516293bf8188539 Mon Sep 17 00:00:00 2001 From: zhaoxinxin Date: Wed, 30 Dec 2020 21:36:26 +0800 Subject: [PATCH 09/54] modified: ge/graph/manager/graph_manager.cc modified: ge/graph/preprocess/graph_preprocess.cc modified: ge/graph/preprocess/graph_preprocess.h modified: inc/external/ge/ge_api_types.h --- ge/graph/manager/graph_manager.cc | 4 +- ge/graph/preprocess/graph_preprocess.cc | 135 ++++++++++++++++++++++-- ge/graph/preprocess/graph_preprocess.h | 6 +- inc/external/ge/ge_api_types.h | 5 + 4 files changed, 138 insertions(+), 12 deletions(-) diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 030b864e..1aee79a4 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -687,7 +687,7 @@ Status GraphManager::PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId()); GM_RUN_AND_DUMP_PERF("OptimizeGraphPrepare", stages.optimizer.OptimizeOriginalGraphForQuantize, compute_graph); GM_RUN_AND_DUMP_PERF("HandleSummaryOp", stages.optimizer.HandleSummaryOp, compute_graph); - GM_RUN_AND_DUMP_PERF("Prepare", stages.preparer.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph, + GM_RUN_AND_DUMP_PERF("Prepare", stages.preparer.PrepareDynShape, graph_node, inputs, compute_graph, session_id); GM_RUN_AND_DUMP_PERF("OptimizeOriginalGraph", stages.optimizer.OptimizeOriginalGraph, compute_graph); @@ -1173,7 +1173,7 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); GE_CHECK_NOTNULL(compute_graph); - GM_RUN_AND_DUMP_PERF("Prepare", GetCompilerStages(graph_id).preparer.PrepareDynShape, graph_node->GetGraph(), inputs, + GM_RUN_AND_DUMP_PERF("Prepare", 
GetCompilerStages(graph_id).preparer.PrepareDynShape, graph_node, inputs, compute_graph, session_id); for (auto &node : compute_graph->GetAllNodes()) { diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 32f877cf..3f4c7f16 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -898,6 +898,117 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) { } return SUCCESS; } +/** + * Parser shape_range from string to vector + * shape_range from option normally is "[1~20],[3],[3~6],[-1]" + * @param shape_range + */ +void ParseDynamicInputShapeRange(const std::string &shape_range, + std::vector>> &range) { + if (shape_range.empty() || shape_range.size() < 2) { + GELOGW("Shape range %s is invalid.", shape_range); + return; + } + // different parameter sets are split by ';' + vector shape_set = ge::StringUtils::Split(shape_range, ']'); + if (shape_set.empty()) { + return; + } + for (auto shape_str : shape_set) { + if (shape_str.empty()) { + continue; + } + if (ge::StringUtils::StartWith(shape_str, "[")) { + shape_str = shape_str.substr(1, shape_str.size()); + } + if (ge::StringUtils::StartWith(shape_str, ",")) { + shape_str = shape_str.substr(2, shape_str.size()); + } + std::vector> range_of_single; + vector range_set = ge::StringUtils::Split(shape_str, ','); + for (auto range_str : range_set) { + vector pair_set = ge::StringUtils::Split(range_str, '~'); + pair range_pair; + if (pair_set.size() == 1) { + auto range_value = atoi(pair_set.at(0).c_str()); + if (range_value < 0) { + range_pair = std::make_pair(1, range_value); + } else { + range_pair = std::make_pair(range_value, range_value); + } + } else if (pair_set.size() == 2) { + auto range_left = atoi(pair_set.at(0).c_str()); + auto range_right = atoi(pair_set.at(1).c_str()); + range_pair = std::make_pair(range_left, range_right); + } + range_of_single.emplace_back(range_pair); + } + range.emplace_back(range_of_single); + } +} + +Status GetDynamicInputShapeRange(const std::vector &user_input, const std::map &graph_option, + vector>> &range_vec) { + auto mode_iter = graph_option.find(OPTION_EXEC_DYNAMIC_EXECUTE_MODE); + if (mode_iter == graph_option.end()) { + GELOGD("Graph Option: Can not find %s option in graph options.", OPTION_EXEC_DYNAMIC_EXECUTE_MODE); + return SUCCESS; + } + GELOGD("Graph Option: dynamic_input_mode value is %s.", mode_iter->second.c_str()); + if (mode_iter->second != "dynamic_execute") { + return SUCCESS; + } + auto iter = graph_option.find(OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE); + if (iter == graph_option.end()) { + GELOGE(PARAM_INVALID, "Graph option %s is required when %s is dynamic_execute", OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE, + OPTION_EXEC_DYNAMIC_EXECUTE_MODE); + return PARAM_INVALID; + } + GELOGD("GraphOption: dynamic_inputs_shape_range value is %s.", iter->second.c_str()); + ParseDynamicInputShapeRange(iter->second, range_vec); + if (range_vec.size() != user_input.size()) { + GELOGE(PARAM_INVALID, "Dynamic input shape range size is %zu, inputs size is %zu. 
Not match.", range_vec.size(), + user_input.size()); + return PARAM_INVALID; + } + return SUCCESS; +} + +Status UpdateDynamicInputShapeRange(const ge::GeAttrValue::INT index, + const ector>> &range_vec, OpDescPtr &op, + GeTensorDesc &desc) { + auto unkown_shape = desc.GetShape(); + auto shape_range = range_vec.at(index); + for (size_t i = 0; i < unkown_shape.GetDimNum(); ++i) { + if (shape_range.at(i).first == shape_range.at(i).second) { + unkown_shape.SetDim(i, shape_range.at(i).first); + } else { + unkown_shape.SetDim(i, -1); + } + } + desc.SetShape(unkown_shape); + desc.SetShapeRange(shape_range); + int64_t dynamic_shape_size = 1; + for (const auto range_pair : range_vec.at(index)) { + FMK_INT64_MULCHECK(dynamic_shape_size, range_pair.second); + dynamic_shape_size *= range_pair.second; + } + auto data_type_size = GetSizeByDataType(desc.GetDataType()); + if (data_type_size < 0) { + GELOGE(PARAM_INVALID, "Input data type is %s, is not supported.", + TypeUtils::DataTypeToSerialString(desc.GetDataType()).c_str()); + return PARAM_INVALID; + } + FMK_INT64_MULCHECK(dynamic_shape_size, data_type_size); + dynamic_shape_size *= data_type_size; + GELOGI("In dynamic_execute mode ,set input %s shape range size %ld", op->GetName().c_str(), dynamic_shape_size); + ge::TensorUtils::SetSize(desc, dynamic_shape_size); + graphStatus graph_ret = op->UpdateInputDesc(0, desc); + GE_CHK_STATUS_RET(graph_ret, "UpdateInputDesc fail, graph ret: %u", graph_ret); + graph_ret = op->UpdateOutputDesc(0, desc); + GE_CHK_STATUS_RET(graph_ret, "UpdateInputDesc fail, graph ret: %u", graph_ret); + return SUCCESS; +} } // namespace GraphPrepare::GraphPrepare() : compute_graph_(nullptr) {} @@ -1102,7 +1213,11 @@ Status GraphPrepare::AdjustDataOpOutput(const NodePtr &node) { return SUCCESS; } -Status GraphPrepare::UpdateInput(const std::vector &user_input) { +Status GraphPrepare::UpdateInput(const std::vector &user_input, const std::map &graph_option) { + // Get shape range of input in dynamic_execute mode + vector>> dynamic_shape_range_vec; + auto ret = GetDynamicInputShapeRange(user_input, graph_option, dynamic_shape_range_vec); + GE_CHK_STATUS_RET(ret, "Graph option is not right on Dynamic execute mode."); compute_graph_->SaveDataFormat(ge::TypeUtils::DomiFormatToFormat(GetLocalOmgContext().format)); for (NodePtr &input_node : compute_graph_->GetDirectNode()) { GE_CHECK_NOTNULL(input_node); @@ -1185,6 +1300,12 @@ Status GraphPrepare::UpdateInput(const std::vector &user_input) { return graph_ret; } + if (!dynamic_shape_range_vec.empty()) { + ret = UpdateDynamicInputShapeRange(index, dynamic_shape_range_vec, op, desc); + GE_CHK_STATUS_RET(ret, "Fail to update dynamic input shape range on %s.", op->GetName().c_str()); + continue; + } + if (!options_.train_graph_flag) { Status ret = AdjustDataOpOutput(input_node); GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "AdjustDataOpOutput fail, ret:%u", ret); return ret); @@ -1358,17 +1479,17 @@ Status GraphPrepare::SaveOriginalGraphToOmModel() { GELOGI("Prepare %s on graph %s success.", name, compute_graph->GetName().c_str()); \ } while (0) -Status GraphPrepare::PrepareDynShape(ConstGraphPtr graph, const std::vector &user_input, +Status GraphPrepare::PrepareDynShape(const GraphNodePtr &graph_node, const std::vector &user_input, ge::ComputeGraphPtr &compute_graph, uint64_t session_id) { - GE_CHECK_NOTNULL(graph); + GE_CHECK_NOTNULL(graph_node->GetGraph()); GE_CHECK_NOTNULL(compute_graph); GetLocalOmgContext().type = static_cast(options_.framework_type); - const Graph &const_graph = 
*graph; + const Graph &const_graph = *graph_node->GetGraph(); PP_RUN("Init", Init, const_graph, session_id); PP_RUN("SetRtContext", SetRtContext, rtContext_t(), RT_CTX_GEN_MODE); - PP_RUN_AND_DUMP("CheckAndUpdateInput", CheckAndUpdateInput, user_input); + PP_RUN_AND_DUMP("CheckAndUpdateInput", CheckAndUpdateInput, user_input, graph_node->GetGraph()); PP_RUN_AND_DUMP("GraphEquivalentTransformation", GraphEquivalentTransformation); PP_RUN_AND_DUMP("ProcessOutput", ProcessNetOutput); PP_RUN_AND_DUMP("ProcessMultiBatch", multibatch::ProcessMultiBatch, compute_graph_); @@ -1831,7 +1952,7 @@ Status GraphPrepare::ProcessNetOutput() { return SUCCESS; } -Status GraphPrepare::CheckAndUpdateInput(const std::vector &user_input) { +Status GraphPrepare::CheckAndUpdateInput(const std::vector &user_input,const std::map &graph_option) { compute_graph_->SetInputSize(user_input.size()); if (user_input.empty()) { return SUCCESS; @@ -1843,7 +1964,7 @@ Status GraphPrepare::CheckAndUpdateInput(const std::vector &user_input return ret; } - ret = UpdateInput(user_input); + ret = UpdateInput(user_input, graph_option); if (ret != SUCCESS) { GELOGE(ret, "UpdateInput fail, ret:%u", ret); return ret; diff --git a/ge/graph/preprocess/graph_preprocess.h b/ge/graph/preprocess/graph_preprocess.h index a3bbf433..de755418 100755 --- a/ge/graph/preprocess/graph_preprocess.h +++ b/ge/graph/preprocess/graph_preprocess.h @@ -45,7 +45,7 @@ class GraphPrepare { virtual ~GraphPrepare(); GraphPrepare(const GraphPrepare &in) = delete; GraphPrepare &operator=(const GraphPrepare &in) = delete; - Status PrepareDynShape(ConstGraphPtr graph, + Status PrepareDynShape(const GraphNodePtr &graph_node, const std::vector &user_input, ge::ComputeGraphPtr &compute_graph, uint64_t session_id = 0); @@ -63,8 +63,8 @@ class GraphPrepare { Status CheckRefOp(); Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode); Status AdjustDataOpOutput(const NodePtr &node); - Status UpdateInput(const std::vector &user_input); - Status CheckAndUpdateInput(const std::vector &user_input); + Status UpdateInput(const std::vector &user_input, const std::map &graph_option); + Status CheckAndUpdateInput(const std::vector &user_input, const std::map &graph_option); Status CheckConstOp(); Status VerifyConstOp(const NodePtr &node); Status CheckUserInput(const std::vector &user_input); diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index d0f2105f..250252f9 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -61,6 +61,11 @@ const char *const OPTION_EXEC_HCCL_FLAG = "ge.exec.hcclFlag"; const char *const OPTION_EXEC_ATOMIC_FLAG = "ge.exec.enable_atomic"; const char *const OPTION_EXEC_DISABLE_REUSED_MEMORY = "ge.exec.disableReuseMemory"; const char *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOptimization"; +// Dynamic input flag. 
ge.exec.dynamicInput=1, means enable dynamic input, +// ge.exec.dynamicGraphExecuteMode, dynamic_execute[default] +const char *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput"; +const char *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode"; +const char *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange"; // Option key: memory init const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; From 7bf75b0f6722199da29ac915e99212a238986af9 Mon Sep 17 00:00:00 2001 From: zhaoxinxin Date: Wed, 30 Dec 2020 21:48:45 +0800 Subject: [PATCH 10/54] modified: ge/graph/preprocess/graph_preprocess.cc --- ge/graph/preprocess/graph_preprocess.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 3f4c7f16..d12be957 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -906,7 +906,7 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) { void ParseDynamicInputShapeRange(const std::string &shape_range, std::vector>> &range) { if (shape_range.empty() || shape_range.size() < 2) { - GELOGW("Shape range %s is invalid.", shape_range); + GELOGW("Shape range %s is invalid.", shape_range.c_str()); return; } // different parameter sets are split by ';' vector shape_set = ge::StringUtils::Split(shape_range, ']'); if (shape_set.empty()) { @@ -975,7 +975,7 @@ Status GetDynamicInputShapeRange(const std::vector &user_input, const } Status UpdateDynamicInputShapeRange(const ge::GeAttrValue::INT index, - const ector>> &range_vec, OpDescPtr &op, + const vector>> &range_vec, OpDescPtr &op, GeTensorDesc &desc) { From e88abcf961e33ee9bc5d3da234b0355586224868 Mon Sep 17 00:00:00 2001 From: zhaoxinxin Date: Wed, 30 Dec 2020 22:18:01 +0800 Subject: [PATCH 11/54] modified: ge/graph/preprocess/graph_preprocess.cc --- ge/graph/preprocess/graph_preprocess.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index d12be957..c45f4db6 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -37,6 +37,7 @@ #include "graph/passes/addn_pass.h" #include "graph/passes/aicpu_constant_folding_pass.h" #include "graph/passes/assert_pass.h" +#include "ge/ge_api_types.h" #ifdef ONLY_COMPILE_OPEN_SRC #include "graph/passes/assign_remove_pass.h" #endif @@ -1489,7 +1490,7 @@ Status GraphPrepare::PrepareDynShape(const GraphNodePtr &graph_node, const std:: PP_RUN("Init", Init, const_graph, session_id); PP_RUN("SetRtContext", SetRtContext, rtContext_t(), RT_CTX_GEN_MODE); - PP_RUN_AND_DUMP("CheckAndUpdateInput", CheckAndUpdateInput, user_input, graph_node->GetGraph()); + PP_RUN_AND_DUMP("CheckAndUpdateInput", CheckAndUpdateInput, user_input, graph_node->GetOptions()); PP_RUN_AND_DUMP("GraphEquivalentTransformation", GraphEquivalentTransformation); PP_RUN_AND_DUMP("ProcessOutput", ProcessNetOutput); PP_RUN_AND_DUMP("ProcessMultiBatch", multibatch::ProcessMultiBatch, compute_graph_); From fc748110910c7bda20386da578d418b2e59774e4 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 31 Dec 2020 10:36:41 +0800 Subject: [PATCH 12/54] Custom pass register.
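Hook externally registered passes into graph compilation: PreRun now calls RunCustomPass on the original Graph before any optimization, dispatching to the passes collected by CustomPassHelper from metadef's external/register/register_pass.h (hence register_pass.cpp joining the UT sources and the metadef/parser submodule bumps).

For reference, a client would register a pass roughly as below; the exact macro name and callback signature are assumed from register_pass.h and should be checked against that header:

    #include "external/register/register_pass.h"

    // the callback receives the whole original graph and may rewrite it
    ge::Status MyCustomPass(ge::GraphPtr &graph) {
      // inspect or transform *graph here
      return ge::SUCCESS;
    }

    REGISTER_CUSTOM_PASS("MyCustomPass").CustomPassFn(MyCustomPass);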
--- ge/graph/manager/graph_manager.cc | 15 +++++++++++++++ ge/graph/manager/graph_manager.h | 1 + metadef | 2 +- parser | 2 +- 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index beb7cd42..46aeaebc 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -101,6 +101,7 @@ #include "graph/common/local_context.h" #include "graph/common/omg_util.h" #include "common/formats/utils/formats_trans_utils.h" +#include "external/register/register_pass.h" namespace { const char *const kSummary = "Summary"; @@ -765,10 +766,24 @@ Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint return SUCCESS; } +Status GraphManager::RunCustomPass(const GraphNodePtr &graph_node) { + ConstGraphPtr const_graph = graph->node->GetGraph(); + auto comp_graph = GraphUtils::GetComputeGraph(*const_graph); + GE_DUMP(compute_graph, "RunCustomPassBegin"); + + GE_TIMESTAMP_START(RunCustomPass); + GraphPtr graph = std::const_pointer_cast(const_graph); + GE_CHK_STATUS_RET(CustomPassHelper::Instance()->Run(graph), "Graph[%s] run custom pass fail.", + comp_graph->GetName().c_str()); + GE_TIMESTAMP_END(RunCustomPass, "GraphBuilder::RunCustomPass"); + return SUCCESS; +} + Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector &inputs, GeRootModelPtr &ge_root_model, uint64_t session_id) { GE_CHECK_NOTNULL(graph_node); GE_CHECK_NOTNULL(graph_node->GetGraph()); + GE_CHK_STATUS_RET_NOLOG(RunCustomPass(graph_node)); auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); GE_CHECK_NOTNULL(compute_graph); compute_graph->SetSessionID(session_id); diff --git a/ge/graph/manager/graph_manager.h b/ge/graph/manager/graph_manager.h index d2887c4c..32de7eac 100644 --- a/ge/graph/manager/graph_manager.h +++ b/ge/graph/manager/graph_manager.h @@ -226,6 +226,7 @@ class GraphManager { void ParseInputsDimsForData(const std::vector &input_tensor); Status ParseInputsDimsForGetNexNosinkAndData(const vector &dynamic_nodes, const std::vector &input_tensor); + Status RunCustomPass(const GraphNodePtr &graph_node); Status PreRun(const GraphNodePtr &graph_node, const std::vector &inputs, GeRootModelPtr &ge_root_model, uint64_t session_id = INVALID_SESSION_ID); diff --git a/metadef b/metadef index 11c6cf29..37a90f0d 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 11c6cf2921b6a385616a3ebc601b4431b55b07db +Subproject commit 37a90f0dfd797306e99ec32a688be32a9ad835a4 diff --git a/parser b/parser index 99437c39..5b93b050 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 99437c39d26624a14060307366a96b79b1d439c3 +Subproject commit 5b93b050dd7ca5b77c3001a790031d877fa10956 From 0566c6dc3f91e72425d405ae45e55cfd1bc5fb46 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 31 Dec 2020 10:52:24 +0800 Subject: [PATCH 13/54] Custom pass register. 
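Fix a compile error in the pass hook just introduced: GE_DUMP referenced compute_graph, but the local variable in RunCustomPass is comp_graph.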
--- ge/graph/manager/graph_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 46aeaebc..c102ec2e 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -769,7 +769,7 @@ Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint Status GraphManager::RunCustomPass(const GraphNodePtr &graph_node) { ConstGraphPtr const_graph = graph->node->GetGraph(); auto comp_graph = GraphUtils::GetComputeGraph(*const_graph); - GE_DUMP(compute_graph, "RunCustomPassBegin"); + GE_DUMP(comp_graph, "RunCustomPassBegin"); GE_TIMESTAMP_START(RunCustomPass); GraphPtr graph = std::const_pointer_cast(const_graph); From b706aa1da3044d6fb2c02951b190e70f1683433d Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 31 Dec 2020 11:00:04 +0800 Subject: [PATCH 14/54] Custom pass register. --- ge/graph/manager/graph_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index c102ec2e..706908af 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -767,7 +767,7 @@ Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint } Status GraphManager::RunCustomPass(const GraphNodePtr &graph_node) { - ConstGraphPtr const_graph = graph->node->GetGraph(); + ConstGraphPtr const_graph = graph_node->GetGraph(); auto comp_graph = GraphUtils::GetComputeGraph(*const_graph); GE_DUMP(comp_graph, "RunCustomPassBegin"); From af230762e14c1cdfa31e7c691115e6e51bc9ec83 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 31 Dec 2020 11:17:24 +0800 Subject: [PATCH 15/54] Custom pass register. --- tests/ut/ge/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 1f6c6837..dcf389c0 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -121,6 +121,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/metadef/graph/opsproto/opsproto_manager.cc" "${GE_CODE_DIR}/metadef/ops/op_imp.cpp" "${GE_CODE_DIR}/metadef/register/register.cpp" + "${GE_CODE_DIR}/metadef/register/register_pass.cpp" "${GE_CODE_DIR}/metadef/register/op_kernel_registry.cpp" "${GE_CODE_DIR}/metadef/register/auto_mapping_util.cpp" "${GE_CODE_DIR}/metadef/register/tensor_assign.cpp" From 7b6461170d3ce03b400ea975f75eac3a22479cba Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Thu, 31 Dec 2020 14:31:59 +0800 Subject: [PATCH 16/54] fix for json dump --- ge/analyzer/analyzer.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ge/analyzer/analyzer.cc b/ge/analyzer/analyzer.cc index 29181384..c63a6008 100755 --- a/ge/analyzer/analyzer.cc +++ b/ge/analyzer/analyzer.cc @@ -217,10 +217,15 @@ ge::Status Analyzer::SaveAnalyzerDataToFile(uint64_t session_id, uint64_t graph_ json jsn; GraphInfoToJson(jsn, *graph_info); - json_file_ << jsn.dump(kJsonDumpLevel) << std::endl; + bool ret_failed = false; + try { + json_file_ << jsn.dump(kJsonDumpLevel) << std::endl; + } catch (nlohmann::detail::type_error &e) { + GELOGE(FAILED, "analyzer file [%s] failed because [%s]", json_file_name_.c_str(), e.what()); + ret_failed = true; + } json_file_.close(); - - return SUCCESS; + return ret_failed ? 
FAILED : SUCCESS; } ge::Status Analyzer::DoAnalyze(DataInfo &data_info) { From 3e6b21f6c17b54a40cd0e59c7b321f71775a402f Mon Sep 17 00:00:00 2001 From: zhaoxinxin Date: Thu, 31 Dec 2020 14:47:44 +0800 Subject: [PATCH 17/54] modified: ge/graph/preprocess/graph_preprocess.cc --- ge/graph/preprocess/graph_preprocess.cc | 114 ++++++++++++++++-------- 1 file changed, 75 insertions(+), 39 deletions(-) diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index c45f4db6..57c2542a 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -901,51 +901,74 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) { } /** * Parser shape_range from string to vector - * shape_range from option normally is "[1~20],[3],[3~6],[-1]" + * shape_range from option normally is "[1~20,3,3~6,-1],[1~20,3,3~6,-1]" * @param shape_range */ -void ParseDynamicInputShapeRange(const std::string &shape_range, - std::vector>> &range) { - if (shape_range.empty() || shape_range.size() < 2) { +Status ParseDynamicInputShapeRange(const std::string &shape_range, + std::vector>> &range) { + if (shape_range.size() < 2) { GELOGW("Shape range %s is invalid.", shape_range.c_str()); return; } - // different parameter sets are split by ';' - vector shape_set = ge::StringUtils::Split(shape_range, ']'); - if (shape_set.empty()) { - return; + // different shape_ragne of single input are split by ']' + vector shape_range_set = ge::StringUtils::Split(shape_range, ']'); + if (shape_range_set.empty()) { + GELOGE("Shape range %s is not valid. Correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", shape_range.c_str()); + return PARAM_INVALID; } - for (auto shape_str : shape_set) { - if (shape_str.empty()) { - continue; - } - if (ge::StringUtils::StartWith(shape_str, "[")) { - shape_str = shape_str.substr(1, shape_str.size()); + for (const auto &shape_range_str : shape_range_set) { + if (shape_range_str.empty()) { + GELOGE("Shape range of input is empty. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", + shape_range.c_str()); + return PARAM_INVALID; } - if (ge::StringUtils::StartWith(shape_str, ",")) { - shape_str = shape_str.substr(2, shape_str.size()); + // trim start bytes, after that, single input should be "1~20,3,3~6,-1" + if (ge::StringUtils::StartWith(shape_range_str, "[")) { + shape_range_str = shape_range_str.substr(1, shape_range_str.size()); + } else if (ge::StringUtils::StartWith(shape_range_str, ",")) { + shape_range_str = shape_range_str.substr(2, shape_range_str.size()); + } else { + GELOGE("Shape range of input is invalid. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", + shape_range.c_str()); + return PARAM_INVALID; } - std::vector> range_of_single; - vector range_set = ge::StringUtils::Split(shape_str, ','); - for (auto range_str : range_set) { - vector pair_set = ge::StringUtils::Split(range_str, '~'); + // parse shape_range of single input. eg. 
"1~20,3,3~6,-1" + std::vector> range_of_single_input; + vector dim_range_set = ge::StringUtils::Split(shape_range_str, ','); + for (const auto &range_pair_str : dim_range_set) { + vector range_pair_set = ge::StringUtils::Split(range_pair_str, '~'); pair range_pair; - if (pair_set.size() == 1) { - auto range_value = atoi(pair_set.at(0).c_str()); + if (range_pair_set.size() == 1) { + // fix dim + auto range_value = stol(range_pair_set.at(0).c_str()); if (range_value < 0) { range_pair = std::make_pair(1, range_value); } else { range_pair = std::make_pair(range_value, range_value); } - } else if (pair_set.size() == 2) { - auto range_left = atoi(pair_set.at(0).c_str()); - auto range_right = atoi(pair_set.at(1).c_str()); - range_pair = std::make_pair(range_left, range_right); + } else if (range_pair_set.size() == 2) { + // unknown dim, should get range. + try { + auto range_left = stol(range_pair_set.at(0).c_str()); + auto range_right = stol(range_pair_set.at(1).c_str()); + range_pair = std::make_pair(range_left, range_right); + } catch (const std::invalid_argument) { + GELOGE( + "Parse shape range of input failed when transfer from string to int64. Given %s, while correct example: " + "\"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", + shape_range.c_str()); + return PARAM_INVALID; + } + } else { + GELOGE("Shape range of input is invalid. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", + shape_range.c_str()); + return PARAM_INVALID; } - range_of_single.emplace_back(range_pair); + range_of_single_input.emplace_back(range_pair); } - range.emplace_back(range_of_single); + range.emplace_back(range_of_single_input); } + return SUCCESS; } Status GetDynamicInputShapeRange(const std::vector &user_input, const std::map &graph_option, @@ -966,7 +989,8 @@ Status GetDynamicInputShapeRange(const std::vector &user_input, const return PARAM_INVALID; } GELOGD("GraphOption: dynamic_inputs_shape_range value is %s.", iter->second.c_str()); - ParseDynamicInputShapeRange(iter->second, range_vec); + auto ret = ParseDynamicInputShapeRange(iter->second, range_vec); + GE_CHK_STATUS_RET(ret, "Parse dynamic input shape range failed."); if (range_vec.size() != user_input.size()) { GELOGE(PARAM_INVALID, "Dynamic input shape range size is %zu, inputs size is %zu. 
Not match.", range_vec.size(), user_input.size()); @@ -978,18 +1002,30 @@ Status GetDynamicInputShapeRange(const std::vector &user_input, const Status UpdateDynamicInputShapeRange(const ge::GeAttrValue::INT index, const vector>> &range_vec, OpDescPtr &op, GeTensorDesc &desc) { - auto unkown_shape = desc.GetShape(); - auto shape_range = range_vec.at(index); - for (size_t i = 0; i < unkown_shape.GetDimNum(); ++i) { - if (shape_range.at(i).first == shape_range.at(i).second) { - unkown_shape.SetDim(i, shape_range.at(i).first); + auto origin_shape = desc.GetShape(); + auto current_shape_range_vec = range_vec.at(index); + if (current_shape_range_vec.size() != origin_shape.GetDimNum()) { + GELOGE(PARAM_INVALID, "Given shape_range dim num is %zu, current dim num is %zu, not match.Pleace Check.", + current_shape_range_vec.size(), origin_shape.GetDimNum()); + return PARAM_INVALID; + } + for (size_t i = 0; i < origin_shape.GetDimNum(); ++i) { + if (current_shape_range_vec.at(i).first == current_shape_range_vec.at(i).second) { + // given shape_range is known dim, check is same as origin or not + if (origin_shape.GetDim(i) != current_shape_range_vec.at(i).first) { + GELOGE(PARAM_INVALID, "Given shape range is %ld, current dim shape is %ld, not match.Pleace Check.", + current_shape_range_vec.at(i).first, origin_shape.GetDim(i)); + return PARAM_INVALID; + } + origin_shape.SetDim(i, current_shape_range_vec.at(i).first); } else { - unkown_shape.SetDim(i, -1); + origin_shape.SetDim(i, -1); } } - desc.SetShape(unkown_shape); - desc.SetShapeRange(shape_range); - int64_t dynamic_shape_size = 1; + desc.SetShape(origin_shape); + desc.SetShapeRange(current_shape_range_vec); + + /*int64_t dynamic_shape_size = 1; for (const auto range_pair : range_vec.at(index)) { FMK_INT64_MULCHECK(dynamic_shape_size, range_pair.second); dynamic_shape_size *= range_pair.second; @@ -1003,7 +1039,7 @@ Status UpdateDynamicInputShapeRange(const ge::GeAttrValue::INT index, FMK_INT64_MULCHECK(dynamic_shape_size, data_type_size); dynamic_shape_size *= data_type_size; GELOGI("In dynamic_execute mode ,set input %s shape range size %ld", op->GetName().c_str(), dynamic_shape_size); - ge::TensorUtils::SetSize(desc, dynamic_shape_size); + ge::TensorUtils::SetSize(desc, dynamic_shape_size);*/ graphStatus graph_ret = op->UpdateInputDesc(0, desc); GE_CHK_STATUS_RET(graph_ret, "UpdateInputDesc fail, graph ret: %u", graph_ret); graph_ret = op->UpdateOutputDesc(0, desc); From c07359baedff338e46fc7f54dd259c1aaa556deb Mon Sep 17 00:00:00 2001 From: zhaoxinxin Date: Thu, 31 Dec 2020 14:57:14 +0800 Subject: [PATCH 18/54] modified: ge/graph/preprocess/graph_preprocess.cc --- ge/graph/preprocess/graph_preprocess.cc | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 57c2542a..f6a9ea80 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -907,18 +907,20 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) { Status ParseDynamicInputShapeRange(const std::string &shape_range, std::vector>> &range) { if (shape_range.size() < 2) { - GELOGW("Shape range %s is invalid.", shape_range.c_str()); - return; + GELOGE(PARAM_INVALID, "Shape range %s is invalid.", shape_range.c_str()); + return PARAM_INVALID; } // different shape_ragne of single input are split by ']' vector shape_range_set = ge::StringUtils::Split(shape_range, ']'); if (shape_range_set.empty()) { - GELOGE("Shape range %s is not 
valid. Correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", shape_range.c_str()); + GELOGE(PARAM_INVALID, "Shape range %s is not valid. Correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", + shape_range.c_str()); return PARAM_INVALID; } - for (const auto &shape_range_str : shape_range_set) { + for (auto &shape_range_str : shape_range_set) { if (shape_range_str.empty()) { - GELOGE("Shape range of input is empty. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", + GELOGE(PARAM_INVALID, + "Shape range of input is empty. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", shape_range.c_str()); return PARAM_INVALID; } @@ -928,7 +930,8 @@ Status ParseDynamicInputShapeRange(const std::string &shape_range, } else if (ge::StringUtils::StartWith(shape_range_str, ",")) { shape_range_str = shape_range_str.substr(2, shape_range_str.size()); } else { - GELOGE("Shape range of input is invalid. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", + GELOGE(PARAM_INVALID, + "Shape range of input is invalid. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", shape_range.c_str()); return PARAM_INVALID; } @@ -940,7 +943,7 @@ Status ParseDynamicInputShapeRange(const std::string &shape_range, pair range_pair; if (range_pair_set.size() == 1) { // fix dim - auto range_value = stol(range_pair_set.at(0).c_str()); + auto range_value = std::stol(range_pair_set.at(0).c_str()); if (range_value < 0) { range_pair = std::make_pair(1, range_value); } else { @@ -949,18 +952,20 @@ Status ParseDynamicInputShapeRange(const std::string &shape_range, } else if (range_pair_set.size() == 2) { // unknown dim, should get range. try { - auto range_left = stol(range_pair_set.at(0).c_str()); - auto range_right = stol(range_pair_set.at(1).c_str()); + auto range_left = std::stol(range_pair_set.at(0).c_str()); + auto range_right = std::stol(range_pair_set.at(1).c_str()); range_pair = std::make_pair(range_left, range_right); } catch (const std::invalid_argument) { GELOGE( + PARAM_INVALID, "Parse shape range of input failed when transfer from string to int64. Given %s, while correct example: " "\"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", shape_range.c_str()); return PARAM_INVALID; } } else { - GELOGE("Shape range of input is invalid. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", + GELOGE(PARAM_INVALID, + "Shape range of input is invalid. 
Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", shape_range.c_str()); return PARAM_INVALID; } From 0c14832647575c92adc1a53b21d42669955bbf53 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Thu, 31 Dec 2020 15:02:03 +0800 Subject: [PATCH 19/54] Fix dynamic getnext --- .../load/new_model_manager/davinci_model.cc | 49 ++++++++++++------- .../load/new_model_manager/davinci_model.h | 2 +- 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 1d465441..09c27918 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2456,19 +2456,10 @@ Status DavinciModel::InitOutputTensorInfo(const OpDescPtr &op_desc) { GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(ret, "Get size from TensorDesc failed, op:%s, input id:%zu", op_desc->GetName().c_str(), i); return ret); - std::vector output_shape = input_desc->GetShape().GetDims(); - if (is_online_infer_dynamic_) { - if (merge_nodes_gear_and_real_out_size_info_.find(i) != merge_nodes_gear_and_real_out_size_info_.end()) { - auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[i]; - size = gear_and_real_out_size_info[cur_dynamic_dims_]; - auto gear_and_real_out_shape_info = merge_nodes_gear_and_real_out_shape_info_[i]; - output_shape = gear_and_real_out_shape_info[cur_dynamic_dims_]; - is_dynamic_ = true; - } - } - GELOGI("Output size is %ld, output shape is %s.", size, formats::JoinToString(output_shape).c_str()); - output_buffer_size_.push_back(size); - output_shape_info_.push_back(output_shape); + const GeShape &shape = input_desc->GetShape(); + GELOGI("Output size is %ld, output shape is %s.", size, formats::JoinToString(shape.GetDims()).c_str()); + output_buffer_size_.emplace_back(size); + output_shape_info_.emplace_back(shape); } return SUCCESS; @@ -2481,18 +2472,38 @@ Status DavinciModel::GenOutputTensorInfo(OutputData *output_data, vector output_buffer_size; + vector> output_shape_info; + size_t output_num = output_buffer_size_.size(); + for (size_t i = 0; i < output_num; ++i) { + int64_t output_size = output_buffer_size_[i]; + vector output_shape = output_shape_info_[i].GetDims(); + if (is_online_infer_dynamic_) { + if (merge_nodes_gear_and_real_out_size_info_.find(i) != merge_nodes_gear_and_real_out_size_info_.end()) { + auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[i]; + output_size = gear_and_real_out_size_info[cur_dynamic_dims_]; + auto gear_and_real_out_shape_info = merge_nodes_gear_and_real_out_shape_info_[i]; + output_shape = gear_and_real_out_shape_info[cur_dynamic_dims_]; + is_dynamic_ = true; + } + } + GELOGI("Output size is %ld, output shape is %s.", output_size, formats::JoinToString(output_shape).c_str()); + output_buffer_size.push_back(output_size); + output_shape_info.push_back(output_shape); + } + GELOGI("Output blobs size:%zu, model id:%u", output_buffer_size_.size(), model_id_); - for (size_t i = 0; i < output_buffer_size_.size(); ++i) { - std::unique_ptr data_buf(new (std::nothrow) uint8_t[output_buffer_size_[i]]); + for (size_t i = 0; i < output_buffer_size.size(); ++i) { + std::unique_ptr data_buf(new (std::nothrow) uint8_t[output_buffer_size[i]]); if (data_buf == nullptr) { GELOGE(GE_GRAPH_MALLOC_FAILED, "Malloc buffer failed."); return GE_GRAPH_MALLOC_FAILED; } - output_data->blobs.push_back({data_buf.get(), static_cast(output_buffer_size_[i]), false}); - ge::OutputTensorInfo 
output; - output.dims = output_shape_info_[i]; + output_data->blobs.push_back({data_buf.get(), static_cast(output_buffer_size[i]), false}); + OutputTensorInfo output; + output.dims = output_shape_info[i]; output.data = std::move(data_buf); - output.length = output_buffer_size_[i]; + output.length = output_buffer_size[i]; outputs.emplace_back(std::move(output)); GELOGD("Output index:%zu, output dims is %s, data length:%lu.", i, formats::JoinToString(output.dims).c_str(), output.length); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 76c5c8f0..fba1b94b 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -1038,7 +1038,7 @@ class DavinciModel { vector> output_addrs_list_; vector output_buffer_size_; - vector> output_shape_info_; + vector output_shape_info_; vector output_descs_; vector output_formats_; From 5bedbf96964f25e47c29eaf2e7d24495dd05ea95 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Thu, 31 Dec 2020 15:30:25 +0800 Subject: [PATCH 20/54] Add UT --- tests/ut/ge/graph/load/davinci_model_unittest.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index eda3cb15..a9efab3d 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -111,6 +111,12 @@ TEST_F(UtestDavinciModel, init_success) { EXPECT_EQ(model.output_addrs_list_.size(), 1); EXPECT_EQ(model.task_list_.size(), 2); + OutputData output_data; + vector outputs; + EXPECT_EQ(model.GenOutputTensorInfo(&output_data, outputs), SUCCESS); + EXPECT_EQ(output_data.blobs.size(), 1); + EXPECT_EQ(outputs.size(), 1); + ProfilingManager::Instance().is_load_profiling_ = false; } From 974433b14d480863557ea98f65bb03a6492690f2 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Thu, 31 Dec 2020 16:17:48 +0800 Subject: [PATCH 21/54] Free mem before return --- ge/graph/load/new_model_manager/model_manager.cc | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 01075255..6f923236 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1568,6 +1568,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op GE_CHK_RT(rtFree(mem)); } }; + GE_MAKE_GUARD(release, callback); // malloc sysOpInfoList in SysOpCheckInfo status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { @@ -1580,7 +1581,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&d_res_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); - GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_res_op_list); @@ -1589,7 +1589,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&d_ret_code_list, op_nums * sizeof(ReturnCode), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); - GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_ret_code_list); @@ -1601,7 +1600,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op 
status = rtMalloc(&d_op_type_name, op_type.length(), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); - GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_op_type_name); @@ -1619,7 +1617,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&d_op_type_name, op_type.size(), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); - GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_op_type_name); @@ -1648,7 +1645,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&args, args_size, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); - GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(args); @@ -1664,7 +1660,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtStreamSynchronize(stream); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); - GE_MAKE_GUARD(release, callback); GE_CHK_RT(rtStreamDestroy(stream)); return RT_ERROR_TO_GE_STATUS(status); } @@ -1679,7 +1674,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op if (op_check_info_res.isWithoutJson) { GELOGI("No need to check aicpu in this scenoria."); - GE_MAKE_GUARD(release, callback); GE_CHK_RT(rtStreamDestroy(stream)); return SUCCESS; } @@ -1698,7 +1692,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) { GELOGE(FAILED, "Number of retcode is not equal to number of op type."); - GE_MAKE_GUARD(release, callback); GE_CHK_RT(rtStreamDestroy(stream)); return FAILED; } @@ -1722,12 +1715,10 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op } fail_reason += "not support."; GELOGE(FAILED, "Check aicpu op_type failed. 
details: %s", fail_reason.c_str()); - GE_MAKE_GUARD(release, callback); GE_CHK_RT(rtStreamDestroy(stream)); return FAILED; } - GE_MAKE_GUARD(release, callback); GE_CHK_RT(rtStreamDestroy(stream)); GELOGI("Cpu kernel launch check optype task success."); return SUCCESS; From 2dfaed0e849a3bd22702ae9e8c60aab26b35ef12 Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Thu, 31 Dec 2020 16:44:35 +0800 Subject: [PATCH 22/54] add OptimizeWholeGraph --- ge/graph/manager/graph_manager.cc | 3 +++ ge/graph/optimize/graph_optimize.cc | 33 +++++++++++++++++++++++++++++ ge/graph/optimize/graph_optimize.h | 3 +++ 3 files changed, 39 insertions(+) diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index beb7cd42..a0d598f3 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -731,6 +731,9 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, GeRootModelPtr &ge_root_model, uint64_t session_id) { GE_CHECK_NOTNULL(graph_node); GE_CHECK_NOTNULL(compute_graph); + + CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId()); + GM_RUN_AND_DUMP_PERF("OptimizeWholeGraph", stages.optimizer.OptimizeWholeGraph, compute_graph); GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts, diff --git a/ge/graph/optimize/graph_optimize.cc b/ge/graph/optimize/graph_optimize.cc index c94408de..d2e45195 100644 --- a/ge/graph/optimize/graph_optimize.cc +++ b/ge/graph/optimize/graph_optimize.cc @@ -336,4 +336,37 @@ Status GraphOptimize::IdentifyReference(ComputeGraphPtr &compute_graph) { } return SUCCESS; } +Status GraphOptimize::OptimizeWholeGraph(ComputeGraphPtr &compute_graph) { + if (compute_graph == nullptr) { + GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeWholeGraph]: compute_graph is nullptr."); + return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL; + } + + std::shared_ptr instance_ptr = ge::GELib::GetInstance(); + if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "OptimizeWholeGraph failed."); + return GE_CLI_GE_NOT_INITIALIZED; + } + + auto graph_optimizer = instance_ptr->OpsKernelManagerObj().GetAllGraphOptimizerObjsByPriority(); + GELOGI("optimize by opskernel in OptimizeWholeGraph. num of graph_optimizer is %zu.", graph_optimizer.size()); + Status ret = SUCCESS; + string exclude_core_type = (core_type_ == kVectorCore) ? 
kAicoreEngine : kVectorEngine; + GELOGD("[OptimizeWholeGraph]: engine type will exclude: %s", exclude_core_type.c_str()); + if (!graph_optimizer.empty()) { + for (auto &iter : graph_optimizer) { + if (iter.first == exclude_core_type || iter.second == nullptr) { + continue; + } + GELOGI("Begin to refine running format by engine %s", iter->first.c_str()); + ret = iter.second->OptimizeWholeGraph(*compute_graph); + GE_DUMP(compute_graph, "OptimizeWholeGraph" + iter.first); + if (ret != SUCCESS) { + GELOGE(ret, "[OptimizeWholeGraph]: graph optimize failed, ret:%u", ret); + return ret; + } + } + } + return ret; +} } // namespace ge diff --git a/ge/graph/optimize/graph_optimize.h b/ge/graph/optimize/graph_optimize.h index 78d580b7..3a1960f7 100755 --- a/ge/graph/optimize/graph_optimize.h +++ b/ge/graph/optimize/graph_optimize.h @@ -52,6 +52,9 @@ class GraphOptimize { // for fe prepare optimize in quantize scene Status OptimizeOriginalGraphForQuantize(ComputeGraphPtr &compute_graph); + // for engine to optimize merged whole graph before ge Optimize2 + Status OptimizeWholeGraph(ComputeGraphPtr &compute_graph); + // for rts optimize before build to add attr and insert memcpy op Status OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_graph); From 2f9b6f64e6707056d98a094238579c6d63b72a4a Mon Sep 17 00:00:00 2001 From: chuxing Date: Thu, 31 Dec 2020 16:55:32 +0800 Subject: [PATCH 23/54] Dynamic Inputs --- .../executor/hybrid_model_async_executor.cc | 43 ++++++++++--------- .../executor/hybrid_model_async_executor.h | 6 ++- inc/framework/common/ge_types.h | 1 + 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index ba717a2d..4d23cd55 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -98,10 +98,10 @@ Status HybridModelAsyncExecutor::Init() { return SUCCESS; } -Status HybridModelAsyncExecutor::PreRun(InputData ¤t_data) { +Status HybridModelAsyncExecutor::PreRun(InputData ¤t_data, HybridModelExecutor::ExecuteArgs &args) { GE_CHK_STATUS_RET(SyncVarData(), "Failed to sync var data"); RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[SyncVarData] End"); - GE_CHK_STATUS_RET(CopyInputData(current_data), "Failed to copy input data to model"); + GE_CHK_STATUS_RET(PrepareInputs(current_data, args), "Failed to copy input data to model"); RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[CopyInputData] End"); return SUCCESS; } @@ -126,14 +126,9 @@ Status HybridModelAsyncExecutor::RunInternal() { InputData current_data = data_wrapper->GetInput(); GELOGI("Model thread Run begin, model id:%u, data index:%u.", model_id_, current_data.index); - HybridModelExecutor::ExecuteArgs args; - args.inputs.resize(input_tensors_.size()); - for (auto &it : input_tensors_) { - args.inputs[it.first] = it.second; - } - RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[RunInternal] [iteration = %d] Start", iterator_count_); - ret = PreRun(current_data); + HybridModelExecutor::ExecuteArgs args; + ret = PreRun(current_data, args); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( ret != SUCCESS, (void) HandleResult(ret, current_data.index, args, data_wrapper->GetOutput()); CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); @@ -202,7 +197,9 @@ Status HybridModelAsyncExecutor::SyncVarData() { return SUCCESS; } -Status HybridModelAsyncExecutor::CopyInputData(const InputData ¤t_data) { +Status 
HybridModelAsyncExecutor::PrepareInputs(const InputData ¤t_data, HybridModelExecutor::ExecuteArgs &args) { + args.inputs.resize(input_tensors_.size()); + args.input_desc.resize(input_tensor_desc_.size()); const std::vector &blobs = current_data.blobs; for (const auto &it : input_tensors_) { auto input_index = it.first; @@ -230,6 +227,13 @@ Status HybridModelAsyncExecutor::CopyInputData(const InputData ¤t_data) { data_buf.data, data_buf.length, RT_MEMCPY_HOST_TO_DEVICE)); + args.inputs[input_index] = input_tensor; + if (is_input_dynamic_[input_index]) { + auto &tensor_desc = input_tensor_desc_[input_index]; + tensor_desc->SetShape(GeShape(current_data.shapes[input_index])); + args.input_desc[input_index] = tensor_desc; + GELOGD("Update shape of input[%u] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str()); + } } return SUCCESS; @@ -240,7 +244,10 @@ Status HybridModelAsyncExecutor::InitInputTensors() { GE_CHECK_NOTNULL(allocator); int input_index = 0; for (const auto &input_node : model_->GetRootGraphItem()->GetInputNodes()) { - GELOGD("Init input[%u], node = %s", input_index, input_node->NodeName().c_str()); + GELOGD("Init input[%u], node = %s, is_dynamic = %d", + input_index, + input_node->NodeName().c_str(), + input_node->is_dynamic); auto output_desc = input_node->MutableOutputDesc(kDataOutputIndex); GE_CHECK_NOTNULL(output_desc); int64_t tensor_size = 0; @@ -258,6 +265,8 @@ Status HybridModelAsyncExecutor::InitInputTensors() { TensorValue tensor(shared_ptr(buffer.release())); tensor.SetName("Input_" + input_node->NodeName()); input_tensors_.emplace(input_index, tensor); + input_tensor_desc_.emplace(input_index, output_desc); + is_input_dynamic_.push_back(input_node->is_dynamic); input_index += 1; } @@ -402,18 +411,12 @@ Status HybridModelAsyncExecutor::Execute(const vector &inputs, vector< buffer.data = const_cast(tensor.GetData().GetData()); buffer.length = tensor.GetData().size(); input_data.blobs.emplace_back(buffer); + input_data.shapes.emplace_back(tensor.GetTensorDesc().GetShape().GetDims()); } - GE_CHK_STATUS_RET(CopyInputData(input_data), "Failed to copy input data to model"); - GELOGD("Done copying input data successfully."); HybridModelExecutor::ExecuteArgs args; - args.inputs.resize(input_tensors_.size()); - args.input_desc.resize(input_tensors_.size()); - for (auto &it : input_tensors_) { - args.inputs[it.first] = it.second; - args.input_desc[it.first] = MakeShared(inputs[it.first].GetTensorDesc()); - } - + GE_CHK_STATUS_RET(PrepareInputs(input_data, args), "Failed to copy input data to model"); + GELOGD("Done copying input data successfully."); GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model."); std::vector output_tensor_info_list; diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h index 21833b0b..ad39cac5 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.h +++ b/ge/hybrid/executor/hybrid_model_async_executor.h @@ -70,9 +70,9 @@ class HybridModelAsyncExecutor { Status OnComputeDone(uint32_t data_index, uint32_t result_code, std::vector &outputs); - Status PreRun(InputData ¤t_data); + Status PreRun(InputData ¤t_data, HybridModelExecutor::ExecuteArgs &args); - Status CopyInputData(const InputData ¤t_data); + Status PrepareInputs(const InputData ¤t_data, HybridModelExecutor::ExecuteArgs &args); std::mutex mu_; HybridModel *model_; @@ -86,6 +86,8 @@ class HybridModelAsyncExecutor { rtStream_t stream_ = nullptr; std::map input_tensors_; + std::map input_tensor_desc_; + 
std::vector is_input_dynamic_; std::shared_ptr listener_; }; } // namespace hybrid diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index 4267aec4..0bf8bb83 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -81,6 +81,7 @@ struct InputData { std::vector blobs; // Actual input data, currently only supports one input bool is_dynamic_batch = false; // Whether is dynamic batch size scene, default:false std::string batch_label; // Gear used for current inference in dynamic batch scene + std::vector> shapes; // Input shapes }; /// Output result structure definition From 3bbe8c7d04824a0b206e115c8a1e3b46575ad8de Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Thu, 31 Dec 2020 16:58:17 +0800 Subject: [PATCH 24/54] add OptimizeWholeGraph --- ge/graph/optimize/graph_optimize.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/optimize/graph_optimize.cc b/ge/graph/optimize/graph_optimize.cc index d2e45195..cd80a956 100644 --- a/ge/graph/optimize/graph_optimize.cc +++ b/ge/graph/optimize/graph_optimize.cc @@ -358,7 +358,7 @@ Status GraphOptimize::OptimizeWholeGraph(ComputeGraphPtr &compute_graph) { if (iter.first == exclude_core_type || iter.second == nullptr) { continue; } - GELOGI("Begin to refine running format by engine %s", iter->first.c_str()); + GELOGI("Begin to optimize whole graph by engine %s", iter.first.c_str()); ret = iter.second->OptimizeWholeGraph(*compute_graph); GE_DUMP(compute_graph, "OptimizeWholeGraph" + iter.first); if (ret != SUCCESS) { From d14900380e895606e3fdbc84f8dcf056feca89ed Mon Sep 17 00:00:00 2001 From: lwx897429 Date: Thu, 31 Dec 2020 16:50:14 +0800 Subject: [PATCH 25/54] fixed memory leak occurs when keep_dtype parse failed --- ge/offline/keep_dtype_option.cc | 29 +++++++++++++++++++---------- ge/offline/main.cc | 2 ++ 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/ge/offline/keep_dtype_option.cc b/ge/offline/keep_dtype_option.cc index 348a6068..5624f21c 100644 --- a/ge/offline/keep_dtype_option.cc +++ b/ge/offline/keep_dtype_option.cc @@ -42,21 +42,29 @@ bool IsOriginalOpFind(OpDescPtr &op_desc, const std::string &op_name) { } void KeepDtypeReportError(const std::vector &invalid_list) { - std::stringstream error_ops; - for (size_t i = 0; i < invalid_list.size(); i++) { + std::stringstream err_msg; + size_t list_size = invalid_list.size(); + err_msg << "config file contains " << list_size; + if (list_size == 1) { + err_msg << " operator not in the graph, op name:"; + } else { + err_msg << " operators not in the graph, op names:"; + } + + for (size_t i = 0; i < list_size; i++) { if (i == kMaxOpsNum) { - error_ops << "..."; + err_msg << ".."; break; } - error_ops << invalid_list[i] << " "; + err_msg << invalid_list[i]; + if (i != list_size - 1) { + err_msg << " "; + } } - std::string err_msg = "config file contains "; - err_msg = err_msg.append(std::to_string(invalid_list.size())) - .append(" operators not in the graph, op names:") - .append(error_ops.str()); + ErrorManager::GetInstance().ATCReportErrMessage( - "E10042", {"parameter", "reason"}, {"keep_dtype", err_msg.c_str()}); - GELOGE(FAILED, "%s", err_msg.c_str()); + "E10042", {"parameter", "reason"}, {"keep_dtype", err_msg.str().c_str()}); + GELOGE(FAILED, "%s", err_msg.str().c_str()); } Status DealKeepDtypeOption(const ComputeGraphPtr &graph, const std::string &keep_dtype) { @@ -96,6 +104,7 @@ Status DealKeepDtypeOption(const ComputeGraphPtr &graph, const std::string &keep 
       invalid_list.push_back(op_name);
     }
   }
+  ifs.close();

   if (!invalid_list.empty()) {
     KeepDtypeReportError(invalid_list);
diff --git a/ge/offline/main.cc b/ge/offline/main.cc
index ed67b913..14f7ae89 100755
--- a/ge/offline/main.cc
+++ b/ge/offline/main.cc
@@ -994,6 +994,8 @@ domi::Status GenerateModel(std::map<string, string> &options, std::string output
   Status ret = ge::DealKeepDtypeOption(ge::GraphUtils::GetComputeGraph(graph), FLAGS_keep_dtype);
   if (ret != SUCCESS) {
+    (void)ge_generator.Finalize();
+    (void)ge::GELib::GetInstance()->Finalize();
     return ret;
   }
From f175fed5884e55fbdd321dd59ab6e1cf834d05eb Mon Sep 17 00:00:00 2001
From: unknown
Date: Thu, 31 Dec 2020 18:01:47 +0800
Subject: [PATCH 26/54] Custom pass register.

---
 ge/graph/manager/graph_manager.cc | 4 ++--
 metadef                          | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc
index 706908af..84572d45 100755
--- a/ge/graph/manager/graph_manager.cc
+++ b/ge/graph/manager/graph_manager.cc
@@ -101,7 +101,7 @@
 #include "graph/common/local_context.h"
 #include "graph/common/omg_util.h"
 #include "common/formats/utils/formats_trans_utils.h"
-#include "external/register/register_pass.h"
+#include "register/custom_pass_helper.h"

 namespace {
 const char *const kSummary = "Summary";
@@ -773,7 +773,7 @@ Status GraphManager::RunCustomPass(const GraphNodePtr &graph_node) {
   GE_TIMESTAMP_START(RunCustomPass);

   GraphPtr graph = std::const_pointer_cast<Graph>(const_graph);
-  GE_CHK_STATUS_RET(CustomPassHelper::Instance()->Run(graph), "Graph[%s] run custom pass fail.",
+  GE_CHK_STATUS_RET(CustomPassHelper::Instance().Run(graph), "Graph[%s] run custom pass fail.",
                     comp_graph->GetName().c_str());
   GE_TIMESTAMP_END(RunCustomPass, "GraphBuilder::RunCustomPass");
   return SUCCESS;
diff --git a/metadef b/metadef
index 37a90f0d..44bcbb5e 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit 37a90f0dfd797306e99ec32a688be32a9ad835a4
+Subproject commit 44bcbb5ea25ada1a5393aa4c7f554d40b6859b18
From df0a3647628886f7369790b1ef1b07ed63975fd8 Mon Sep 17 00:00:00 2001
From: wxl
Date: Thu, 31 Dec 2020 18:24:32 +0800
Subject: [PATCH 27/54] bugfix

---
 ge/ir_build/ge_ir_build.cc | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc
index c7ef6c1a..95fb6749 100644
--- a/ge/ir_build/ge_ir_build.cc
+++ b/ge/ir_build/ge_ir_build.cc
@@ -36,6 +36,9 @@
 #include "model/ge_model.h"
 #include "graph/shape_refiner.h"
 #include "graph/opsproto_manager.h"
+#include "inc/pass_manager.h"
+#include "graph/passes/net_output_pass.h"
+#include "graph/passes/data_pass.h"

 using std::string;
 using namespace std;
@@ -233,6 +236,7 @@ class Impl {
                         ModelBufferData &ge_models);
   graphStatus InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format,
                                  bool is_dynamic_input);
+  static graphStatus InferShapePrepare(const ComputeGraphPtr &compute_graph);
   void SetRtSocVersion();
   void UpdateThreadContext();
   void LoadOpsProto();
@@ -243,6 +247,22 @@ class Impl {
   OmgContext omg_context_;
 };

+static graphStatus InferShapePrepare(const ComputeGraphPtr &compute_graph) {
+  GE_CHECK_NOTNULL(compute_graph);
+
+  PassManager prepare_infershape;
+  prepare_infershape.AddPass("PrepareNetoutput", new(std::nothrow) NetOutputPass);
+  prepare_infershape.AddPass("PrepareSubGraphReflection", new (std::nothrow) DataPass);
+
+  auto ret = prepare_infershape.Run(compute_graph);
+  if ((ret != SUCCESS) && (ret != NOT_CHANGED)) {
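// NOTE: a sketch of the PassManager contract assumed here (pass name and type
// are illustrative): passes run in registration order, and NOT_CHANGED is a
// benign result, so only other non-SUCCESS codes are treated as failure.
//   PassManager pm;
//   pm.AddPass("SomePass", new (std::nothrow) SomePass);
//   Status rc = pm.Run(graph);
//   bool failed = (rc != SUCCESS) && (rc != NOT_CHANGED);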
GELOGE(ret, "Prepair for infershape failed, ret:%d", ret); + return ret; + } + GELOGD("Prepair for infershape success!"); + return GRAPH_SUCCESS; +} + graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { GELOGD("Enter Update Data Attr Process!"); if (options_.find(kInputShape) == options_.end()) { @@ -591,7 +611,12 @@ graphStatus aclgrphInferShapeAndType(ge::Graph &graph) { return GRAPH_PARAM_INVALID; } - auto ret = compute_graph->TopologicalSorting(); + auto ret = Impl::InferShapePrepare(root_graph); + if (ret != GRAPH_SUCCESS) { + return ret; + } + + ret = compute_graph->TopologicalSorting(); if (ret != GRAPH_SUCCESS) { GELOGE(ret, "Acl topo logical sort failed."); return ret; From 3aa7852a23f125e55a9725d2673ac02ab4689b5c Mon Sep 17 00:00:00 2001 From: chuxing Date: Thu, 31 Dec 2020 18:29:38 +0800 Subject: [PATCH 28/54] fix sc --- inc/framework/common/ge_types.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index 0bf8bb83..7293de7e 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -73,14 +73,14 @@ struct DataBuffer { /// @brief External input data /// struct InputData { - uint32_t index; // Index of input data - uint32_t timestamp; // Data creation time - uint32_t timeout; // Processing timeout - uint32_t model_id; // Model ID required for data processing - uint64_t request_id = 0; // Request ID - std::vector blobs; // Actual input data, currently only supports one input - bool is_dynamic_batch = false; // Whether is dynamic batch size scene, default:false - std::string batch_label; // Gear used for current inference in dynamic batch scene + uint32_t index; // Index of input data + uint32_t timestamp; // Data creation time + uint32_t timeout; // Processing timeout + uint32_t model_id; // Model ID required for data processing + uint64_t request_id = 0; // Request ID + std::vector blobs; // Actual input data, currently only supports one input + bool is_dynamic_batch = false; // Whether is dynamic batch size scene, default:false + std::string batch_label; // Gear used for current inference in dynamic batch scene std::vector> shapes; // Input shapes }; From d2dfa7779888b672c208ddd17fc37524435b312e Mon Sep 17 00:00:00 2001 From: chuxing Date: Thu, 31 Dec 2020 18:55:06 +0800 Subject: [PATCH 29/54] fix sc --- inc/framework/common/ge_types.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index 7293de7e..7854396c 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -73,15 +73,15 @@ struct DataBuffer { /// @brief External input data /// struct InputData { - uint32_t index; // Index of input data - uint32_t timestamp; // Data creation time - uint32_t timeout; // Processing timeout - uint32_t model_id; // Model ID required for data processing - uint64_t request_id = 0; // Request ID - std::vector blobs; // Actual input data, currently only supports one input - bool is_dynamic_batch = false; // Whether is dynamic batch size scene, default:false - std::string batch_label; // Gear used for current inference in dynamic batch scene - std::vector> shapes; // Input shapes + uint32_t index; // Index of input data + uint32_t timestamp; // Data creation time + uint32_t timeout; // Processing timeout + uint32_t model_id; // Model ID required for data processing + uint64_t request_id = 0; // Request ID + std::vector blobs; // Actual 
+  bool is_dynamic_batch = false;  // Whether is dynamic batch size scene, default:false
+  std::string batch_label;  // Gear used for current inference in dynamic batch scene
+  std::vector<std::vector<int64_t>> shapes;  // Input shapes
 };

 /// Output result structure definition
From 29be15b8e21cf5e598769d1bc00107af02f6e0f6 Mon Sep 17 00:00:00 2001
From: wxl
Date: Mon, 4 Jan 2021 11:01:01 +0800
Subject: [PATCH 30/54] bugfix

---
 ge/ir_build/ge_ir_build.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc
index 95fb6749..8423c8bb 100644
--- a/ge/ir_build/ge_ir_build.cc
+++ b/ge/ir_build/ge_ir_build.cc
@@ -611,7 +611,7 @@ graphStatus aclgrphInferShapeAndType(ge::Graph &graph) {
     return GRAPH_PARAM_INVALID;
   }

-  auto ret = Impl::InferShapePrepare(root_graph);
+  auto ret = Impl::InferShapePrepare(compute_graph);
   if (ret != GRAPH_SUCCESS) {
     return ret;
   }
From 85f51c068757ed3d570aa1774e320884121ac9b0 Mon Sep 17 00:00:00 2001
From: wxl
Date: Mon, 4 Jan 2021 11:16:34 +0800
Subject: [PATCH 31/54] bugfix: aclgrphInferShapeAndType support subgraph

---
 ge/ir_build/ge_ir_build.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc
index 8423c8bb..78a69392 100644
--- a/ge/ir_build/ge_ir_build.cc
+++ b/ge/ir_build/ge_ir_build.cc
@@ -247,7 +247,7 @@ class Impl {
   OmgContext omg_context_;
 };

-static graphStatus InferShapePrepare(const ComputeGraphPtr &compute_graph) {
+graphStatus Impl::InferShapePrepare(const ComputeGraphPtr &compute_graph) {
   GE_CHECK_NOTNULL(compute_graph);

   PassManager prepare_infershape;
From 6ee84a5afc9fe043e53c8a1b21f8587578d66c8d Mon Sep 17 00:00:00 2001
From: taoxudonghaha
Date: Mon, 4 Jan 2021 14:28:33 +0800
Subject: [PATCH 32/54] solve msprofiler depend

---
 CMakeLists.txt    | 12 ++++++------
 ge/CMakeLists.txt | 47 +++++++++++++++++++++++++++++++++--------------
 2 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 776a3232..9194f119 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -74,7 +74,7 @@ if (ENABLE_OPEN_SRC)
     set(STATIC_ACL_LIB ${GE_LIB_PATH})
     find_module(slog libslog.so ${GE_LIB_PATH})
     find_module(static_mmpa libmmpa.a ${GE_LIB_PATH})
-    find_module(msprofiler libmsprofiler.a ${GE_LIB_PATH})
+    find_module(msprofiler_ext libmsprofiler.a ${GE_LIB_PATH})
     find_module(hccl libhccl.so ${GE_LIB_PATH})
     find_module(adump_server libadump_server.a ${GE_LIB_PATH})
     find_module(runtime libruntime.so ${GE_LIB_PATH})
@@ -83,7 +83,7 @@ if (ENABLE_OPEN_SRC)
     find_module(error_manager liberror_manager.so ${GE_LIB_PATH})
     find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH})
     find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH})
-    find_module(msprofiler_fwk libmsprofiler_fwk.a ${GE_LIB_PATH})
+    find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${GE_LIB_PATH})
     #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH})
   elseif(ENABLE_GE_COV OR ENABLE_GE_UT)
     add_subdirectory(tests)
@@ -97,7 +97,7 @@ if (ENABLE_OPEN_SRC)
       find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
      find_module(resource libresource.so ${ASCEND_RUNTIME_DIR})
       find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
-      find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
+      find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
       find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
       if(PRODUCT STREQUAL "flr3")
         message(FATAL_ERROR "This
platform is not supported in train mode, build terminated") @@ -109,7 +109,7 @@ if (ENABLE_OPEN_SRC) find_module(resource libresource.so ${ASCEND_ATC_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) - find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) + find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) if(PRODUCT STREQUAL "flr3") elseif(PRODUCT STREQUAL "flr1") @@ -120,7 +120,7 @@ if (ENABLE_OPEN_SRC) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) endif() elseif(PLATFORM STREQUAL "all") - find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) + find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) @@ -128,7 +128,7 @@ if (ENABLE_OPEN_SRC) find_module(resource libresource.so ${ASCEND_ATC_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) - find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) + find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) else() diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 0325a7de..88a323f3 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -615,7 +615,24 @@ set(INFER_SRC_LIST if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) ############ libge_runner.so ############ -add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS}) +add_library(ge_runner SHARED + ${TRAIN_SRC_LIST} + ${PROTO_SRCS} + ${PROTO_CLIENT_SRCS} + $,msprofiler_fwk,msprofiler_fwk_object>> +) + +add_library(msprofiler_fwk_object OBJECT IMPORTED GLOBAL) + +if (msprofiler_fwk_ext_LIBRARY_DIR) + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object) + execute_process( + COMMAND ar x ${msprofiler_fwk_ext_LIBRARY_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object + ) + file(GOLB MSPROFILER_FWK_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o) + set_property(TARGET msprofiler_fwk_object PROPERTY IMPORTED_OBJECTS ${MSPROFILER_FWK_OBJECT_LIST}) +endif() target_compile_definitions(ge_runner PRIVATE PROTOBUF_INLINE_NOT_IN_HEADERS=0 @@ -663,9 +680,6 @@ target_link_libraries(ge_runner PRIVATE ge_memory adump_server static_mmpa - -Wl,--whole-archive - msprofiler_fwk - -Wl,--no-whole-archive -Wl,--no-as-needed graph ge_common @@ -755,7 +769,7 @@ file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object) if(EXISTS ${STATIC_ACL_LIB}/libascendcl.a) execute_process( COMMAND ar x ${STATIC_ACL_LIB}/libascendcl.a - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object ) file(GLOB OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object/*.o) else() @@ -764,8 +778,21 @@ endif() add_library(opensrc_ascendcl SHARED ${OBJECT_LIST} + $,msprofiler,msprofiler_object>> ) +add_library(msprofiler_object OBJECT IMPORTED GLOBAL) + +if (msprofiler_ext_LIBRARY_DIR) + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object) + execute_process( + COMMAND ar x ${msprofiler_ext_LIBRARY_DIR} + WORKING_DIRECTORY 
${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object + ) + file(GOLB MSPROFILER_FWK_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object/*.o) + set_property(TARGET msprofiler_object PROPERTY IMPORTED_OBJECTS ${MSPROFILER_FWK_OBJECT_LIST}) +endif() + target_compile_definitions(opensrc_ascendcl PRIVATE google=ascend_private $<$:ONLY_COMPILE_OPEN_SRC> @@ -780,14 +807,7 @@ target_link_options(opensrc_ascendcl PRIVATE -Wl,--allow-multiple-definition -Wl,-z,muldefs -Wl,-Bsymbolic - -Wl,--exclude-libs,libascend_protobuf.a - -Wl,--exclude-libs,libge_executor.a - -Wl,--exclude-libs,libge_common.a - -Wl,--exclude-libs,libgraph.a - -Wl,--exclude-libs,libmmpa.a - -Wl,--exclude-libs,libregister.a - -Wl,--exclude-libs,liberror_manager.a - -Wl,--exclude-libs,libadump_server.a + -Wl,--exclude-libs,ALL ) target_link_libraries(opensrc_ascendcl PRIVATE -Wl,--whole-archive @@ -799,7 +819,6 @@ target_link_libraries(opensrc_ascendcl PRIVATE register_static error_manager_static adump_server - msprofiler -Wl,--no-whole-archive -Wl,--no-as-needed c_sec From 57386ebe8faf217e8e2d82e887a4c140dcca908d Mon Sep 17 00:00:00 2001 From: taoxudonghaha Date: Mon, 4 Jan 2021 14:45:17 +0800 Subject: [PATCH 33/54] solve msprofiler and delete ge_memory --- ge/CMakeLists.txt | 23 ++++++++++---- ge/graph/build/memory/CMakeLists.txt | 45 ---------------------------- 2 files changed, 17 insertions(+), 51 deletions(-) delete mode 100644 ge/graph/build/memory/CMakeLists.txt diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 88a323f3..8d9edb65 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -1,7 +1,6 @@ if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) add_subdirectory(common) add_subdirectory(plugin/engine) - add_subdirectory(graph/build/memory) add_subdirectory(ge_local_engine) add_subdirectory(host_cpu_engine) add_subdirectory(executor) @@ -342,6 +341,13 @@ set(TRAIN_SRC_LIST "analyzer/analyzer.cc" "ir_build/ge_ir_build.cc" "ir_build/atc_ir_common.cc" + "graph/build/memory/memory_assigner.cc" + "graph/build/memory/graph_mem_assigner.cc" + "graph/build/memory/binary_block_mem_assigner.cc" + "graph/build/memory/block_mem_assigner.cc" + "graph/build/memory/hybrid_mem_assigner.cc" + "graph/build/memory/max_block_mem_assigner.cc" + "graph/build/memory/var_mem_assign_util.cc" ) set(INFER_SRC_LIST @@ -611,6 +617,13 @@ set(INFER_SRC_LIST "graph/label/while_label_maker.cc" "graph/label/partitioned_call_label_maker.cc" "analyzer/analyzer.cc" + "graph/build/memory/memory_assigner.cc" + "graph/build/memory/graph_mem_assigner.cc" + "graph/build/memory/binary_block_mem_assigner.cc" + "graph/build/memory/block_mem_assigner.cc" + "graph/build/memory/hybrid_mem_assigner.cc" + "graph/build/memory/max_block_mem_assigner.cc" + "graph/build/memory/var_mem_assign_util.cc" ) if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) @@ -630,7 +643,7 @@ if (msprofiler_fwk_ext_LIBRARY_DIR) COMMAND ar x ${msprofiler_fwk_ext_LIBRARY_DIR} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object ) - file(GOLB MSPROFILER_FWK_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o) + file(GLOB MSPROFILER_FWK_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o) set_property(TARGET msprofiler_fwk_object PROPERTY IMPORTED_OBJECTS ${MSPROFILER_FWK_OBJECT_LIST}) endif() @@ -677,7 +690,6 @@ target_include_directories(ge_runner PRIVATE target_link_libraries(ge_runner PRIVATE $ - ge_memory adump_server static_mmpa -Wl,--no-as-needed @@ -742,7 +754,6 @@ target_include_directories(ge_compiler PRIVATE 
target_link_libraries(ge_compiler PRIVATE $ - ge_memory static_mmpa -Wl,--no-as-needed graph @@ -789,8 +800,8 @@ if (msprofiler_ext_LIBRARY_DIR) COMMAND ar x ${msprofiler_ext_LIBRARY_DIR} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object ) - file(GOLB MSPROFILER_FWK_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object/*.o) - set_property(TARGET msprofiler_object PROPERTY IMPORTED_OBJECTS ${MSPROFILER_FWK_OBJECT_LIST}) + file(GLOB MSPROFILER_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object/*.o) + set_property(TARGET msprofiler_object PROPERTY IMPORTED_OBJECTS ${MSPROFILER_OBJECT_LIST}) endif() target_compile_definitions(opensrc_ascendcl PRIVATE diff --git a/ge/graph/build/memory/CMakeLists.txt b/ge/graph/build/memory/CMakeLists.txt deleted file mode 100644 index f6f56a54..00000000 --- a/ge/graph/build/memory/CMakeLists.txt +++ /dev/null @@ -1,45 +0,0 @@ -set(SRC_LIST - "memory_assigner.cc" - "graph_mem_assigner.cc" - "binary_block_mem_assigner.cc" - "block_mem_assigner.cc" - "hybrid_mem_assigner.cc" - "max_block_mem_assigner.cc" - "var_mem_assign_util.cc" -) - -############ libge_memory.a ############ -add_library(ge_memory STATIC ${SRC_LIST}) - -target_compile_options(ge_memory PRIVATE - -Werror - -O2 - -fno-common -) - -target_compile_definitions(ge_memory PRIVATE - google=ascend_private - LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> -) - -target_link_libraries(ge_memory PRIVATE - $ - ascend_protobuf - c_sec -) - -target_include_directories(ge_memory PRIVATE - ${CMAKE_CURRENT_LIST_DIR} - ${GE_CODE_DIR}/ge - ${GE_CODE_DIR}/inc - ${GE_CODE_DIR}/inc/external - ${METADEF_DIR}/inc - ${METADEF_DIR}/inc/external - ${METADEF_DIR}/inc/external/graph - ${GE_CODE_DIR}/inc/framework - #### yellow zone #### - ${GE_CODE_DIR}/../inc - #### blue zone #### - ${GE_CODE_DIR}/third_party/fwkacllib/inc -) From dd6996e2e952c05a80f2ca79ab37e1645f1a18a7 Mon Sep 17 00:00:00 2001 From: zhou_lili Date: Mon, 4 Jan 2021 18:58:51 +0800 Subject: [PATCH 34/54] change switchn to case and add ut --- .../load/new_model_manager/davinci_model.cc | 181 +++--- .../load/new_model_manager/davinci_model.h | 16 +- .../load/new_model_manager/model_manager.cc | 12 +- .../load/new_model_manager/model_manager.h | 6 +- .../task_info/hccl_task_info.cc | 4 +- ge/graph/manager/graph_manager.cc | 6 +- .../common_subexpression_elimination_pass.cc | 6 +- ge/graph/passes/multi_batch_clone_pass.cc | 553 +++++++++++++++--- ge/graph/passes/multi_batch_clone_pass.h | 58 +- ge/graph/passes/unused_args_clean_pass.cc | 4 + ge/graph/preprocess/multi_batch_copy_graph.cc | 12 +- ge/graph/preprocess/multi_batch_options.cc | 5 +- inc/framework/omg/omg_inner_types.h | 3 + metadef | 2 +- parser | 2 +- tests/ut/ge/CMakeLists.txt | 1 + .../ge/graph/load/davinci_model_unittest.cc | 101 ++++ .../passes/multi_batch_clone_pass_unittest.cc | 247 ++++++++ 18 files changed, 1016 insertions(+), 203 deletions(-) create mode 100644 tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index f3d6f82b..706d4b3b 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -87,6 +87,7 @@ const uint32_t kDumpL1FusionOpMByteSize = 2097152; // 2 * 1024 * 1024 const uint32_t kDumpFlagOfL1Fusion = 0; const char *const kDefaultBatchLable = "Batch_default"; const char *const kGetDynamicDimsName = "ascend_mbatch_get_dynamic_dims_node"; +const char *const kMultiBatchNodePostfix = 
"_ascend_mbatch_batch_"; const int32_t kInvalidStream = -1; const uint32_t kEndOfSequence = 0x0704000a; const uint32_t kEndOfSequenceNew = 507005; @@ -867,6 +868,10 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); return PARAM_INVALID; } + if (InitRealSizeAndShapeInfo(compute_graph, node) != SUCCESS) { + GELOGE(PARAM_INVALID, "Init real size and shape failed, Name: %s", op_desc->GetName().c_str()); + return PARAM_INVALID; + } continue; } @@ -1143,16 +1148,24 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr & real_virtual_addrs_.insert(real_addr); } } + return SUCCESS; +} +Status DavinciModel::InitRealSizeAndShapeInfo(const ComputeGraphPtr &compute_graph, const NodePtr &node) { + if (node->GetName().find(kMultiBatchNodePostfix) != string::npos) { + GELOGD("No need to get size and shape of netoutput in subgraph."); + return SUCCESS; + } + GELOGD("Start init real size and shape info of %s.", node->GetName().c_str()); GetAllGearsInfo(node); if (is_getnext_sink_dynamic_) { GE_IF_BOOL_EXEC(GetGetDynamicDimsNodeInfo(node) != SUCCESS, GELOGE(PARAM_INVALID, "Failed to get info of getdynamicdims node."); return PARAM_INVALID;); } if (is_online_infer_dynamic_) { - GE_IF_BOOL_EXEC(GetGearAndRealOutSizeInfo(input_count, node) != SUCCESS, + GE_IF_BOOL_EXEC(GetGearAndRealOutSizeInfo(compute_graph, node) != SUCCESS, GELOGE(PARAM_INVALID, "Failed to get gear and real out size info."); return PARAM_INVALID;); - GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS, + GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(compute_graph, node) != SUCCESS, GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;); } @@ -1171,7 +1184,7 @@ void DavinciModel::GetAllGearsInfo(const NodePtr &node) { if (shape_str.empty()) { continue; } - std::vector gear_info; + std::vector gear_info; std::vector dims = ge::StringUtils::Split(shape_str, ','); for (const auto &dim : dims) { if (dim.empty()) { @@ -1187,6 +1200,7 @@ void DavinciModel::GetAllGearsInfo(const NodePtr &node) { } } } + Status DavinciModel::GetGetDynamicDimsNodeInfo(const NodePtr &node) { GE_CHECK_NOTNULL(node->GetOpDesc()); size_t input_count = node->GetAllInDataAnchors().size(); @@ -1224,11 +1238,11 @@ Status DavinciModel::GetGetDynamicDimsNodeInfo(const NodePtr &node) { return SUCCESS; } -Status DavinciModel::GetGearAndRealOutSizeInfo(size_t input_count, const NodePtr &node) { - GELOGD("Start get gear and real output size info of %s, input count is %zu.", node->GetName().c_str(), input_count); +Status DavinciModel::GetGearAndRealOutSizeInfo(const ComputeGraphPtr &graph, const NodePtr &node) { + GELOGD("Start get gear and real output size info of %s.", node->GetName().c_str()); merge_nodes_gear_and_real_out_size_info_.clear(); - for (size_t idx = 0; idx < input_count; ++idx) { - auto in_anchor = node->GetAllInDataAnchors().at(idx); + size_t idx = 0; + for (const auto &in_anchor : node->GetAllInDataAnchors()) { auto peer_out_anchor = in_anchor->GetPeerOutAnchor(); if (peer_out_anchor == nullptr) { continue; @@ -1236,89 +1250,106 @@ Status DavinciModel::GetGearAndRealOutSizeInfo(size_t input_count, const NodePtr auto peer_node = peer_out_anchor->GetOwnerNode(); auto op_desc = peer_node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); - if ((peer_node->GetType() == MERGE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) { - if (GetRealOutputSizeOfMerge(idx, peer_node) != SUCCESS) { + 
if ((peer_node->GetType() == CASE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) { + if (GetRealOutputSizeOfCase(graph, idx, peer_node) != SUCCESS) { GELOGE(PARAM_INVALID, "Get real output size of %s failed.", peer_node->GetName().c_str()); return PARAM_INVALID; } } + idx++; } return SUCCESS; } -Status DavinciModel::GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node) { - GELOGD("Start get output size of %s, which is %zu input to netoutput.", merge_node->GetName().c_str(), input_index); - std::map, int64_t> gear_and_real_out_size_info; - for (auto &in_anchor : merge_node->GetAllInDataAnchors()) { - auto peer_out_anchor = in_anchor->GetPeerOutAnchor(); - if (peer_out_anchor == nullptr) { - continue; - } - auto in_node = peer_out_anchor->GetOwnerNode(); - GELOGD("Input node of merge is %s.", in_node->GetName().c_str()); - auto op_desc = in_node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - string batch_label; - if (AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { - size_t batch_index = static_cast(stoi(batch_label.substr(batch_label.rfind('_') + 1))); - GELOGD("Batch index of %s is %zu.", op_desc->GetName().c_str(), batch_index); - if (batch_index > all_gears_info_.size()) { - GELOGE(PARAM_INVALID, "The value of ATTR_NAME_BATCH_LABEL is invalid."); - return PARAM_INVALID; - } - - const vector output_size_list = ModelUtils::GetOutputSize(op_desc); - int output_index = ge::AnchorUtils::GetIdx(peer_out_anchor); - auto tensor_desc = op_desc->GetOutputDescPtr(output_index); - GE_CHECK_NOTNULL(tensor_desc); - int64_t data_size = 0; - if (TensorUtils::GetTensorSizeInBytes(*tensor_desc, data_size) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Get tensor size in bytes failed."); - return FAILED; +Status DavinciModel::GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, + const NodePtr &case_node) { + GELOGD("Start get output size of %s, which is %zu input to netoutput.", case_node->GetName().c_str(), input_index); + const auto &func_desc = case_node->GetOpDesc(); + GE_CHECK_NOTNULL(func_desc); + std::map, int64_t> gear_and_real_out_size_info; + for (const auto &name : func_desc->GetSubgraphInstanceNames()) { + const auto &subgraph = graph->GetSubgraph(name); + if (subgraph == nullptr) { + GELOGE(GE_GRAPH_EMPTY_SUBGRAPH, "Subgraph not found, name: %s.", name.c_str()); + return GE_GRAPH_EMPTY_SUBGRAPH; + } + for (auto &node : subgraph->GetDirectNode()) { + if (node->GetType() == NETOUTPUT) { + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + string batch_label; + if (AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { + size_t batch_index = static_cast(stoi(batch_label.substr(batch_label.rfind('_') + 1))); + GELOGD("Batch index of %s is %zu.", op_desc->GetName().c_str(), batch_index); + if (batch_index > all_gears_info_.size()) { + GELOGE(PARAM_INVALID, "The value of ATTR_NAME_BATCH_LABEL is invalid."); + return PARAM_INVALID; + } + + const vector input_size_list = ModelUtils::GetInputSize(op_desc); + auto tensor_desc = op_desc->GetInputDescPtr(input_index); + GE_CHECK_NOTNULL(tensor_desc); + int64_t data_size = 0; + if (TensorUtils::GetTensorSizeInBytes(*tensor_desc, data_size) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Get tensor size in bytes failed."); + return FAILED; + } + gear_and_real_out_size_info[all_gears_info_[batch_index]] = data_size; + GELOGD("Get real gear index is: %zu, gear info is %s, size is %ld, tensor size is %ld", + batch_index, formats::JoinToString(all_gears_info_[batch_index]).c_str(), + 
input_size_list[input_index], data_size); + } + break; } - gear_and_real_out_size_info[all_gears_info_[batch_index]] = data_size; - GELOGD("Get real gear index is: %zu, gear info is %s, size is %ld, tensor size is %ld", - batch_index, formats::JoinToString(all_gears_info_[batch_index]).c_str(), - output_size_list[output_index], data_size); } } merge_nodes_gear_and_real_out_size_info_[input_index] = gear_and_real_out_size_info; return SUCCESS; } -Status DavinciModel::GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc) { - GELOGD("Start to get dynamic output dims of %s.", op_desc->GetName().c_str()); +Status DavinciModel::GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node) { + GELOGD("Start to get dynamic output dims of %s.", node->GetName().c_str()); merge_nodes_gear_and_real_out_shape_info_.clear(); - std::vector dynamic_output_shape_info; - if (!AttrUtils::GetListStr(op_desc, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) { - GELOGD("Can not get dynamic output dims attr"); - return SUCCESS; - } - GELOGI("Dynamic output shape info is %s", formats::JoinToString(dynamic_output_shape_info).c_str()); - std::vector> dynamic_output_shape; - ParseDynamicOutShape(dynamic_output_shape_info, dynamic_output_shape); - // idx: input_index to netoutput - for (size_t idx = 0; idx < input_count; ++idx) { - std::map, vector> gear_and_real_out_shape_info; - for (auto &it : dynamic_output_shape) { - auto gear_index = static_cast(it[0]); - if (gear_index > all_gears_info_.size()) { - GELOGE(PARAM_INVALID, "The value of cur index: %zu is invalid.", static_cast(it[0])); - return PARAM_INVALID; + size_t idx = 0; + for (const auto &in_anchor : node->GetAllInDataAnchors()) { + auto peer_out_anchor = in_anchor->GetPeerOutAnchor(); + if (peer_out_anchor == nullptr) { + continue; + } + auto peer_node = peer_out_anchor->GetOwnerNode(); + auto op_desc = peer_node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + if ((peer_node->GetType() == CASE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) { + std::vector dynamic_output_shape_info; + if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) { + GELOGD("Can not get dynamic output dims attr from %s.", node->GetName().c_str()); + return SUCCESS; } + GELOGI("Dynamic output shape info is %s", formats::JoinToString(dynamic_output_shape_info).c_str()); + std::vector> dynamic_output_shape; + ParseDynamicOutShape(dynamic_output_shape_info, dynamic_output_shape); + std::map, vector> gear_and_real_out_shape_info; + for (auto &it : dynamic_output_shape) { + auto gear_index = static_cast(it[0]); + if (gear_index > all_gears_info_.size()) { + GELOGE(PARAM_INVALID, "The value of cur index: %zu is invalid.", static_cast(it[0])); + return PARAM_INVALID; + } - if (static_cast(it[1]) == idx) { - vector output_shape; - for (size_t i = 2; i < it.size(); ++i) { - output_shape.emplace_back(it[i]); + if (static_cast(it[1]) == idx) { + vector output_shape; + for (size_t i = 2; i < it.size(); ++i) { + output_shape.emplace_back(it[i]); + } + gear_and_real_out_shape_info[all_gears_info_[gear_index]] = output_shape; + GELOGD("Get real gear index is: %zu, gear info is %s, output shape is %s.", + gear_index, formats::JoinToString(all_gears_info_[gear_index]).c_str(), + formats::JoinToString(output_shape).c_str()); } - gear_and_real_out_shape_info[all_gears_info_[gear_index]] = output_shape; - GELOGD("Get real gear index is: %zu, gear info is %s, output shape is %s.", - gear_index, 
formats::JoinToString(all_gears_info_[gear_index]).c_str(), - formats::JoinToString(output_shape).c_str()); } + merge_nodes_gear_and_real_out_shape_info_[idx] = gear_and_real_out_shape_info; } - merge_nodes_gear_and_real_out_shape_info_[idx] = gear_and_real_out_shape_info; + idx++; } return SUCCESS; } @@ -1962,7 +1993,7 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO uint32_t &format_result) { /// netoutput input tensor desc GE_IF_BOOL_EXEC(op_desc->GetInputDescPtr(index) == nullptr, GELOGE(FAILED, "OpDesc GetInputDescPtr is nullptr"); - return ); + return); Format format = op_desc->GetInputDescPtr(index)->GetFormat(); GeShape shape = op_desc->GetInputDescPtr(index)->GetShape(); DataType data_type = op_desc->GetInputDescPtr(index)->GetDataType(); @@ -2567,7 +2598,7 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b GELOGD("Reinit cur dynamic dims when getnext sink dynamic."); cur_dynamic_dims_.clear(); cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_); - auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t), + auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int32_t), netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST); GE_CHK_RT_RET(ret); } @@ -2668,11 +2699,11 @@ void *DavinciModel::Run(DavinciModel *model) { GE_IF_BOOL_EXEC(current_data.blobs.empty(), break); auto shape_data_buffer_data = current_data.blobs.back().data; auto shape_data_buffer_length = current_data.blobs.back().length; - model->cur_dynamic_dims_.assign(reinterpret_cast(shape_data_buffer_data), - reinterpret_cast(shape_data_buffer_data) + - shape_data_buffer_length / sizeof(int64_t)); + model->cur_dynamic_dims_.assign(reinterpret_cast(shape_data_buffer_data), + reinterpret_cast(shape_data_buffer_data) + + shape_data_buffer_length / sizeof(int32_t)); GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str()); - delete[] reinterpret_cast(current_data.blobs.back().data); + delete[] reinterpret_cast(current_data.blobs.back().data); current_data.blobs.pop_back(); } GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 6b930b05..9ff59d4e 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -864,11 +864,13 @@ class DavinciModel { void ParseDynamicOutShape(const vector &str_info, vector> &vec_info); bool IsGetNextSinkDynamic(const OpDescPtr &op_desc); + + Status InitRealSizeAndShapeInfo(const ComputeGraphPtr &compute_graph, const NodePtr &node); void GetAllGearsInfo(const NodePtr &node); Status GetGetDynamicDimsNodeInfo(const NodePtr &node); - Status GetGearAndRealOutSizeInfo(size_t input_count, const NodePtr &node); - Status GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node); - Status GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc); + Status GetGearAndRealOutSizeInfo(const ComputeGraphPtr &graph, const NodePtr &node); + Status GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, const NodePtr &case_node); + Status GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node); bool is_weight_mem_has_inited_; bool is_feature_map_mem_has_inited_; @@ -1021,15 +1023,15 @@ class DavinciModel { bool 
is_new_model_desc_{false}; bool is_online_infer_dynamic_ = false; bool is_getnext_sink_dynamic_ = false; - vector cur_dynamic_dims_; + vector cur_dynamic_dims_; void *netoutput_last_input_addr_ = nullptr; int64_t netoutput_last_input_size_ = 0; size_t shape_of_cur_dynamic_dims_ = 0; // key: input_index: input is merge node; value: each gear info and each output size - map, int64_t>> merge_nodes_gear_and_real_out_size_info_; + map, int64_t>> merge_nodes_gear_and_real_out_size_info_; // key: input_index: input is merge node; value: each gear info and each output shape - map, vector>> merge_nodes_gear_and_real_out_shape_info_; - vector> all_gears_info_; + map, vector>> merge_nodes_gear_and_real_out_shape_info_; + vector> all_gears_info_; multimap op_id_map_; vector profile_list_; diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 6f923236..b2cce73a 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -460,8 +460,8 @@ Status ModelManager::DataInput(const InputData &input_data, OutputData &output_d Status ModelManager::GetCurDynamicDims(const vector> &user_real_input_dims, const vector>> &user_input_dims, - vector &cur_dynamic_dims) { - GELOGD(" Start get cur dynamic dims."); + vector &cur_dynamic_dims) { + GELOGD("Start get cur dynamic dims."); if (user_real_input_dims.size() != user_input_dims.size()) { GELOGE(INTERNAL_ERROR, "The input count of user: %zu should be equal to the data count of graph: %zu", @@ -478,7 +478,7 @@ Status ModelManager::GetCurDynamicDims(const vector> &user_real_ } for (size_t j = 0; j < user_input_dims.at(i).second.size(); ++j) { if (user_input_dims.at(i).second.at(j) < 0) { - cur_dynamic_dims.emplace_back(user_real_input_dims[i][j]); + cur_dynamic_dims.emplace_back(static_cast(user_real_input_dims[i][j])); } } } @@ -523,7 +523,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector cur_dynamic_dims; + std::vector cur_dynamic_dims; if (!GetLocalOmgContext().user_real_input_dims.empty()) { if (GetCurDynamicDims(GetLocalOmgContext().user_real_input_dims, GetLocalOmgContext().user_input_dims, cur_dynamic_dims) != SUCCESS) { @@ -531,9 +531,9 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector(cur_dynamic_dims.size() * sizeof(int64_t)); + uint32_t length = static_cast(cur_dynamic_dims.size() * sizeof(int32_t)); GE_CHK_BOOL_EXEC(memcpy_s(data.data, length, cur_dynamic_dims.data(), length) == EOK, return INTERNAL_ERROR, "Failed to memcpy data."); data.length = length; diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index 088ea5fd..500cad31 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -126,14 +126,14 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// /// @ingroup domi_ome /// @brief Get cur_dynamic_dims for all input. - /// @param [in] vector> &user_real_input_dims: dims info of all user_inputs. + /// @param [in] vector> &user_real_input_dims: dims info of all user_inputs. /// @param [in] vector>> &user_input_dims: key:name. value:dynamic dims from option. - /// @param [out] vector &cur_dynamic_dims: real dims gather, where the index of -1. + /// @param [out] vector &cur_dynamic_dims: real dims gather, where the index of -1. 
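/// NOTE: worked example (values illustrative): with user_input_dims configured
/// as {"data", {-1, 3, -1, 224}} and user_real_input_dims {{8, 3, 224, 224}},
/// cur_dynamic_dims becomes {8, 224}: only positions configured as -1
/// contribute their runtime value, cast to int32_t.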
/// @return 0: SUCCESS / others: INTERNAL_ERROR /// Status GetCurDynamicDims(const vector> &user_real_input_dims, const vector>> &user_input_dims, - vector &cur_dynamic_dims); + vector &cur_dynamic_dims); /// /// @ingroup domi_ome diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc index df43fd5b..8033c93e 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc @@ -145,7 +145,9 @@ Status HcclTaskInfo::SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciM } else { GELOGI("need to reuse follow stream and create new follow stream."); size_t created_stream_num = follow_stream_usage.size(); - hccl_stream_list_ = follow_stream_usage; + for (const auto &stream : follow_stream_usage) { + hccl_stream_list_.emplace_back(stream); + } ret = CreateStream(hccl_stream_num - created_stream_num, davinci_model, main_stream_id); if (ret != SUCCESS) { GELOGE(RT_FAILED, "Create hccl stream failed."); diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 6372a018..38de6ff7 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -2780,8 +2780,10 @@ Status GraphManager::ParseInputsDims(const std::vector &input_t if (!GetLocalOmgContext().dynamic_node_type.empty()) { vector data_nodes; vector getnext_nosink_nodes; - data_nodes = compute_graph_->TryGetExtAttr(kExtAttrDataNodes, data_nodes); - getnext_nosink_nodes = compute_graph_->TryGetExtAttr(kExtAttrGetNextNoSink, getnext_nosink_nodes); + data_nodes = GetLocalOmgContext().data_nodes; + getnext_nosink_nodes = GetLocalOmgContext().getnext_nosink_nodes; + GELOGD("Data nodes count is %zu, getnext nosink nodes count is %zu.", data_nodes.size(), + getnext_nosink_nodes.size()); if (GetLocalOmgContext().dynamic_node_type == DATA) { if (getnext_nosink_nodes.empty()) { // just data or data+getnext_sink diff --git a/ge/graph/passes/common_subexpression_elimination_pass.cc b/ge/graph/passes/common_subexpression_elimination_pass.cc index a4662d5d..7d9724fc 100644 --- a/ge/graph/passes/common_subexpression_elimination_pass.cc +++ b/ge/graph/passes/common_subexpression_elimination_pass.cc @@ -26,6 +26,10 @@ namespace ge { namespace { +std::set un_compute_attrs = { + {ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES}, +}; + std::string GetCseKey(const NodePtr &node) { std::stringstream ss; ss << node->GetType() << "-data-inputs-"; @@ -49,7 +53,7 @@ std::string GetCseKey(const NodePtr &node) { ss << name << "-"; } - ss << "attrs-" << AttrUtils::GetAllAttrsStr(node->GetOpDesc()); + ss << "attrs-" << AttrUtils::GetAttrsStrAfterRid(node->GetOpDesc(), un_compute_attrs); return ss.str(); } diff --git a/ge/graph/passes/multi_batch_clone_pass.cc b/ge/graph/passes/multi_batch_clone_pass.cc index f8451ace..b7efa070 100755 --- a/ge/graph/passes/multi_batch_clone_pass.cc +++ b/ge/graph/passes/multi_batch_clone_pass.cc @@ -25,31 +25,65 @@ #include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" #include "register/op_registry.h" +#include "graph/common/omg_util.h" namespace ge { namespace { constexpr uint8_t kDataInIndex = 0; constexpr uint8_t kDataOutIndex = 0; constexpr uint8_t kCaseArgIndex = 1; +const int kDivisionConst = 2; +const size_t kNumOfGetnextNode = 1; const std::string kMultiBatchCaseNode = "ascend_mbatch_shape_case"; const std::string kMultiBatchDataNode = "ascend_mbatch_shape_data"; +const std::string kMultiBatchGetDynamicDimsNode = 
"ascend_mbatch_get_dynamic_dims_node"; const std::string kMultiBatchConstNode = "ascend_mbatch_shape_const"; const std::string kMultiBatchMapIndexNode = "ascend_mbatch_shape_mapindex"; const std::string kMultiBatchNodePostfix = "_ascend_mbatch_batch_"; +const char *const kGetNextName = "IteratorV2"; } // namespace +inline bool IsGetNextType(const NodePtr &node) { + std::string original_type; + GE_IF_BOOL_EXEC(GetOriginalType(node, original_type) != SUCCESS, + GELOGW("Get original type failed."); return false); + return (original_type == kGetNextName); +} + Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { + GE_IF_BOOL_EXEC(graph == nullptr, GELOGE(FAILED, "Original graph is nullptr"); return FAILED); if (graph->GetParentGraph() != nullptr) { GELOGD("Subgraph %s skip the MultiBatchClonePass", graph->GetName().c_str()); return SUCCESS; } - + if (!GetLocalOmgContext().need_multi_batch) { + GELOGI("No need to process_multi for no_train graph."); + return SUCCESS; + } + std::vector data_nodes; + std::vector getnext_nosink_nodes; + std::vector getnext_sink_nodes; + if (multibatch::CheckSequenceOfOptions(graph, data_nodes, getnext_nosink_nodes, getnext_sink_nodes) != SUCCESS) { + GELOGE(PARAM_INVALID, "[Train_Dynamic] CheckSequenceOfOptions failed."); + return PARAM_INVALID; + } + if (multibatch::UpdateNameOfInputShape(graph, data_nodes, getnext_nosink_nodes, getnext_sink_nodes) != SUCCESS) { + GELOGE(PARAM_INVALID, "[Train_Dynamic] UpdateNameForInputShapeOfOption failed."); + return PARAM_INVALID; + } + if (multibatch::DeleteIdentityInsertByAdapter(graph) != SUCCESS) { + GELOGE(PARAM_INVALID, "[Train_Dynamic] DeleteIdentityInsertByAdapter failed."); + return PARAM_INVALID; + } if (!multibatch::InitDynamicParams(batch_shapes_)) { GELOGD("There is no multi-batch options, no need clone multi-batch graph"); return SUCCESS; } - + if (multibatch::CheckNegativeCountOfOptions(batch_shapes_) != SUCCESS) { + GELOGE(PARAM_INVALID, "[Train_Dynamic] Input_shape and dynamic_dims should set correct params."); + return PARAM_INVALID; + } GELOGD("Begin to run Multi-batch clone on graph: %s", graph->GetName().c_str()); GE_CHK_STATUS_RET(multibatch::CheckDynamicParams(batch_shapes_), "Invalid multi-batch param"); if (CollectIoNodes(graph) != SUCCESS) { @@ -66,21 +100,14 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { (void)AttrUtils::GetStr(graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id_); ComputeGraphPtr branch = MakeShared(graph->GetName()); - if (branch == nullptr) { - GELOGE(OUT_OF_MEMORY, "Create multi-batch graph failed"); - return OUT_OF_MEMORY; - } + GE_IF_BOOL_EXEC(branch == nullptr, GELOGE(OUT_OF_MEMORY, "Create multi batch graph failed"); return OUT_OF_MEMORY); (void)AttrUtils::SetStr(branch, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id_); graph->InValid(); // Will modify, need topological again. 
graph->Swap(*branch); - if (CreateRootGraph(graph) != SUCCESS) { - return FAILED; - } - - if (CreateSubgraphs(graph, branch) != SUCCESS) { - return FAILED; - } + GE_CHK_STATUS_RET(CreateRootGraph(graph), "Construct root graph failed."); + GE_CHK_STATUS_RET(CreateOriGraph(branch), "Construct original graph failed.") + GE_CHK_STATUS_RET(CreateSubgraphs(graph, branch), "Construct subgraph failed."); GE_CHK_STATUS_RET(PruneDirectOutput(graph), "Prune direct output failed"); GELOGD("MultiBatchClonePass Leave"); @@ -95,9 +122,13 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { /// Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { for (const auto &node : graph->GetDirectNode()) { + if (!GetLocalOmgContext().dynamic_node_type.empty() && IsGetNextType(node)) { + all_data_nodes_.emplace_back(node); + GE_CHK_STATUS_RET(InitParamsOfGetNext(node), "Init params of %s failed.", node->GetName().c_str()); + } if (node->GetType() == DATA) { all_data_nodes_.emplace_back(node); - } else if (node->GetType() == CONSTANT) { + } else if (node->GetType() == CONSTANT || node->GetType() == CONSTANTOP) { all_const_nodes_.emplace_back(node); } else if (node->GetType() == NETOUTPUT) { all_output_nodes_.emplace_back(node); @@ -114,10 +145,16 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { } int64_t data_index = 0; + size_t getnext_node_count = 0; for (size_t i = 0; i < all_data_nodes_.size(); ++i) { + if (IsGetNextType(all_data_nodes_[i])) { + // just one getnext node in graph + getnext_node_count++; + continue; + } const auto &op_desc = all_data_nodes_[i]->GetOpDesc(); if (!AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { - (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, i); + (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, i - getnext_node_count); } } @@ -133,7 +170,43 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { "Remove edge failed"); } } + GELOGD("Data count is %zu, const count is %zu, getnext count is %zu, output count is %zu, direct out count is %zu.", + all_data_nodes_.size(), all_const_nodes_.size(), getnext_node_count, all_output_nodes_.size(), + direct_output_.size()); + + return SUCCESS; +} +Status MultiBatchClonePass::InitParamsOfGetNext(const NodePtr &node) { + data_count_from_getnext_ = 0; + getnext_sink_dynamic_dims_ = false; + GE_CHECK_NOTNULL(node->GetOpDesc()); + data_count_from_getnext_ = node->GetOpDesc()->GetOutputsSize(); + if (GetLocalOmgContext().dynamic_node_type == GETNEXT) { + data_count_from_getnext_ = data_count_from_getnext_ / kDivisionConst; + for (size_t i = 0; i < data_count_from_getnext_; ++i) { + GeTensorDesc output_desc = node->GetOpDesc()->GetOutputDesc(i); + GELOGD("The %zu data shape from getnext sink is %s.", i, + formats::JoinToString(output_desc.GetShape().GetDims()).c_str()); + const auto &dims = output_desc.GetShape().GetDims(); + if (std::all_of(dims.begin(), dims.end(), [](int64_t val) {return val >= 0; })) { + GELOGD("The %zu data from %s is static.", i, node->GetName().c_str()); + } else { + getnext_sink_dynamic_dims_ = true; + GELOGD("Dynamic dims in the pattern of getnext sink."); + } + } + } + if (node->GetOutControlAnchor() != nullptr) { + for (const auto &peer_in_control_anchor : node->GetOutControlAnchor()->GetPeerInControlAnchors()) { + NodePtr next_node = peer_in_control_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(next_node); + if (next_node->GetType() == CONSTANTOP) { + out_control_nodes_.insert(next_node); + GELOGD("Control edge: %s connect with %s.", 
node->GetName().c_str(), next_node->GetName().c_str()); + } + } + } return SUCCESS; } @@ -144,7 +217,11 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { /// @return 0: SUCCESS / others: FAILED /// Status MultiBatchClonePass::CreateRootGraph(const ComputeGraphPtr &graph) { + GELOGD("Start create root graph of %s.", graph->GetName().c_str()); uint32_t input_num = all_data_nodes_.size() + all_const_nodes_.size(); + if (data_count_from_getnext_ != 0) { + input_num = input_num + data_count_from_getnext_ - kNumOfGetnextNode; + } uint32_t output_num = all_output_nodes_[0]->GetAllInDataAnchorsSize(); OpDescBuilder op_builder(kMultiBatchCaseNode, CASE); @@ -185,6 +262,10 @@ Status MultiBatchClonePass::CreateRootGraph(const ComputeGraphPtr &graph) { op_desc->GetName().c_str()); return FAILED; } + if (!AttrUtils::SetBool(op_desc, ATTR_INSERT_BY_MBATCH, true)) { + GELOGE(INTERNAL_ERROR, "Failed to add insert attr on case node %s", op_desc->GetName().c_str()); + return INTERNAL_ERROR; + } GE_CHK_STATUS_RET(multibatch::StampDynamicType(op_desc), "Set dynamic type failed"); GE_CHK_STATUS_RET(CreateIndexNode(graph), "Create index node failed"); @@ -202,7 +283,7 @@ Status MultiBatchClonePass::CreateRootGraph(const ComputeGraphPtr &graph) { /// @param [in] NodePtr node: index data node. /// @return 0: SUCCESS / others: FAILED /// -Status MultiBatchClonePass::CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &node) { +Status MultiBatchClonePass::CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &shape_node) { const OpDescPtr data_desc = MakeShared(kMultiBatchDataNode, DATA); if (data_desc == nullptr) { GELOGE(OUT_OF_MEMORY, "Create multi-batch data node failed"); @@ -220,11 +301,12 @@ Status MultiBatchClonePass::CreateIndexDataNode(const ComputeGraphPtr &graph, No } size_t data_index = all_data_nodes_.size(); + data_index = data_count_from_getnext_ != 0 ? 
data_index - kNumOfGetnextNode : data_index; (void)AttrUtils::SetInt(data_desc, ATTR_NAME_INDEX, data_index); (void)AttrUtils::SetBool(data_desc, ATTR_INSERT_BY_MBATCH, true); - node = graph->AddNode(data_desc); - if (node == nullptr) { + shape_node = graph->AddNode(data_desc); + if (shape_node == nullptr) { GELOGE(OUT_OF_MEMORY, "Create multi-batch data node failed"); return OUT_OF_MEMORY; } @@ -286,15 +368,19 @@ Status MultiBatchClonePass::CreateIndexConstNode(const ComputeGraphPtr &graph, N /// @return 0: SUCCESS / others: FAILED /// Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) { - // Data --> MapIndex --> Case - NodePtr data_node; - GE_CHK_STATUS_RET(CreateIndexDataNode(graph, data_node), "Create data node failed"); + // Data/GetDynamicDims --> MapIndex --> Case + if (!getnext_sink_dynamic_dims_) { + GE_CHK_STATUS_RET(CreateIndexDataNode(graph, shape_node_), "Create data node failed"); + } else { + GE_CHK_STATUS_RET(CreateGetDynamicDimsNode(graph, shape_node_), "Create get dynamic dims node failed"); + } NodePtr const_node; GE_CHK_STATUS_RET(CreateIndexConstNode(graph, const_node), "Create const node failed"); - + GELOGD("Shape node name is %s, type is %s, const node name is %s.", shape_node_->GetName().c_str(), + shape_node_->GetType().c_str(), const_node->GetName().c_str()); OpDescBuilder op_builder(kMultiBatchMapIndexNode, "MapIndex"); - op_builder.AddInput("x", data_node->GetOpDesc()->GetOutputDesc(0)) + op_builder.AddInput("x", shape_node_->GetOpDesc()->GetOutputDesc(0)) .AddInput("data_seq", const_node->GetOpDesc()->GetOutputDesc(0)) .AddOutput("y", GeTensorDesc(GeShape(), FORMAT_ND, DT_INT32)); @@ -309,8 +395,10 @@ Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) { return OUT_OF_MEMORY; } - if (GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), index_node->GetInDataAnchor(0)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Failed to add edge between node:%s to MapIndex:%s", data_node->GetName().c_str(), + GE_CHK_STATUS_RET(AddAttrForGetDynamicDims(shape_node_), "Failed to add attr for %s.", + shape_node_->GetName().c_str()); + if (GraphUtils::AddEdge(shape_node_->GetOutDataAnchor(0), index_node->GetInDataAnchor(0)) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Failed to add edge between node:%s to MapIndex:%s", shape_node_->GetName().c_str(), index_node->GetName().c_str()); return FAILED; } @@ -328,6 +416,120 @@ Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) { return SUCCESS; } +Status MultiBatchClonePass::CreateGetDynamicDimsNode(const ComputeGraphPtr &graph, NodePtr &shape_node) { + const OpDescPtr data_desc = MakeShared(kMultiBatchGetDynamicDimsNode, GETDYNAMICDIMS); + if (data_desc == nullptr) { + GELOGE(OUT_OF_MEMORY, "Create multi-batch get dynamic dims node failed"); + return OUT_OF_MEMORY; + } + + // input of GetDynamicDims is shape_of_each_data, output is gear_info + for (size_t i = 0; i < GetLocalOmgContext().user_input_dims.size(); ++i) { + size_t input_shape_dims = GetLocalOmgContext().user_input_dims.at(i).second.size(); + // add input desc without GeShape for const input, value of input_shape is 1 transferred by adapter + if (input_shape_dims == 1 && GetLocalOmgContext().user_input_dims.at(i).second.at(0) == 0) { + GeTensorDesc tensor_desc; + tensor_desc.SetFormat(FORMAT_ND); + tensor_desc.SetDataType(DT_INT32); + auto ret = data_desc->AddInputDesc(tensor_desc); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add input desc for created data"); + return FAILED); + continue; + } 
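// NOTE: a user shape recorded as {0} marks a Const input feeding GetDynamicDims
// (its shape size arrives as 1 from the adapter), so it received a rank-less
// INT32 desc above; real data inputs fall through to the ranked desc below,
// one INT32 element per dim of the configured shape.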
+ GeTensorDesc tensor_desc(GeShape({static_cast(input_shape_dims)}), FORMAT_ND, DT_INT32); + auto ret = data_desc->AddInputDesc(tensor_desc); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add input desc for created data"); + return FAILED); + } + GeTensorDesc tensor_desc(GeShape({static_cast(batch_shapes_.at(0).size())}), FORMAT_ND, DT_INT32); + auto ret = data_desc->AddOutputDesc(tensor_desc); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add output desc for created data"); + return FAILED); + + (void)AttrUtils::SetBool(data_desc, ATTR_INSERT_BY_MBATCH, true); + + shape_node = graph->AddNode(data_desc); + if (shape_node == nullptr) { + GELOGE(OUT_OF_MEMORY, "Create multi-batch dynamic dims node failed"); + return OUT_OF_MEMORY; + } + return SUCCESS; +} + +Status MultiBatchClonePass::AddAttrForGetDynamicDims(const NodePtr &shape_node) { + if (!getnext_sink_dynamic_dims_) { + GELOGD("No need to add attr when not insert get dynamic dims node."); + return SUCCESS; + } + GELOGD("Add attr for :%s, type is %s:", shape_node->GetName().c_str(), shape_node->GetType().c_str()); + if (!AttrUtils::SetInt(shape_node->GetOpDesc(), ATTR_GETNEXT_SINK_DATA_COUNT, data_count_from_getnext_)) { + GELOGE(INTERNAL_ERROR, "set ATTR_GETNEXT_SINK_DATA_COUNT failed"); + return INTERNAL_ERROR; + } + vector shape_info; + for (size_t i = 0; i < GetLocalOmgContext().user_input_dims.size(); ++i) { + if (GetLocalOmgContext().user_input_dims.at(i).second.size() == 1 && + GetLocalOmgContext().user_input_dims.at(i).second.at(0) == 0) { + shape_info.emplace_back(0); + continue; + } + shape_info.emplace_back(GetLocalOmgContext().user_input_dims.at(i).second.size()); + for (size_t j = 0; j < GetLocalOmgContext().user_input_dims.at(i).second.size(); ++j) { + shape_info.emplace_back(GetLocalOmgContext().user_input_dims.at(i).second.at(j)); + } + } + if (!AttrUtils::SetListInt(shape_node->GetOpDesc(), ATTR_GETNEXT_SINK_SHAPE_INFO, shape_info)) { + GELOGE(INTERNAL_ERROR, "set ATTR_GETNEXT_SINK_SHAPE_INFO failed"); + return INTERNAL_ERROR; + } + return SUCCESS; +} + +Status MultiBatchClonePass::LinkGetNextToGetDynamicDims(const NodePtr &getnext_node, const NodePtr &shape_node) { + GELOGD("Start relink shape anchor of %s to %s.", getnext_node->GetName().c_str(), shape_node->GetName().c_str()); + size_t input_index = 0; + size_t data_count = getnext_node->GetAllOutDataAnchors().size() / kDivisionConst; + for (size_t out_index = data_count; out_index < getnext_node->GetAllOutDataAnchors().size(); ++out_index, + ++input_index) { + GELOGD("Start add %s of %zu out_anchor to %s of %zu in_anchor.", getnext_node->GetName().c_str(), out_index, + shape_node->GetName().c_str(), input_index); + auto out_data_anchor = getnext_node->GetOutDataAnchor(out_index); + auto ret = GraphUtils::AddEdge(out_data_anchor, shape_node->GetInDataAnchor(input_index)); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to link getnext %s to getdynamicdims %s", + getnext_node->GetName().c_str(), shape_node->GetName().c_str()); + return INTERNAL_ERROR); + } + return SUCCESS; +} + +Status MultiBatchClonePass::LinkGetDynamicDimsToNetOutput(const NodePtr &output_node) { + if (!GetLocalOmgContext().dynamic_node_type.empty()) { + if (!AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, GetLocalOmgContext().dynamic_dims)) { + GELOGE(INTERNAL_ERROR, "Failed to set all gears info attr on netoutput %s.", output_node->GetName().c_str()); + return INTERNAL_ERROR; + } + } + if 
(getnext_sink_dynamic_dims_) { + GELOGD("Start link %s to %s.", shape_node_->GetName().c_str(), output_node->GetName().c_str()); + size_t input_index = output_node->GetAllInDataAnchors().size(); + if (NodeUtils::AppendInputAnchor(output_node, input_index + 1) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Append input anchor of %s of %zu failed.", output_node->GetName().c_str(), input_index); + return INTERNAL_ERROR; + } + auto ret = GraphUtils::AddEdge(shape_node_->GetOutDataAnchor(kDataOutIndex), + output_node->GetInDataAnchor(input_index)); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to link netoutput %s to getdynamicdims %s", + output_node->GetName().c_str(), shape_node_->GetName().c_str()); + return INTERNAL_ERROR); + if (!AttrUtils::SetBool(output_node->GetOpDesc(), ATTR_GETNEXT_SINK_DYNMAIC, true)) { + GELOGE(INTERNAL_ERROR, "Failed to set getnext sink dynamic attr on netoutput %s.", + output_node->GetName().c_str()); + return INTERNAL_ERROR; + } + } + return SUCCESS; +} + /// /// @ingroup ge /// @brief Create input node for root graph. @@ -337,8 +539,10 @@ Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) { Status MultiBatchClonePass::CreateInputNode(const ComputeGraphPtr &graph) { // Data --> Case std::vector all_data_nodes; - const size_t arg_index = kCaseArgIndex; - for (size_t i = 0; i < all_data_nodes_.size(); ++i) { + size_t case_input_index = kCaseArgIndex; + NodePtr getnext_node = nullptr; + size_t input_index_of_getnext = 0; + for (size_t i = 0; i < all_data_nodes_.size(); ++i, ++case_input_index) { const auto &node = all_data_nodes_[i]; const OpDescPtr op_desc = AttrUtils::CopyOpDesc(node->GetOpDesc()); if (op_desc == nullptr) { @@ -353,22 +557,60 @@ Status MultiBatchClonePass::CreateInputNode(const ComputeGraphPtr &graph) { op_desc->SetName(node->GetName()); const NodePtr &data = graph->AddNode(op_desc); GE_CHK_BOOL_EXEC(data != nullptr, return FAILED, "Add node[%s] to graph failed", op_desc->GetName().c_str()); - if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(arg_index + i)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Failed to add edge between Data:%s to Case:%s", - data->GetName().c_str(), case_node_->GetName().c_str()); - return FAILED; + if (IsGetNextType(node)) { + getnext_node = data; + input_index_of_getnext = case_input_index; + case_input_index = case_input_index + data_count_from_getnext_; + continue; + } else { + if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(case_input_index)) != + GRAPH_SUCCESS) { + GELOGE(FAILED, "Failed to add edge between Data:%s to Case:%s", data->GetName().c_str(), + case_node_->GetName().c_str()); + return FAILED; + } } - if (SetMaxShapeToData(data) != SUCCESS) { + if (SetMaxShape(data) != SUCCESS) { + GELOGE(FAILED, "Set max shape of %s failed.", data->GetName().c_str()); return FAILED; } all_data_nodes.emplace_back(data); } + if (getnext_node != nullptr) { + if (LinkEdgeForGetNext(getnext_node, input_index_of_getnext) != SUCCESS) { + GELOGE(FAILED, "Failed to link edge for %s.", getnext_node->GetName().c_str()); + return FAILED; + } + if (SetMaxShape(getnext_node) != SUCCESS) { + GELOGE(FAILED, "Set max shape of %s failed.", getnext_node->GetName().c_str()); + return FAILED; + } + all_data_nodes.emplace_back(getnext_node); + } all_data_nodes_.swap(all_data_nodes); return SUCCESS; } +Status MultiBatchClonePass::LinkEdgeForGetNext(const NodePtr &getnext_node, size_t &case_input_index) { + GELOGD("Start link edge for %s, which is 
the %zu input of %s.", getnext_node->GetName().c_str(),
+         case_input_index, case_node_->GetName().c_str());
+  for (size_t out_index = 0; out_index < data_count_from_getnext_; ++out_index, ++case_input_index) {
+    if (GraphUtils::AddEdge(getnext_node->GetOutDataAnchor(out_index),
+                            case_node_->GetInDataAnchor(case_input_index)) != GRAPH_SUCCESS) {
+      GELOGE(FAILED, "Failed to add edge from output %zu of Data:%s to input %zu of Case:%s", out_index,
+             getnext_node->GetName().c_str(), case_input_index, case_node_->GetName().c_str());
+      return FAILED;
+    }
+  }
+  if (getnext_sink_dynamic_dims_) {
+    GE_CHK_STATUS_RET(LinkGetNextToGetDynamicDims(getnext_node, shape_node_), "Failed to add link for %s.",
+                      shape_node_->GetName().c_str());
+  }
+  return SUCCESS;
+}
+
 ///
 /// @ingroup ge
 /// @brief Create Const node for root graph.
@@ -378,7 +620,11 @@ Status MultiBatchClonePass::CreateInputNode(const ComputeGraphPtr &graph) {
 Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) {
   // Const --> Case
   std::vector<NodePtr> all_const_nodes;
-  const size_t arg_index = kCaseArgIndex + all_data_nodes_.size();
+  size_t arg_index = kCaseArgIndex + all_data_nodes_.size();
+  if (data_count_from_getnext_ != 0) {
+    arg_index = arg_index + data_count_from_getnext_ - kNumOfGetnextNode;
+  }
+
   for (size_t i = 0; i < all_const_nodes_.size(); ++i) {
     const auto &node = all_const_nodes_[i];
     const OpDescPtr op_desc = AttrUtils::CopyOpDesc(node->GetOpDesc());
@@ -395,15 +641,33 @@ Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) {
     const NodePtr &data = graph->AddNode(op_desc);
     GE_CHK_BOOL_EXEC(data != nullptr, return FAILED, "Add node[%s] to graph failed", op_desc->GetName().c_str());
     if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(arg_index + i)) != GRAPH_SUCCESS) {
-      GELOGE(FAILED, "Failed to add edge between Const:%s to Case:%s",
-             data->GetName().c_str(), case_node_->GetName().c_str());
+      GELOGE(FAILED, "Failed to add edge from Const:%s to Case:%s", data->GetName().c_str(),
+             case_node_->GetName().c_str());
       return FAILED;
     }
     all_const_nodes.emplace_back(data);
   }
+  ChangeConstToData();
+  all_const_nodes_.swap(all_const_nodes);
+  return SUCCESS;
+}

+void MultiBatchClonePass::ChangeConstToData() {
   size_t data_index = all_data_nodes_.size();
+  if (data_count_from_getnext_ != 0) {
+    data_index = data_index + data_count_from_getnext_ - kNumOfGetnextNode;
+  }
   for (size_t i = 0; i < all_const_nodes_.size(); ++i, ++data_index) {  // Trans subgraph Const to Data.
+    auto &const_node = all_const_nodes_[i];
+    if (out_control_nodes_.find(const_node) != out_control_nodes_.end()) {
+      GELOGD("No need to change %s to data type.", const_node->GetName().c_str());
+      continue;
+    }
     const OpDescPtr &op_desc = all_const_nodes_[i]->GetOpDesc();
     op_desc->SetType(DATA);
     (void)op_desc->DelAttr(ATTR_NAME_WEIGHTS);  // Delete weight.
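+    // Worked example (hypothetical counts): with one GetNext node producing two data outputs
+    // (data_count_from_getnext_ == 2, all_data_nodes_.size() == 1) and three Const inputs,
+    // data_index starts at 1 + 2 - 1 == 2, so the converted Const nodes become Data inputs
+    // 2, 3 and 4 of the Case node; a Const kept as Const because of its control edges still
+    // consumes an index, which keeps the numbering stable.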
@@ -413,9 +677,6 @@ Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) { (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index); (void)NodeUtils::AppendInputAnchor(all_const_nodes_[i], 1); } - - all_const_nodes_.swap(all_const_nodes); - return SUCCESS; } /// @@ -461,7 +722,8 @@ Status MultiBatchClonePass::CreateOutputNode(const ComputeGraphPtr &graph) { } } } - + GE_CHK_STATUS_RET(LinkGetDynamicDimsToNetOutput(node), "Failed to add edge between %s to netoutput: %s.", + shape_node_->GetName().c_str(), output->GetName().c_str()); all_output_nodes_.clear(); all_output_nodes_.emplace_back(node); return SUCCESS; @@ -473,34 +735,69 @@ Status MultiBatchClonePass::CreateOutputNode(const ComputeGraphPtr &graph) { /// @param [in] const NodePtr &data: data in Root/Case graph. /// @return 0: SUCCESS / others: FAILED /// -Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) { - auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); - auto data_name = data->GetName(); +Status MultiBatchClonePass::SetMaxShape(const NodePtr &data) { + GELOGD("Start set max shape for %s.", data->GetName().c_str()); + if (!IsGetNextType(data)) { + if (SetMaxShapeToData(data, kDataOutIndex) != SUCCESS) { + GELOGE(PARAM_INVALID, "Failed to update max shape of %s.", data->GetName().c_str()); + return PARAM_INVALID; + } + } else { + for (size_t out_anchor_index = 0; out_anchor_index < data_count_from_getnext_; ++out_anchor_index) { + if (SetMaxShapeToData(data, out_anchor_index) != SUCCESS) { + GELOGE(PARAM_INVALID, "Failed to update max shape of %s.", data->GetName().c_str()); + return PARAM_INVALID; + } + } + } + return SUCCESS; +} + +Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &node, size_t out_anchor_index) { + GELOGD("Start update max shape of %s, %zu output.", node->GetName().c_str(), out_anchor_index); + auto data_shape = NodeUtils::GetOutputDesc(*node, out_anchor_index).GetShape(); + string data_name = node->GetName(); + if (IsGetNextType(node)) { + data_name.append("_").append(std::to_string(out_anchor_index)); + } + GELOGD("Update max shape of %s, shape dims is %s.", data_name.c_str(), + formats::JoinToString(data_shape.GetDims()).c_str()); const auto &dims = data_shape.GetDims(); - if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { - return SUCCESS; + if (!IsGetNextType(node)) { + if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { + GELOGD("No need to do anything for static data."); + return SUCCESS; + } + } else { + if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { + if (getnext_sink_dynamic_dims_) { + // need to update shape of Shape_node when getnext node has dynamic data + GE_CHK_STATUS_RET(UpdateShapeOfShapeNode(node, out_anchor_index), "Failed to update shape of shape node"); + } + return SUCCESS; + } } - (void)AttrUtils::SetListInt(data->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims()); + (void)AttrUtils::SetListInt(node->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims()); - GeTensorDesc tensor(NodeUtils::GetOutputDesc(*data, kDataOutIndex)); + GeTensorDesc tensor(NodeUtils::GetOutputDesc(*node, kDataOutIndex)); std::vector input_dims_str; for (size_t i = 0; i < batch_shapes_.size(); ++i) { auto shape = data_shape; auto ret = multibatch::CalcShape(data_to_dynamic_info_.at(data_name).at(i), shape); if (ret != SUCCESS) { - GELOGE(ret, "Failed to calculate the shape for data node %s, the shape may not 
match", data->GetName().c_str()); + GELOGE(ret, "Failed to calculate the shape for data node %s, the shape may not match", node->GetName().c_str()); return ret; } tensor.SetShape(shape); int64_t tensor_size = 0; (void)TensorUtils::GetTensorSizeInBytes(tensor, tensor_size); string input_str = TypeUtils::FormatToSerialString(tensor.GetFormat()) + ":" + - TypeUtils::DataTypeToSerialString(tensor.GetDataType()) + ":" + data->GetName() + ":" + + TypeUtils::DataTypeToSerialString(tensor.GetDataType()) + ":" + node->GetName() + ":" + std::to_string(tensor_size) + ":" + std::to_string(tensor.GetShape().GetDimNum()) + ":" + formats::JoinToString(tensor.GetShape().GetDims()); input_dims_str.emplace_back(input_str); } - (void)AttrUtils::SetListStr(data->GetOpDesc(), "_all_origin_gears_inputs", input_dims_str); + (void)AttrUtils::SetListStr(node->GetOpDesc(), "_all_origin_gears_inputs", input_dims_str); size_t max_shape_index = 0; int64_t max_size = 0; @@ -519,18 +816,72 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) { max_shape_index = i; } } + return SetShapeToData(data_to_dynamic_info_.at(data_name).at(max_shape_index), node, data_shape, out_anchor_index); +} - return SetShapeToData(data_to_dynamic_info_.at(data_name).at(max_shape_index), data, data_shape); +/// +/// @ingroup ge +/// @brief Set max shape to Data/GetNext node in root graph. +/// @param [in] const std::vector &shapes: dims of shape. +/// @param [in] const NodePtr &data: data in Root/Case graph. +/// @param [in] GeShape &data_shape: dims of data node. +/// @param [in] size_t out_anchor_index: out anchor index of data node. +/// @return 0: SUCCESS / others: FAILED +/// +Status MultiBatchClonePass::SetShapeToData(const std::vector &shapes, const NodePtr &data, GeShape &data_shape, + size_t out_anchor_index) { + GELOGD("Start set shape to %zu out of %s.", out_anchor_index, data->GetName().c_str()); + if (multibatch::CalcShape(shapes, data_shape) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to calculate the batched shape for data node %s, the shapes may not match", + data->GetName().c_str()); + return INTERNAL_ERROR; + } + + if (NodeUtils::UpdateOutputShape(*data, out_anchor_index, data_shape) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to update output shape for data %s", data->GetName().c_str()); + return INTERNAL_ERROR; + } + if (!IsGetNextType(data)) { + if (NodeUtils::UpdateInputShape(*data, kDataInIndex, data_shape) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to update input shape for data %s", data->GetName().c_str()); + return INTERNAL_ERROR; + } + } else { + if (getnext_sink_dynamic_dims_) { + // need to update shape of Shape_node when getnext_sink_dynamic + GE_CHK_STATUS_RET(UpdateShapeOfShapeNode(data, out_anchor_index), "Failed to update shape of shape node"); + } + } + + GELOGI("Update the data %s input/output shape to the max %s", data->GetName().c_str(), + formats::ShapeToString(data_shape).c_str()); + return SUCCESS; +} + +Status MultiBatchClonePass::UpdateShapeOfShapeNode(const NodePtr &node, size_t out_anchor_index) { + GELOGD("Start update output shape of shape node insert by adapter, which is the %zu out of %s.", out_anchor_index, + node->GetName().c_str()); + auto data_shape = NodeUtils::GetOutputDesc(*node, out_anchor_index).GetShape(); + size_t shape_index = out_anchor_index + (node->GetAllOutDataAnchors().size() / kDivisionConst); + GeTensorDesc output_desc = node->GetOpDesc()->GetOutputDesc(shape_index); + std::vector output_dims = 
{static_cast(data_shape.GetDims().size())}; + GeShape output_shape(output_dims); + output_desc.SetShape(output_shape); + if (node->GetOpDesc()->UpdateOutputDesc(shape_index, output_desc) != SUCCESS) { + GELOGE(FAILED, "Update output desc fail."); + return FAILED; + } + return SUCCESS; } /// /// @ingroup ge /// @brief Update Data node in Subgraph. /// @param [in] const NodePtr &data: data in Subgraph. -/// @param [in] size_t index: The batch index. +/// @param [in] size_t batch_index: The batch index. /// @return 0: SUCCESS / others: FAILED /// -Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t index) { +Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t batch_index) { int node_index = -1; if (!AttrUtils::GetInt(data->GetOpDesc(), ATTR_NAME_INDEX, node_index)) { GELOGE(FAILED, "Failed to get index from data[%s]", data->GetName().c_str()); @@ -545,6 +896,8 @@ Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t index auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); const auto &dims = data_shape.GetDims(); + GELOGD("Start update shape of %s , batch index is %zu, dims is %s.", data->GetName().c_str(), batch_index, + formats::JoinToString(dims).c_str()); if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { return SUCCESS; } @@ -559,35 +912,77 @@ Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t index } auto parent_name = data_name.substr(0, pos); - return SetShapeToData(data_to_dynamic_info_.at(parent_name).at(index), data, data_shape); + return SetShapeToData(data_to_dynamic_info_.at(parent_name).at(batch_index), data, data_shape, kDataOutIndex); } -/// -/// @ingroup ge -/// @brief Set max shape to Data node in root graph. -/// @param [in] const std::vector &shapes: dims of shape. -/// @param [in] const NodePtr &data: data in Root/Case graph. -/// @param [in] GeShape &data_shape: dims of data node. 
-/// @return 0: SUCCESS / others: FAILED
-///
-Status MultiBatchClonePass::SetShapeToData(const vector<int64_t> &shapes, const NodePtr &data, GeShape &data_shape) {
-  // must not be error, the calc result has been checked in function InsertSwitchNForData
-  if (multibatch::CalcShape(shapes, data_shape) != SUCCESS) {
-    return INTERNAL_ERROR;
+Status MultiBatchClonePass::CreateOriGraph(const ComputeGraphPtr &graph) {
+  if (data_count_from_getnext_ == 0) {
+    GELOGD("No need to change original graph without getnext node.");
+    return SUCCESS;
   }
-
-  if (NodeUtils::UpdateInputShape(*data, kDataInIndex, data_shape) != GRAPH_SUCCESS) {
-    GELOGE(INTERNAL_ERROR, "Failed to update input shape for data %s", data->GetName().c_str());
-    return INTERNAL_ERROR;
+  GELOGD("Start to change original graph: %s when getnext node exists.", graph->GetName().c_str());
+  size_t data_index = all_data_nodes_.size() - kNumOfGetnextNode;
+  for (const auto &node : graph->GetDirectNode()) {
+    if (IsGetNextType(node)) {
+      for (size_t out_index = 0; out_index < data_count_from_getnext_; ++out_index, ++data_index) {
+        auto out_data_anchor = node->GetOutDataAnchor(out_index);
+        GE_IF_BOOL_EXEC(out_data_anchor == nullptr, continue);
+        NodePtr data_node = CreateDataNode(graph, out_data_anchor, data_index);
+        GE_IF_BOOL_EXEC(data_node == nullptr, GELOGE(INTERNAL_ERROR, "Create data node for output %d failed.",
+                                                     out_data_anchor->GetIdx()); return INTERNAL_ERROR);
+        for (auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) {
+          GE_IF_BOOL_EXEC(in_anchor == nullptr, continue);
+          NodePtr dst_node = in_anchor->GetOwnerNode();
+          if (GraphUtils::RemoveEdge(out_data_anchor, in_anchor) != GRAPH_SUCCESS) {
+            GELOGE(INTERNAL_ERROR, "Failed to remove edge from %s to %s", node->GetName().c_str(),
+                   dst_node->GetName().c_str());
+            return INTERNAL_ERROR;
+          }
+          if (GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), dst_node->GetInDataAnchor(in_anchor->GetIdx())) !=
+              GRAPH_SUCCESS) {
+            GELOGE(INTERNAL_ERROR, "Failed to add edge from %s to %s", data_node->GetName().c_str(),
+                   dst_node->GetName().c_str());
+            return INTERNAL_ERROR;
+          }
+        }
+      }
+      if (graph->RemoveNode(node) != GRAPH_SUCCESS) {
+        GELOGE(GRAPH_FAILED, "Remove node %s failed!", node->GetName().c_str());
+        return GRAPH_FAILED;
+      }
+      break;
+    }
   }
+  return SUCCESS;
+}

-  if (NodeUtils::UpdateOutputShape(*data, kDataOutIndex, data_shape) != GRAPH_SUCCESS) {
-    GELOGE(INTERNAL_ERROR, "Failed to update output shape for data %s", data->GetName().c_str());
-    return INTERNAL_ERROR;
+NodePtr MultiBatchClonePass::CreateDataNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor,
+                                            size_t data_index) {
+  size_t out_anchor_index = out_data_anchor->GetIdx();
+  std::string node_name = out_data_anchor->GetOwnerNode()->GetName() + "_" + std::to_string(out_anchor_index);
+  OpDescPtr op_desc = MakeShared<OpDesc>(node_name, DATA);
+  if (op_desc == nullptr) {
+    GELOGE(OUT_OF_MEMORY, "Create data node failed.");
+    return nullptr;
   }
+  (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index);

-  GELOGI("Update %s input/output shape to %s", data->GetName().c_str(), formats::ShapeToString(data_shape).c_str());
-  return SUCCESS;
+  OpDescPtr getnext_op_desc = out_data_anchor->GetOwnerNode()->GetOpDesc();
+  if (getnext_op_desc == nullptr) {
+    GELOGE(OUT_OF_MEMORY, "Op desc of %s is nullptr.", out_data_anchor->GetOwnerNode()->GetName().c_str());
+    return nullptr;
+  }
+  if (op_desc->AddInputDesc(getnext_op_desc->GetOutputDesc(out_anchor_index)) != GRAPH_SUCCESS) {
+    GELOGE(INTERNAL_ERROR, "Add %s input desc
failed.", op_desc->GetName().c_str()); + return nullptr; + } + if (op_desc->AddOutputDesc(getnext_op_desc->GetOutputDesc(out_anchor_index)) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add %s output desc failed.", op_desc->GetName().c_str()); + return nullptr; + } + NodePtr data_node = graph->AddNode(op_desc); + GELOGD("Success create %s node.", data_node->GetName().c_str()); + return data_node; } /// @@ -598,17 +993,14 @@ Status MultiBatchClonePass::SetShapeToData(const vector &shapes, const /// @return 0: SUCCESS / others: FAILED /// Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const ComputeGraphPtr &branch) { + GELOGD("Start create subgraphs for %s.", graph->GetName().c_str()); const auto &op_desc = case_node_->GetOpDesc(); for (size_t i = 0; i < batch_shapes_.size(); ++i) { std::vector input_nodes; std::vector output_nodes; const std::string postfix = kMultiBatchNodePostfix + std::to_string(i); ComputeGraphPtr subgraph = (i == 0) ? branch : GraphUtils::CloneGraph(branch, postfix, input_nodes, output_nodes); - if (subgraph == nullptr) { - GELOGE(FAILED, "Create multi-batch case node failed"); - return FAILED; - } - + GE_IF_BOOL_EXEC(subgraph == nullptr, GELOGE(FAILED, "Create multi-batch case node failed"); return FAILED); subgraph->SetName("Batch_" + std::to_string(i)); subgraph->SetParentNode(case_node_); subgraph->SetParentGraph(graph); @@ -621,6 +1013,7 @@ Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const op_desc->AddSubgraphName(key_name); op_desc->SetSubgraphInstanceName(i, subgraph->GetName()); + GELOGD("The %s has %zu input, %zu output.", subgraph->GetName().c_str(), input_nodes.size(), output_nodes.size()); for (const auto &data : input_nodes) { GE_CHK_STATUS_RET(UpdateSubgraphData(data, i), "Update %s failed", subgraph->GetName().c_str()); } @@ -666,6 +1059,7 @@ Status MultiBatchClonePass::UpdateSubgraphOutput(const NodePtr &output_node) { /// @return 0: SUCCESS / others: FAILED /// Status MultiBatchClonePass::PruneDirectOutput(const ComputeGraphPtr &graph) { + GELOGD("Start prune direct output."); const auto &func_desc = case_node_->GetOpDesc(); uint32_t unused_num = 0; uint32_t output_num = func_desc->GetOutputsSize(); @@ -710,6 +1104,7 @@ Status MultiBatchClonePass::PruneDirectOutput(const ComputeGraphPtr &graph) { /// Status MultiBatchClonePass::UpdateOutputTensor(uint32_t parent_index, uint32_t unused_num) { if (unused_num == 0) { + GELOGD("No need to update output tensor."); return SUCCESS; } diff --git a/ge/graph/passes/multi_batch_clone_pass.h b/ge/graph/passes/multi_batch_clone_pass.h index ee137b5a..66e92892 100755 --- a/ge/graph/passes/multi_batch_clone_pass.h +++ b/ge/graph/passes/multi_batch_clone_pass.h @@ -36,6 +36,7 @@ class MultiBatchClonePass : public GraphPass { /// @return 0: SUCCESS / others: FAILED /// Status CollectIoNodes(const ComputeGraphPtr &graph); + Status InitParamsOfGetNext(const NodePtr &node); /// /// @ingroup ge @@ -49,10 +50,12 @@ class MultiBatchClonePass : public GraphPass { /// @ingroup ge /// @brief Create index data node for root graph. /// @param [in] const ComputeGraphPtr &graph: Root/Case graph. - /// @param [in] NodePtr node: index data node. + /// @param [in] NodePtr shape_node: index data node, DATA or GETDYNAMICDIMS type. 
/// @return 0: SUCCESS / others: FAILED /// - Status CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &node); + Status CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &shape_node); + + Status CreateGetDynamicDimsNode(const ComputeGraphPtr &graph, NodePtr &shape_node); /// /// @ingroup ge @@ -70,6 +73,9 @@ class MultiBatchClonePass : public GraphPass { /// @return 0: SUCCESS / others: FAILED /// Status CreateIndexNode(const ComputeGraphPtr &graph); + Status AddAttrForGetDynamicDims(const NodePtr &shape_node); + Status LinkGetNextToGetDynamicDims(const NodePtr &getnext_node, const NodePtr &shape_node); + Status LinkGetDynamicDimsToNetOutput(const NodePtr &output_node); /// /// @ingroup ge @@ -78,39 +84,54 @@ class MultiBatchClonePass : public GraphPass { /// @return 0: SUCCESS / others: FAILED /// Status CreateInputNode(const ComputeGraphPtr &graph); + Status LinkEdgeForGetNext(const NodePtr &getnext_node, size_t &case_input_index); /// /// @ingroup ge - /// @brief Create Const node for root graph. - /// @param [in] const ComputeGraphPtr &graph: Root/Case graph. + /// @brief Set max shape to Data node in root graph. + /// @param [in] const NodePtr &data: data in Root/Case graph. /// @return 0: SUCCESS / others: FAILED /// - Status CreateConstNode(const ComputeGraphPtr &graph); + Status SetMaxShape(const NodePtr &data); + Status SetMaxShapeToData(const NodePtr &node, size_t out_anchor_index); + /// + /// @ingroup ge + /// @brief Set max shape to Data/GetNext node in root graph. + /// @param [in] const std::vector &shapes: dims of shape. + /// @param [in] const NodePtr &data: data in Root/Case graph. + /// @param [in] GeShape &data_shape: dims of data node. + /// @param [in] size_t out_anchor_index: out anchor index of data node. + /// @return 0: SUCCESS / others: FAILED + /// + Status SetShapeToData(const std::vector &shapes, const NodePtr &data, GeShape &data_shape, + size_t out_anchor_index); + Status UpdateShapeOfShapeNode(const NodePtr &node, size_t out_anchor_index); /// /// @ingroup ge - /// @brief Create output node for root graph. + /// @brief Create Const node for root graph. /// @param [in] const ComputeGraphPtr &graph: Root/Case graph. /// @return 0: SUCCESS / others: FAILED /// - Status CreateOutputNode(const ComputeGraphPtr &graph); + Status CreateConstNode(const ComputeGraphPtr &graph); + void ChangeConstToData(); /// /// @ingroup ge - /// @brief Set max shape to Data node in root graph. - /// @param [in] const NodePtr &data: data in Root/Case graph. + /// @brief Create output node for root graph. + /// @param [in] const ComputeGraphPtr &graph: Root/Case graph. /// @return 0: SUCCESS / others: FAILED /// - Status SetMaxShapeToData(const NodePtr &data); + Status CreateOutputNode(const ComputeGraphPtr &graph); /// /// @ingroup ge /// @brief Update Data node in Subgraph. /// @param [in] const NodePtr &data: data in Subgraph. - /// @param [in] size_t index: The batch index. + /// @param [in] size_t batch_index: The batch index. /// @return 0: SUCCESS / others: FAILED /// - Status UpdateSubgraphData(const NodePtr &data, size_t index); + Status UpdateSubgraphData(const NodePtr &data, size_t batch_index); /// /// @ingroup ge @@ -122,13 +143,12 @@ class MultiBatchClonePass : public GraphPass { /// /// @ingroup ge - /// @brief Set max shape to Data node in root graph. - /// @param [in] const std::vector &shapes: dims of shape. - /// @param [in] const NodePtr &data: data in Root/Case graph. - /// @param [in] GeShape &data_shape: dims of data node. 
+ /// @brief Create nodes for root graph. + /// @param [in] const ComputeGraphPtr &graph: Original graph. /// @return 0: SUCCESS / others: FAILED /// - Status SetShapeToData(const std::vector &shapes, const NodePtr &data, GeShape &data_shape); + Status CreateOriGraph(const ComputeGraphPtr &graph); + NodePtr CreateDataNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor, size_t data_index); /// /// @ingroup ge @@ -168,6 +188,10 @@ class MultiBatchClonePass : public GraphPass { std::map>> data_to_dynamic_info_; NodePtr case_node_; + size_t data_count_from_getnext_ = 0; + bool getnext_sink_dynamic_dims_ = false; + NodePtr shape_node_; + std::set out_control_nodes_; }; } // namespace ge #endif // GE_GRAPH_PASSES_MULTI_BATCH_CLONE_PASS_H_ diff --git a/ge/graph/passes/unused_args_clean_pass.cc b/ge/graph/passes/unused_args_clean_pass.cc index 83fd0438..ec66b129 100755 --- a/ge/graph/passes/unused_args_clean_pass.cc +++ b/ge/graph/passes/unused_args_clean_pass.cc @@ -204,6 +204,10 @@ Status UnusedArgsCleanPass::RemoveInputTensor(const mapGetName().c_str(), func_node->GetName().c_str()); + if (out_node->GetInDataNodes().size() == 0 && out_node->GetOutAllNodes().size() == 0) { + GE_CHK_GRAPH_STATUS_RET(out_node->GetOwnerComputeGraph()->RemoveNode(out_node), "Remove node failed: %s", + out_node->GetName().c_str()); + } return SUCCESS; } } // namespace ge \ No newline at end of file diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc index c8880b2e..5506435e 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -1692,13 +1692,11 @@ Status MultiBatchGraphCopyer::LinkToNodeOutBranch(const NodePtr &node) { } Status ProcessMultiBatch(ComputeGraphPtr &graph) { - if (GetLocalOmgContext().dynamic_node_type.empty()) { - const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN"); - if (multi_batch_with_switchn == nullptr) { - PassManager pass_manager; - GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass)); - return pass_manager.Run(graph); - } + const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN"); + if (multi_batch_with_switchn == nullptr) { + PassManager pass_manager; + GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass)); + return pass_manager.Run(graph); } if (!GetLocalOmgContext().need_multi_batch) { GELOGI("No need to process_multi for no_train graph."); diff --git a/ge/graph/preprocess/multi_batch_options.cc b/ge/graph/preprocess/multi_batch_options.cc index c26b08bc..aba2b88d 100644 --- a/ge/graph/preprocess/multi_batch_options.cc +++ b/ge/graph/preprocess/multi_batch_options.cc @@ -99,9 +99,8 @@ Status DistinguishGetNextAndData(ComputeGraphPtr &graph, vector &data_n } GELOGI("Data count is %zu, getnext nosink count is %zu, getnext sink count is %zu.", data_nodes.size(), getnext_nosink_nodes.size(), getnext_sink_nodes.size()); - GE_IF_BOOL_EXEC(!graph->SetExtAttr(kExtAttrDataNodes, data_nodes), GELOGW("Set data nodes attr failed.");) - GE_IF_BOOL_EXEC(!graph->SetExtAttr(kExtAttrGetNextNoSink, getnext_nosink_nodes), - GELOGW("Set getnext nosink nodes attr failed.");) + GetLocalOmgContext().data_nodes = data_nodes; + GetLocalOmgContext().getnext_nosink_nodes = getnext_nosink_nodes; return SUCCESS; } diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h index dab79053..1049b6b5 100644 --- 
a/inc/framework/omg/omg_inner_types.h +++ b/inc/framework/omg/omg_inner_types.h @@ -26,6 +26,7 @@ #include #include "framework/common/fmk_error_codes.h" #include "register/register_fmk_types.h" +#include "graph/node.h" using domi::DOMI_TENSOR_ND; using domi::DOMI_TENSOR_RESERVED; @@ -120,6 +121,8 @@ struct OmgContext { std::vector> user_real_input_dims; std::vector cur_dynamic_dims; bool need_multi_batch = false; + std::vector data_nodes; + std::vector getnext_nosink_nodes; }; } // namespace ge diff --git a/metadef b/metadef index 44bcbb5e..fe37bc34 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 44bcbb5ea25ada1a5393aa4c7f554d40b6859b18 +Subproject commit fe37bc343ea52c76d35e9e9ec83cea0151bfa900 diff --git a/parser b/parser index 5b93b050..336cd310 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 5b93b050dd7ca5b77c3001a790031d877fa10956 +Subproject commit 336cd3107253d3fe41cfb9fec2db62b5f3d8a33b diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index dcf389c0..db725dfb 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -627,6 +627,7 @@ set(PASS_TEST_FILES "graph/passes/net_output_pass_unittest.cc" "graph/passes/no_use_reshape_remove_pass_unittest.cc" "graph/passes/infershape_pass_unittest.cc" + "graph/passes/multi_batch_clone_pass_unittest.cc" ) set(KERNEL_TEST_FILES diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index a9efab3d..9e51585b 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -32,6 +32,18 @@ class UtestDavinciModel : public testing::Test { void SetUp() {} void TearDown() {} + public: + NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { + GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); + auto op_desc = std::make_shared(name, type); + for (auto i = 0; i < in_num; ++i) { + op_desc->AddInputDesc(test_desc); + } + for (auto i = 0; i < out_num; ++i) { + op_desc->AddOutputDesc(test_desc); + } + return graph->AddNode(op_desc); + } }; TEST_F(UtestDavinciModel, init_success) { @@ -324,5 +336,94 @@ TEST_F(UtestDavinciModel, SyncVarData_test) { EXPECT_NE(model.SyncVarData(), SUCCESS); } +TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ1) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + OpDescPtr op_output = CreateOpDesc("output_ascend_mbatch_batch_1", NETOUTPUT); + op_output->AddInputDesc(tensor); + op_output->SetInputOffset({1024}); + NodePtr node_output = graph->AddNode(op_output); + EXPECT_EQ(model.InitRealSizeAndShapeInfo(graph, node_output), SUCCESS); +} + +TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ2) { + DavinciModel model(0, nullptr); + ComputeGraphPtr graph = std::make_shared("test_graph"); + + OpDescPtr data1 = CreateOpDesc("data1", DATA); + GeTensorDesc shape_desc(GeShape({4,3,224,224}), FORMAT_NCHW, DT_FLOAT); + data1->AddInputDesc(shape_desc); + data1->AddOutputDesc(shape_desc); + NodePtr data1_node = graph->AddNode(data1); + + OpDescPtr case_node = CreateOpDesc("case1", CASE); + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + case_node->AddInputDesc(tensor); + case_node->AddOutputDesc(tensor); + NodePtr case1_node = graph->AddNode(case_node); + + OpDescPtr output = CreateOpDesc("output1", NETOUTPUT); + output->AddInputDesc(tensor); + 
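+  // SetSrcName/SetSrcIndex mimic what graph construction normally records: NetOutput input 0
+  // is fed by output 0 of node "case1" (hypothetical hand-wiring for this test), which is the
+  // association InitRealSizeAndShapeInfo walks to find each model output's producer.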
output->SetSrcName( { "case1" } ); + output->SetSrcIndex( { 0 } ); + NodePtr output_node = graph->AddNode(output); + + GraphUtils::AddEdge(data1_node->GetOutDataAnchor(0), case1_node->GetInDataAnchor(0)); + GraphUtils::AddEdge(case1_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); + + (void)AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, "1;2;4;8"); + (void)AttrUtils::SetBool(case_node, ATTR_INSERT_BY_MBATCH, true); + + model.is_getnext_sink_dynamic_ = false; + model.is_online_infer_dynamic_ = true; + auto ret = model.InitRealSizeAndShapeInfo(graph, output_node); + // GetGearAndRealOutShapeInfo without ATTR_NAME_DYNAMIC_OUTPUT_DIMS + EXPECT_EQ(ret, SUCCESS); + vector dynamic_output_dims = {"0,0,1,1,0,2,2,0,4,3,0,8"}; + (void)AttrUtils::SetListStr(output_node->GetOpDesc(), ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_dims); + ret = model.InitRealSizeAndShapeInfo(graph, output_node); + EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ3) { + DavinciModel model(0, nullptr); + ComputeGraphPtr graph = std::make_shared("test_graph"); + + OpDescPtr data1 = CreateOpDesc("data1", DATA); + GeTensorDesc shape_desc(GeShape({4,3,224,224}), FORMAT_NCHW, DT_FLOAT); + data1->AddInputDesc(shape_desc); + data1->AddOutputDesc(shape_desc); + NodePtr data1_node = graph->AddNode(data1); + + OpDescPtr shape_node = CreateOpDesc("ascend_mbatch_get_dynamic_dims_node", GETDYNAMICDIMS); + GeTensorDesc in_tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + GeTensorDesc out_tensor(GeShape({4,3}), FORMAT_NCHW, DT_FLOAT); + shape_node->AddInputDesc(in_tensor); + shape_node->AddOutputDesc(out_tensor); + NodePtr get_dynamic_dims_node = graph->AddNode(shape_node); + + OpDescPtr output = CreateOpDesc("output1", NETOUTPUT); + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + output->AddInputDesc(tensor); + output->SetSrcName( { "data1", "ascend_mbatch_get_dynamic_dims_node" } ); + output->SetSrcIndex( { 0, 1 } ); + NodePtr output_node = graph->AddNode(output); + GraphUtils::AddEdge(data1_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); + GraphUtils::AddEdge(get_dynamic_dims_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(1)); + + (void)AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, "1,3;;4,3;,3"); + + model.is_getnext_sink_dynamic_ = true; + model.is_online_infer_dynamic_ = false; + auto ret = model.InitRealSizeAndShapeInfo(graph, output_node); + EXPECT_EQ(ret, SUCCESS); + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 4; + ret = model.InitRealSizeAndShapeInfo(graph, output_node); + EXPECT_EQ(ret, SUCCESS); +} } // namespace ge diff --git a/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc b/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc new file mode 100644 index 00000000..b1cd6d4d --- /dev/null +++ b/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc @@ -0,0 +1,247 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/passes/multi_batch_clone_pass.h" + +#include +#include +#include + +#include "inc/pass_manager.h" +#include "graph/utils/tensor_utils.h" +#include "graph/common/local_context.h" +#include "graph/passes/multi_batch_pass.h" +#include "graph/preprocess/multi_batch_copy_graph.h" +#include "graph/preprocess/insert_op/util_insert_aipp_op.h" +#include "framework/omg/omg_inner_types.h" +#include "register/op_registry.h" + + +namespace ge{ +class UtestMultiBatchClonePass : public testing::Test { +protected: + void SetUp() { + SetLocalOmgContext(domi::GetContext()); + GetLocalOmgContext().dynamic_image_size.clear(); + GetLocalOmgContext().dynamic_batch_size.clear(); + } + void TearDown() { + GetLocalOmgContext().dynamic_image_size.clear(); + GetLocalOmgContext().dynamic_batch_size.clear(); + GetLocalOmgContext().dynamic_node_type.clear(); + } + +public: + NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { + GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); + auto op_desc = std::make_shared(name, type); + for (auto i = 0; i < in_num; ++i) { + op_desc->AddInputDesc(test_desc); + } + for (auto i = 0; i < out_num; ++i) { + op_desc->AddOutputDesc(test_desc); + } + return graph->AddNode(op_desc); + } + + NodePtr MakeConstNode(const ComputeGraphPtr &graph) { + static uint32_t index = 0; + GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); + auto op_desc = std::make_shared("dynamic_const_" + std::to_string(index++), "Const"); + op_desc->AddOutputDesc(test_desc); + return graph->AddNode(op_desc); + } + + void make_original_graph(const ComputeGraphPtr &graph) { + auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D"); + { + auto data1 = MakeNode(graph, 1, 1, "data", "Data"); + GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT); + data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0); + GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector{-1,3,224,224})}; + + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); + auto const1 = MakeConstNode(graph); + GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); + auto const2 = MakeConstNode(graph); + GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2)); + } + + auto bn_conv1 = MakeNode(graph, 4, 1, "bn_conv1", "BNInference"); + { + GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(0)); + auto const1 = MakeConstNode(graph); + GraphUtils::AddEdge(const1->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(1)); + auto const2 = MakeConstNode(graph); + GraphUtils::AddEdge(const2->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(2)); + auto const3= MakeConstNode(graph); + GraphUtils::AddEdge(const3->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(3)); + } + + auto scale_conv1 = MakeNode(graph, 4, 1, "scale1", "Scale"); + { + GraphUtils::AddEdge(bn_conv1->GetOutDataAnchor(0), scale_conv1->GetInDataAnchor(0)); + auto const1 = MakeConstNode(graph); + GraphUtils::AddEdge(const1->GetOutDataAnchor(0), scale_conv1->GetInDataAnchor(1)); + auto const2 = MakeConstNode(graph); + GraphUtils::AddEdge(const2->GetOutDataAnchor(0), scale_conv1->GetInDataAnchor(2)); + } + + auto output_node = 
MakeNode(graph, 1, 0, "output1", "NetOutput"); + GraphUtils::AddEdge(scale_conv1->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); + } + + void GraphWithJustData(const ComputeGraphPtr &graph) { + auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D"); + { + auto data1 = MakeNode(graph, 1, 1, "data", "Data"); + GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT); + data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0); + GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector{-1,3,224,224})}; + + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); + auto const1 = MakeConstNode(graph); + GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); + auto const2 = MakeConstNode(graph); + GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2)); + } + + auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput"); + GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); + } + + void GraphWithGetNextNosink(const ComputeGraphPtr &graph) { + auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D"); + { + auto data1 = MakeNode(graph, 1, 1, "IteratorGetNext_data", "Data"); + GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT); + data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0); + GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector{-1,3,224,224})}; + + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); + auto const1 = MakeConstNode(graph); + GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); + auto const2 = MakeConstNode(graph); + GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2)); + } + + auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput"); + GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); + } + + // getnext has one data and has one out of shape + void GraphWithGetNextSink(const ComputeGraphPtr &graph) { + auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D"); + { + auto data1 = MakeNode(graph, 1, 2, "data", "IteratorV2"); + GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT); + GeTensorDesc shape_desc(GeShape({4,3,224,224}), FORMAT_NCHW, DT_FLOAT); + data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + data1->GetOpDesc()->UpdateOutputDesc(1, shape_desc); + AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0); + GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector{-1,3,224,224})}; + + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); + auto identity = MakeNode(graph, 1, 0, "identity", "Identity"); + GraphUtils::AddEdge(data1->GetOutDataAnchor(1), identity->GetInDataAnchor(0)); + auto const1 = MakeConstNode(graph); + GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); + auto const2 = MakeConstNode(graph); + GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2)); + } + + auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput"); + GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), 
output_node->GetInDataAnchor(0));
+  }
+};
+
+// graph is nullptr
+TEST_F(UtestMultiBatchClonePass, graph_nullptr) {
+  PassManager pass_manager;
+  pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass);
+  ComputeGraphPtr graph;
+  EXPECT_EQ(pass_manager.Run(graph), PARAM_INVALID);
+}
+
+// graph with subgraph
+TEST_F(UtestMultiBatchClonePass, graph_with_subgraph) {
+  PassManager pass_manager;
+  pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass);
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
+  make_original_graph(graph);
+  EXPECT_EQ(pass_manager.Run(graph), SUCCESS);
+
+  ComputeGraphPtr owner = std::make_shared<ComputeGraph>("test_owner");
+  auto func_node = MakeNode(owner, 3, 1, "test_if", "If");
+  graph->SetParentNode(func_node);
+  graph->SetParentGraph(owner);
+  EXPECT_EQ(pass_manager.Run(graph), SUCCESS);
+}
+
+// graph that does not need multi-batch processing
+TEST_F(UtestMultiBatchClonePass, uncompute_graph) {
+  MultiBatchClonePass multi_batch_clone;
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
+  make_original_graph(graph);
+  GetLocalOmgContext().need_multi_batch = false;
+  EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
+}
+
+// compute_graph with data from DATA
+TEST_F(UtestMultiBatchClonePass, compute_graph_with_data) {
+  MultiBatchClonePass multi_batch_clone;
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
+  GraphWithJustData(graph);
+  GetLocalOmgContext().need_multi_batch = true;
+  EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
+  GetLocalOmgContext().dynamic_node_type = DATA;
+  GetLocalOmgContext().dynamic_dims = "1;2;4;8";
+  EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
+  EXPECT_EQ(GetLocalOmgContext().data_nodes.size(), 1);
+}
+
+// compute_graph with data from GetNext_nosink
+TEST_F(UtestMultiBatchClonePass, compute_graph_with_getnext_nosink) {
+  MultiBatchClonePass multi_batch_clone;
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
+  GraphWithGetNextNosink(graph);
+  GetLocalOmgContext().need_multi_batch = true;
+  GetLocalOmgContext().dynamic_node_type = GETNEXT;
+  GetLocalOmgContext().dynamic_dims = "1;2;4;8";
+  EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
+  EXPECT_EQ(GetLocalOmgContext().getnext_nosink_nodes.size(), 1);
+}
+
+// compute_graph with data from GetNext_sink
+TEST_F(UtestMultiBatchClonePass, compute_graph_with_getnext_sink) {
+  MultiBatchClonePass multi_batch_clone;
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
+  GraphWithGetNextSink(graph);
+  GetLocalOmgContext().need_multi_batch = true;
+  GetLocalOmgContext().dynamic_node_type = GETNEXT;
+  GetLocalOmgContext().dynamic_dims = "1;2;4;8";
+  EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
+  EXPECT_EQ(GetLocalOmgContext().getnext_nosink_nodes.size(), 0);
+}
+
+}

From 6ce14620cc1d1a4649aeb23a9d169a9789ccbcff Mon Sep 17 00:00:00 2001
From: zhangxiaokun
Date: Mon, 4 Jan 2021 21:15:19 +0800
Subject: [PATCH 35/54] Eliminate data_op_list_

---
 ge/executor/ge_executor.cc                    |  39 +-
 ge/graph/execute/graph_execute.cc             |  28 +-
 ge/graph/execute/graph_execute.h              |   6 +-
 .../load/new_model_manager/davinci_model.cc   | 321 ++++++++--------
 .../load/new_model_manager/davinci_model.h    |  55 ++-
 .../new_model_manager/davinci_model_parser.cc |  75 ----
 .../load/new_model_manager/model_manager.cc   |  14 +-
 .../load/new_model_manager/model_manager.h    |  16 +-
 inc/framework/executor/ge_executor.h          |   3 -
 .../ge/graph/load/davinci_model_unittest.cc   | 343 +++++++++++++++++-
 .../graph/load/kernel_task_info_unittest.cc   |   1 -
 11 files
changed, 539 insertions(+), 362 deletions(-) diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index abdc0c3f..0ea0e66d 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -676,7 +676,7 @@ Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); return ACL_ERROR_GE_EXEC_NOT_INIT; } - Status ret = GraphExecutor::GetAIPPInfo(model_id, index, aipp_info); + Status ret = GraphExecutor::GetAippInfo(model_id, index, aipp_info); if (ret != SUCCESS) { GELOGW("GetAIPPInfo is not success."); return ret; @@ -713,43 +713,6 @@ Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector &dyn return SUCCESS; } -Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector &input_desc, - std::vector &output_desc) { - GELOGI("get model desc info for zero copy begin."); - if (!isInit_) { - GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); - return ACL_ERROR_GE_EXEC_NOT_INIT; - } - - std::vector input_desc_infos; - std::vector output_desc_infos; - std::vector input_formats; - std::vector output_formats; - - Status ret = GraphExecutor::GetInputOutputDescInfoForZeroCopy(model_id, input_desc_infos, output_desc_infos, - input_formats, output_formats); - if (ret != domi::SUCCESS) { - GELOGE(ret, "Get DescInfo from zero copy failed. ret = %u", ret); - return ACL_ERROR_GE_GET_TENSOR_INFO; - } - - if (input_formats.size() != input_desc_infos.size()) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "input_formats.size() != input_desc_infos.size()."); - return ACL_ERROR_GE_PARAM_INVALID; - } - - if (output_formats.size() != output_desc_infos.size()) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "output_formats.size() != output_desc_infos.size()."); - return ACL_ERROR_GE_PARAM_INVALID; - } - - GetGeTensorDescFromDomiInfo(input_desc, input_desc_infos, input_formats); - GetGeTensorDescFromDomiInfo(output_desc, output_desc_infos, output_formats); - - GELOGI("get model desc info from zero copy end."); - return ge::SUCCESS; -} - Status GeExecutor::CommandHandle(const Command &command) { Status ret = GraphLoader::CommandHandle(command); if (ret != SUCCESS) { diff --git a/ge/graph/execute/graph_execute.cc b/ge/graph/execute/graph_execute.cc index 97e2fd1b..3c5618e8 100755 --- a/ge/graph/execute/graph_execute.cc +++ b/ge/graph/execute/graph_execute.cc @@ -560,34 +560,10 @@ Status GraphExecutor::GetModelAttr(uint32_t model_id, std::vector &dynam return SUCCESS; } -Status GraphExecutor::GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vector &input_desc, - vector &output_desc, - std::vector &input_formats, - std::vector &out_formats) { - try { - auto model_manager = ge::ModelManager::GetInstance(); - GE_CHECK_NOTNULL(model_manager); - Status ret = - model_manager->GetInputOutputDescInfoForZeroCopy(model_id, input_desc, output_desc, input_formats, out_formats); - if (ret != SUCCESS) { - GELOGE(ret, "GetInputOutputDescInfoForZeroCopy failed."); - return ret; - } - } catch (std::bad_alloc &) { - GELOGE(MEMALLOC_FAILED, "GetInputOutputDescInfoForZeroCopy failed, bad memory allocation occur !"); - return MEMALLOC_FAILED; - } catch (...) 
{ - GELOGE(FAILED, "GetInputOutputDescInfoForZeroCopy failed, some exceptions occur !"); - return FAILED; - } - - return SUCCESS; -} - -Status GraphExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { +Status GraphExecutor::GetAippInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { auto model_manager = ge::ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); - Status ret = model_manager->GetAIPPInfo(model_id, index, aipp_info); + Status ret = model_manager->GetAippInfo(model_id, index, aipp_info); if (ret != SUCCESS) { GELOGW("GetAIPPInfo is not success."); return ret; diff --git a/ge/graph/execute/graph_execute.h b/ge/graph/execute/graph_execute.h index efc30743..d2a92e47 100755 --- a/ge/graph/execute/graph_execute.h +++ b/ge/graph/execute/graph_execute.h @@ -73,7 +73,7 @@ class GraphExecutor { vector &output_desc, std::vector &input_formats, std::vector &output_formats, bool new_model_desc = false); - static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); + static Status GetAippInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); static Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); @@ -110,10 +110,6 @@ class GraphExecutor { static Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); - static Status GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vector &input_desc, - vector &output_desc, - std::vector &input_formats, - std::vector &output_formats); static Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); static Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector &input_dims, std::vector &output_dims); diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 706d4b3b..1e8192a5 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -156,7 +156,6 @@ DavinciModel::~DavinciModel() { GE_CHK_STATUS(ModelRunStop()); op_list_.clear(); - data_op_list_.clear(); tensor_name_to_fixed_addr_size_.clear(); tensor_name_to_peer_output_index_.clear(); GE_DELETE_NEW_SINGLE(data_inputer_); @@ -878,7 +877,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { auto it = op_desc_handle.find(op_desc->GetType()); if (it != op_desc_handle.end()) { if ((this->*it->second)(op_desc) != SUCCESS) { - GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); + GELOGE(PARAM_INVALID, "Node init failed, Name: %s", op_desc->GetName().c_str()); return PARAM_INVALID; } continue; @@ -931,7 +930,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc."); GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle."); - return OptInputOutputInfo(data_by_index, output_op_list); + return GenInputOutputInfo(data_by_index, output_op_list); } void DavinciModel::SetLabelForDynamic(const NodePtr &node) { @@ -974,9 +973,6 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod } data_by_index[data_index] = op_desc; - auto data_op = AttrUtils::CopyOpDesc(op_desc); - GE_CHECK_NOTNULL(data_op); - data_op_list_.push_back(data_op); if (known_node_) { return SUCCESS; } @@ -1022,23 +1018,18 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr 
&nod
 /// @param [in] output_op_list: list of NetOutput op.
 /// @return Status
 ///
-Status DavinciModel::OptInputOutputInfo(const map &data_by_index,
+Status DavinciModel::GenInputOutputInfo(const map &data_by_index,
                                         const vector &output_op_list) {
-  GELOGD("Data node size: %zu, NetOutput node size: %zu", data_op_list_.size(), output_op_list.size());
-  if (data_by_index.size() != data_op_list_.size()) {
-    GELOGE(INTERNAL_ERROR, "Data map size: %zu, Data list size: %zu.", data_by_index.size(), data_op_list_.size());
-    return INTERNAL_ERROR;
-  }
-
-  data_op_list_.clear();
+  GELOGD("Data node size: %zu, NetOutput node size: %zu", data_by_index.size(), output_op_list.size());
   for (auto &item : data_by_index) {
-    auto data_op = AttrUtils::CopyOpDesc(item.second);
-    GE_CHECK_NOTNULL(data_op);
-    data_op_list_.emplace_back(data_op);
     auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second);
     GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size());
     input_addrs_list_.emplace_back(output_addrs);
+    GE_CHK_STATUS_RET(InitAippInfo(item.first, item.second), "Init AIPP info failed");
+    GE_CHK_STATUS_RET(InitAippType(item.first, item.second, data_by_index), "Init AIPP type failed");
+    GE_CHK_STATUS_RET(InitOrigInputInfo(item.first, item.second), "Init origin input info failed");
+    GE_CHK_STATUS_RET(InitAippInputOutputDims(item.first, item.second), "Init AIPP dims failed");
     if (item.second->GetType() == AIPP_DATA_TYPE) {
       GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str());
       is_dynamic_aipp_ = true;
@@ -1066,7 +1057,8 @@ Status DavinciModel::OptInputOutputInfo(const map &data_by_
     }
   }

-  return InitOutputDescInfo(output_op_list, output_descs_, output_formats_);
+  GE_CHK_STATUS_RET(InitInputDescInfo(data_by_index), "Init input desc info failed");
+  return InitOutputDescInfo(output_op_list);
 }

 bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) {
@@ -1791,73 +1783,101 @@ void DavinciModel::GetUserDesignateShapeOrder(std::vector &user_inp
 /// @ingroup ge
 /// @brief Get AIPP input info
 /// @param [in] index
-/// @param [out] aipp_info
+/// @param [in] op_desc
 /// @return execute result
 ///
-Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) {
-  GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index);
-  OpDescPtr data_op = data_op_list_[index];
-  if (!data_op->HasAttr(ATTR_NAME_AIPP)) {
-    GELOGW("GetAIPPInfo: there is not AIPP related with index %u.", index);
-    return ACL_ERROR_GE_AIPP_NOT_EXIST;
+Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) {
+  if (!op_desc->HasAttr(ATTR_NAME_AIPP)) {
+    GELOGW("There is no AIPP info related to index %u.", index);
+    return SUCCESS;
   }

-  std::unique_ptr aipp_params(new (std::nothrow) domi::AippOpParams());
-  GE_CHECK_NOTNULL(aipp_params);
-
-  ge::GeAttrValue::NAMED_ATTRS aipp_attr;
-  GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST,
+  domi::AippOpParams aipp_params;
+  GeAttrValue::NAMED_ATTRS aipp_attr;
+  GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST,
                          "Data node do not contain param aipp!");
-  GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed");
-  GELOGI("GetAIPPInfo: node data: %s, type: %s, current index: %u, current node related input rank: %u",
-         data_op->GetName().c_str(), data_op->GetType().c_str(), index, aipp_params->related_input_rank());
+  GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed");
+  GELOGI("node data: %s, type: %s, current index: %u, current node related input rank: %u",
+         op_desc->GetName().c_str(), op_desc->GetType().c_str(), index, aipp_params.related_input_rank());

-  GE_CHK_STATUS_RET(AippUtils::ConvertAippParams2AippInfo(aipp_params.get(), aipp_info),
+  AippConfigInfo aipp_info;
+  GE_CHK_STATUS_RET(AippUtils::ConvertAippParams2AippInfo(&aipp_params, aipp_info),
                     "convert aipp params to aipp config info failed");
+  aipp_info_list_[index] = aipp_info;
   return SUCCESS;
 }

-Status DavinciModel::GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index) {
-  GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index);
-  // Set default value
-  type = DATA_WITHOUT_AIPP;
-  aipp_index = 0xFFFFFFFF;  // default invalid value
-  OpDescPtr data_op = data_op_list_[index];
-  GE_CHECK_NOTNULL(data_op);
-  if (!data_op->HasAttr(ATTR_DATA_RELATED_AIPP_MODE)) {
+///
+/// @ingroup ge
+/// @brief Get AIPP input info
+/// @param [in] index
+/// @param [out] aipp_info
+/// @return execute result
+///
+Status DavinciModel::GetAippInfo(uint32_t index, AippConfigInfo &aipp_info) const {
+  const auto it = aipp_info_list_.find(index);
+  if (it == aipp_info_list_.end()) {
+    GELOGW("There is no AIPP info related to index %u.", index);
+    return ACL_ERROR_GE_AIPP_NOT_EXIST;
+  }
+
+  aipp_info = it->second;
+  return SUCCESS;
+}
+
+Status DavinciModel::InitAippType(uint32_t index, const OpDescPtr &op_desc, const map &data_list) {
+  if (!op_desc->HasAttr(ATTR_DATA_RELATED_AIPP_MODE)) {
     GELOGW("There is no aipp releated info with index %u.", index);
     return SUCCESS;
   }
-  std::string data_mode;
-  (void)AttrUtils::GetStr(data_op, ATTR_DATA_RELATED_AIPP_MODE, data_mode);
+
+  // Set default value
+  InputAippType aipp_type = DATA_WITHOUT_AIPP;
+  string data_mode;
+  (void)AttrUtils::GetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, data_mode);
   if (data_mode == "static_aipp") {
-    type = DATA_WITH_STATIC_AIPP;
+    aipp_type = DATA_WITH_STATIC_AIPP;
   } else if (data_mode == "dynamic_aipp") {
-    type = DATA_WITH_DYNAMIC_AIPP;
+    aipp_type = DATA_WITH_DYNAMIC_AIPP;
   } else if (data_mode == "dynamic_aipp_conf") {
-    type = DYNAMIC_AIPP_NODE;
+    aipp_type = DYNAMIC_AIPP_NODE;
   } else {
     GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, "The info of aipp releated info %s is invalid with index %u.",
            data_mode.c_str(), index);
     return ACL_ERROR_GE_AIPP_MODE_INVALID;
   }

-  if (type == DATA_WITH_DYNAMIC_AIPP) {
+  size_t aipp_index = 0xFFFFFFFF;  // default invalid value
+  if (aipp_type == DATA_WITH_DYNAMIC_AIPP) {
     string releated_name;
-    (void)AttrUtils::GetStr(data_op, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name);
-    for (size_t i = 0; i < data_op_list_.size(); ++i) {
-      GE_CHECK_NOTNULL(data_op_list_[i]);
-      if (data_op_list_[i]->GetName() == releated_name) {
-        GELOGI("Find aipp_data [%s] index %zu from index %u", releated_name.c_str(), i, index);
-        aipp_index = i;
+    (void)AttrUtils::GetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name);
+    for (const auto &item : data_list) {
+      if (item.second->GetName() == releated_name) {
+        GELOGI("Find aipp_data [%s] index %u from index %u", releated_name.c_str(), item.first, index);
+        aipp_index = item.first;
       }
    }
+
    if (aipp_index == 0xFFFFFFFF) {
-      GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "Can not find aipp data node from index %u", index);
-      return ACL_ERROR_GE_AIPP_NOT_EXIST;
+      GELOGW("Can not find aipp data node from index %u",
index); + return SUCCESS; } } + + aipp_type_list_[index] = { aipp_type, aipp_index }; + return SUCCESS; +} + +Status DavinciModel::GetAippType(uint32_t index, InputAippType &aipp_type, size_t &aipp_index) const { + const auto it = aipp_type_list_.find(index); + if (it == aipp_type_list_.end()) { + GELOGW("There is no aipp releated info with index %u.", index); + return SUCCESS; + } + + aipp_type = it->second.first; + aipp_index = it->second.second; return SUCCESS; } @@ -1873,7 +1893,7 @@ void DavinciModel::SetDynamicSize(const std::vector &batch_num, int32_ dynamic_type_ = dynamic_type; } -void DavinciModel::GetCurShape(std::vector &batch_info, int32_t &dynamic_type) { +void DavinciModel::GetCurShape(std::vector &batch_info, int32_t &dynamic_type) const { if (batch_size_.empty()) { GELOGD("User does not set dynamic size"); } @@ -1885,38 +1905,10 @@ void DavinciModel::GetCurShape(std::vector &batch_info, int32_t &dynami dynamic_type = dynamic_type_; } -void DavinciModel::GetModelAttr(vector &out_shape_info) { +void DavinciModel::GetModelAttr(vector &out_shape_info) const { out_shape_info.insert(out_shape_info.end(), dynamic_output_shape_info_.begin(), dynamic_output_shape_info_.end()); } -Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector &input_desc, - vector &output_desc, - std::vector &input_formats, - std::vector &output_formats) { - if (input_addrs_list_.empty() || input_addrs_list_[0].size() != kOutputNum) { - GELOGE(FAILED, "OP List Pointer is null or input_desc size is not 1!"); - return FAILED; - } - - GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed"); - - GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get ouput desc info failed"); - - GE_CHK_BOOL_RET_STATUS(output_desc.size() == output_memory_size_list_.size(), INTERNAL_ERROR, - "output_desc size[%zu] not equal output_size_list_[%zu] size!", output_desc.size(), - output_memory_size_list_.size()); - - /// For function zero copy,the momery should be aligned by 512 bytes. - /// And, because of the cce op limit, size should be lager than the real shape size. The memory should be padded by 32 - /// bytes. 
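
The Init*/Get* split just above is the pattern this patch applies to every per-input AIPP query: each Init routine runs once per Data node during model load, while the OpDescs are still alive, and stores its result in a map keyed by input index; the matching Get accessor then degrades to a const map lookup. That is what lets data_op_list_ (and eventually the graph itself) be released once Init finishes. A minimal self-contained sketch of the pattern follows; AippConfigInfo, Status, and the error value here are simplified stand-ins for the real GE types, not the actual declarations:

#include <cstdint>
#include <map>

struct AippConfigInfo {
  int32_t aipp_mode = 0;
};

using Status = int32_t;
constexpr Status SUCCESS = 0;
constexpr Status ACL_ERROR_GE_AIPP_NOT_EXIST = -1;  // placeholder value

class ModelSketch {
 public:
  // Runs once per Data node during load, while the OpDescs still exist.
  Status InitAippInfo(uint32_t index, const AippConfigInfo &parsed) {
    aipp_info_list_[index] = parsed;  // cache the converted attribute
    return SUCCESS;
  }

  // Query path after load: a const lookup with no OpDesc access, so the
  // cached map is all that must outlive DavinciModel::Init.
  Status GetAippInfo(uint32_t index, AippConfigInfo &aipp_info) const {
    const auto it = aipp_info_list_.find(index);
    if (it == aipp_info_list_.end()) {
      return ACL_ERROR_GE_AIPP_NOT_EXIST;
    }
    aipp_info = it->second;
    return SUCCESS;
  }

 private:
  std::map<uint32_t, AippConfigInfo> aipp_info_list_;
};

int main() {
  ModelSketch model;
  model.InitAippInfo(0, AippConfigInfo{1});
  AippConfigInfo info;
  return model.GetAippInfo(0, info) == SUCCESS ? 0 : 1;
}

The trade-off is a small amount of extra memory for the cached entries in exchange for not retaining every Data OpDesc for the lifetime of the model.
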
- /// *size equals to ((tensorDesc->dataSize + 2 * 32 - 1) / 32) * 32; - for (size_t i = 0; i < output_memory_size_list_.size(); i++) { - output_desc[i].size = output_memory_size_list_[i]; - } - - return SUCCESS; -} - void DavinciModel::SetInputDimsInfo(const vector &model_input_dims, Format &format, InputOutputDescInfo &input) { uint32_t n, c, h, w; @@ -1966,24 +1958,30 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, } } -Status DavinciModel::GetInputDescInfo(vector &input_desc, std::vector &formats) { - for (size_t index = 0; index < data_op_list_.size(); ++index) { - InputOutputDescInfo input; - GE_CHECK_NOTNULL(data_op_list_[index]); - GE_CHECK_NOTNULL(data_op_list_[index]->GetInputDescPtr(0)); +Status DavinciModel::InitInputDescInfo(const map &data_by_index) { + for (const auto &item : data_by_index) { + const auto op_desc = item.second; + GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); - Format format = data_op_list_[index]->GetInputDescPtr(0)->GetFormat(); - CreateInputDimsInfo(data_op_list_[index], format, input); + InputOutputDescInfo input; + Format format = op_desc->GetInputDescPtr(0)->GetFormat(); + CreateInputDimsInfo(op_desc, format, input); - input.data_type = data_op_list_[index]->GetInputDescPtr(0)->GetDataType(); - input.name = data_op_list_[index]->GetName(); + input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); + input.name = op_desc->GetName(); int64_t input_size = 0; - GE_CHK_STATUS_RET(TensorUtils::GetSize(*data_op_list_[index]->GetInputDescPtr(0), input_size), - "get input size failed."); + GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); input.size = input_size; - formats.push_back(format); - input_desc.push_back(input); + input_formats_.push_back(format); + input_descs_.push_back(input); } + return SUCCESS; +} + +Status DavinciModel::GetInputDescInfo(vector &input_descs, vector &input_formats) { + input_descs.insert(input_descs.end(), input_descs_.begin(), input_descs_.end()); + input_formats.insert(input_formats.end(), input_formats_.begin(), input_formats_.end()); + // cause GetInputDescInfo called not only once, set is_new_model_desc_ to false after calc the model input dims is_new_model_desc_ = false; return SUCCESS; @@ -2042,8 +2040,7 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO output.data_type = op_desc->GetInputDescPtr(index)->GetDataType(); } -Status DavinciModel::InitOutputDescInfo(const vector &output_op_list, - vector &output_descs, vector &output_formats) { +Status DavinciModel::InitOutputDescInfo(const vector &output_op_list) { GELOGD("Output node size: %zu", output_op_list.size()); for (const auto &op_desc : output_op_list) { uint32_t out_size = static_cast(op_desc->GetInputsSize()); @@ -2068,28 +2065,20 @@ Status DavinciModel::InitOutputDescInfo(const vector &output_op_list, std::to_string(src_index[index]); } output.name = output_name; - output_descs.push_back(output); - output_formats.push_back(format_result); + output_descs_.push_back(output); + output_formats_.push_back(format_result); } } return SUCCESS; } -Status DavinciModel::GetOutputDescInfo(vector &output_descs, vector &output_formats) { +Status DavinciModel::GetOutputDescInfo(vector &output_descs, + vector &output_formats) const { output_descs.insert(output_descs.end(), output_descs_.begin(), output_descs_.end()); output_formats.insert(output_formats.end(), output_formats_.begin(), output_formats_.end()); return SUCCESS; } -ge::Format 
DavinciModel::GetFormat() { - if ((data_op_list_.empty()) || data_op_list_[0] == nullptr || data_op_list_[0]->GetInputDescPtr(0) == nullptr) { - GELOGW("OP List Pointer is null or input_desc size is not 1!"); - return FORMAT_NCHW; - } - - return data_op_list_[0]->GetInputDescPtr(0)->GetFormat(); -} - Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) { rtMemcpyKind_t kind = device_data ? RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE; const std::vector &blobs = input_data.blobs; @@ -4004,25 +3993,45 @@ void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_s } } -Status DavinciModel::GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) { - GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); - OpDescPtr data_op = data_op_list_[index]; - if (!data_op->HasAttr(ATTR_NAME_AIPP_INPUTS) || !data_op->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { - GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "GetOrigInputInfo: there is not AIPP related with index %u.", index); - return ACL_ERROR_GE_AIPP_NOT_EXIST; +Status DavinciModel::InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc) { + if (!op_desc->HasAttr(ATTR_NAME_AIPP_INPUTS) || !op_desc->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { + GELOGI("there is not AIPP related with index %u, node: %s.", index, op_desc->GetName().c_str()); + return SUCCESS; } - vector inputs; - if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { + vector inputs; + if (AttrUtils::GetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { std::string input = inputs[kAippOriginInputIndex]; - GELOGI("GetOrigInputInfo: origin input str: %s", input.c_str()); + GELOGI("origin input str: %s", input.c_str()); std::vector infos = ge::StringUtils::Split(input, ':'); if (infos.size() != kAippInfoNum) { - GELOGW("origin input str is invalid."); + GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, "origin input str is invalid[%zu, %u].", infos.size(), kAippInfoNum); + return ACL_ERROR_GE_AIPP_MODE_INVALID; } - orig_input_info.format = TypeUtils::SerialStringToFormat(infos[kAippInfoFormat]); - orig_input_info.data_type = TypeUtils::SerialStringToDataType(infos[kAippInfoDataType]); - orig_input_info.dim_num = std::strtol(infos[kAippInfoDimNum].c_str(), nullptr, kDecimal); + + OriginInputInfo input_info; + input_info.format = TypeUtils::SerialStringToFormat(infos[kAippInfoFormat]); + input_info.data_type = TypeUtils::SerialStringToDataType(infos[kAippInfoDataType]); + input_info.dim_num = std::strtol(infos[kAippInfoDimNum].c_str(), nullptr, kDecimal); + orig_input_info_[index] = input_info; + } else { + OriginInputInfo input_info = { FORMAT_RESERVED, DT_UNDEFINED, 0 }; + orig_input_info_[index] = input_info; + } + + return SUCCESS; +} + +Status DavinciModel::GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const { + const auto it = orig_input_info_.find(index); + if (it == orig_input_info_.end()) { + GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "there is not AIPP related with index %u.", index); + return ACL_ERROR_GE_AIPP_NOT_EXIST; + } + + const OriginInputInfo &input_info = it->second; + if (input_info.format != FORMAT_RESERVED || input_info.data_type != DT_UNDEFINED) { + orig_input_info = input_info; } return SUCCESS; @@ -4032,7 +4041,8 @@ void DavinciModel::ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_ GELOGI("ParseAIPPInfo: origin str: %s", in_out_info.c_str()); std::vector infos = ge::StringUtils::Split(in_out_info, ':'); if 
(infos.size() != kAippInfoNum) { - GELOGW("origin input str is invalid."); + GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, "origin input str is invalid[%zu, %u].", infos.size(), kAippInfoNum); + return; } dims_info.name = infos[kAippInfoTensorName]; dims_info.size = std::strtol(infos[kAippInfoTensorSize].c_str(), nullptr, kDecimal); @@ -4047,47 +4057,58 @@ void DavinciModel::ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_ } } -Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vector &input_dims, - std::vector &output_dims) { - GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); - OpDescPtr data_op = data_op_list_[index]; - if (!data_op->HasAttr(ATTR_NAME_AIPP_INPUTS) || !data_op->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { - GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "GetAllAippInputOutputDims: there is not AIPP related with index %u.", index); - return ACL_ERROR_GE_AIPP_NOT_EXIST; +Status DavinciModel::InitAippInputOutputDims(uint32_t index, const OpDescPtr &op_desc) { + if (!op_desc->HasAttr(ATTR_NAME_AIPP_INPUTS) || !op_desc->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { + GELOGI("there is not AIPP related with index %u.", index); + return SUCCESS; } - vector inputs; - if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { - GELOGI("GetAllAippInputOutputDims: Data: %s has %zu related aippInfo.", data_op->GetName().c_str(), inputs.size()); + vector inputs; + vector input_dims; + if (AttrUtils::GetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { + GELOGI("Data: %s has %zu related aippInfo.", op_desc->GetName().c_str(), inputs.size()); for (auto it : inputs) { InputOutputDims input_info; ParseAIPPInfo(it, input_info); input_dims.emplace_back(input_info); - GELOGD("GetAllAippInputOutputDims Aipp origin input dims info: %s", it.c_str()); + GELOGD("Aipp origin input dims info: %s", it.c_str()); - ConstGeTensorDescPtr data_input_desc = data_op->GetInputDescPtr(kDataIndex); + ConstGeTensorDescPtr data_input_desc = op_desc->GetInputDescPtr(kDataIndex); int64_t data_input_size; - (void)TensorUtils::GetSize(*(data_op->GetInputDescPtr(kDataIndex)), data_input_size); - GELOGD( - "GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %zu, tensor_size: %zu, format: " - "%s, data_type: %s, shape: %s .", - index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, - TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), - TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), - formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); + (void)TensorUtils::GetSize(*(op_desc->GetInputDescPtr(kDataIndex)), data_input_size); + GELOGD("related Data[%d]: tensor_name: %s, dim_num: %zu, tensor_size: %zu, format: %s, data_type: %s, shape: %s", + index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, + TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), + TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), + formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); } } - vector outputs; - if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_OUTPUTS, outputs) && !outputs.empty()) { + vector outputs; + vector output_dims; + if (AttrUtils::GetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs) && !outputs.empty()) { for (auto it : outputs) { InputOutputDims output_info; ParseAIPPInfo(it, output_info); 
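
ParseAIPPInfo and InitOrigInputInfo both decode the colon-separated records stored under ATTR_NAME_AIPP_INPUTS and ATTR_NAME_AIPP_OUTPUTS. The unit tests below use "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8", which suggests the field order format : data type : tensor name : tensor size : dim count : comma-separated dims, and explains why a record that does not split into exactly kAippInfoNum fields is now rejected with ACL_ERROR_GE_AIPP_MODE_INVALID instead of only logging a warning. A standalone sketch of that decoding, with the field order and count (six) inferred from the kAippInfo* indices rather than taken from a header:

#include <cstdlib>
#include <sstream>
#include <string>
#include <vector>

struct DimsRecord {
  std::string format, data_type, name;
  long size = 0;          // the tests use a non-numeric placeholder here, so strtol yields 0
  unsigned long dim_num = 0;
  std::vector<long> dims;
};

// Splits "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" into its six fields.
static bool ParseAippRecord(const std::string &text, DimsRecord &out) {
  std::vector<std::string> fields;
  std::stringstream ss(text);
  std::string part;
  while (std::getline(ss, part, ':')) {
    fields.push_back(part);
  }
  const size_t kExpectedFields = 6;  // mirrors kAippInfoNum in the patch
  if (fields.size() != kExpectedFields) {
    return false;  // the patch now reports ACL_ERROR_GE_AIPP_MODE_INVALID here
  }
  out.format = fields[0];
  out.data_type = fields[1];
  out.name = fields[2];
  out.size = std::strtol(fields[3].c_str(), nullptr, 10);
  out.dim_num = std::strtoul(fields[4].c_str(), nullptr, 10);
  std::stringstream ds(fields[5]);
  while (std::getline(ds, part, ',')) {
    out.dims.push_back(std::strtol(part.c_str(), nullptr, 10));
  }
  return true;
}

int main() {
  DimsRecord rec;
  return ParseAippRecord("NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8", rec) && rec.dims.size() == 3 ? 0 : 1;
}

This also shows why the test string "NCHW:DT_FLOAT:TensorName" is the invalid case: it splits into only three fields.
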
output_dims.emplace_back(output_info);
-      GELOGD("GetAllAippInputOutputDims Aipp output dims info: %s", it.c_str());
+      GELOGD("Aipp output dims info: %s", it.c_str());
     }
   }

+  aipp_dims_info_[index] = { input_dims, output_dims };
+  return SUCCESS;
+}
+
+Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, vector &input_dims,
+                                               vector &output_dims) const {
+  const auto it = aipp_dims_info_.find(index);
+  if (it == aipp_dims_info_.end()) {
+    GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "There is no AIPP info related to index %u.", index);
+    return ACL_ERROR_GE_AIPP_NOT_EXIST;
+  }
+
+  input_dims = it->second.first;
+  output_dims = it->second.second;
   return SUCCESS;
 }

diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h
index 9ff59d4e..b5f546f1 100755
--- a/ge/graph/load/new_model_manager/davinci_model.h
+++ b/ge/graph/load/new_model_manager/davinci_model.h
@@ -286,13 +286,6 @@ class DavinciModel {
   // Modified from KernelTaskInfo.
   SuperKernelTaskInfo &GetSuperKernelTaskInfo() { return skt_info_; }

-  ///
-  /// @ingroup ge
-  /// @brief get model input and output format
-  /// @return ccTensorFormat_t current model input and output format
-  ///
-  Format GetFormat();
-
   rtModel_t GetRtModelHandle() const { return rt_model_handle_; }

   rtStream_t GetRtModelStream() const { return rt_model_stream_; }

@@ -326,7 +319,7 @@ class DavinciModel {
   Status GetInputOutputDescInfo(vector &input_desc, vector &output_desc);

   Status GetInputOutputDescInfo(vector &input_desc, vector &output_desc,
-                                vector &inputFormats, vector &output_formats);
+                                vector &input_formats, vector &output_formats);

   ///
   /// @ingroup ge
@@ -347,9 +340,9 @@ class DavinciModel {
   void GetUserDesignateShapeOrder(vector &user_input_shape_order) const;

-  void GetCurShape(vector &batch_info, int32_t &dynamic_type);
+  void GetCurShape(vector &batch_info, int32_t &dynamic_type) const;

-  void GetModelAttr(vector &dynamic_output_shape_info);
+  void GetModelAttr(vector &dynamic_output_shape_info) const;

   ///
   /// @ingroup ge
@@ -358,9 +351,9 @@ class DavinciModel {
   /// @param [out] aipp_info
   /// @return execute result
   ///
-  Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info);
+  Status GetAippInfo(uint32_t index, AippConfigInfo &aipp_info) const;

-  Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index);
+  Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index) const;

   ///
   /// @ingroup ge
@@ -378,17 +371,6 @@ class DavinciModel {
   ///
   void GetUniqueId(const OpDescPtr &op_desc, string &unique_identification);

-  ///
-  /// @ingroup ge
-  /// @brief get model input and output desc for zero copy
-  /// @param [out] input_shape model input size
-  /// @param [out] output_shape model output size
-  /// @return execute result
-  ///
-  Status GetInputOutputDescInfoForZeroCopy(vector &input_desc,
-                                           vector &output_desc,
-                                           vector &inputFormats, vector &output_formats);
-
   Status ReturnResult(uint32_t data_id, const bool rslt_flg, const bool seq_end_flg, OutputData *output_data);

   Status ReturnNoOutput(uint32_t data_id);

@@ -538,9 +520,9 @@ class DavinciModel {
   Status UpdateKnownZeroCopyAddr(vector &total_io_addrs, bool update_args = true);
   void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }

-  Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
+  Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const;
   Status GetAllAippInputOutputDims(uint32_t index, vector &input_dims,
-
vector &output_dims); + vector &output_dims) const; void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } // om file name void SetOmName(string om_name) { om_name_ = om_name; } @@ -626,7 +608,7 @@ class DavinciModel { void SetInputDimsInfo(const vector &model_input_dims, Format &format, InputOutputDescInfo &input); Status GetInputDescInfo(vector &input_desc, vector &input_formats); - Status GetOutputDescInfo(vector &output_desc, vector &output_formats); + Status GetOutputDescInfo(vector &output_desc, vector &output_formats) const; Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo); @@ -688,7 +670,7 @@ class DavinciModel { /// @param [in] output_op_list: list of NetOutput op. /// @return Status /// - Status OptInputOutputInfo(const map &data_by_index, const vector &output_op_list); + Status GenInputOutputInfo(const map &data_by_index, const vector &output_op_list); /// /// @ingroup ge @@ -856,8 +838,13 @@ class DavinciModel { Status InitOutputTensorInfo(const OpDescPtr &op_desc); Status GenOutputTensorInfo(OutputData *output_data, vector &outputs); - Status InitOutputDescInfo(const vector &output_op_list, - vector &output_desc, vector &formats); + Status InitInputDescInfo(const map &data_by_index); + Status InitOutputDescInfo(const vector &output_op_list); + + Status InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc); + Status InitAippInfo(uint32_t index, const OpDescPtr &op_desc); + Status InitAippType(uint32_t index, const OpDescPtr &op_desc, const map &data_list); + Status InitAippInputOutputDims(uint32_t index, const OpDescPtr &op_desc); void ParseAIPPInfo(string in_out_info, InputOutputDims &dims_info); void SetLabelForDynamic(const NodePtr &node); @@ -890,9 +877,6 @@ class DavinciModel { map op_list_; // release after DavinciModel::Init - // data op_desc - vector data_op_list_; - vector variable_op_list_; map new_input_data_info_; @@ -1048,6 +1032,13 @@ class DavinciModel { vector output_buffer_size_; vector output_shape_info_; + map orig_input_info_; + map aipp_info_list_; + map> aipp_type_list_; + map, vector>> aipp_dims_info_; + + vector input_descs_; + vector input_formats_; vector output_descs_; vector output_formats_; }; diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.cc b/ge/graph/load/new_model_manager/davinci_model_parser.cc index 34180d08..76526de2 100644 --- a/ge/graph/load/new_model_manager/davinci_model_parser.cc +++ b/ge/graph/load/new_model_manager/davinci_model_parser.cc @@ -16,82 +16,7 @@ #include "graph/load/new_model_manager/davinci_model_parser.h" -#include -#include -#include -#include "securec.h" - -#include "common/debug/log.h" -#include "graph/load/new_model_manager/davinci_model.h" - namespace ge { -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelInfoParser(const ModelData &model, ModelInfo &model_info) { - GE_CHK_RT_RET(rtSetDevice(0)); - try { - uint32_t model_len = 0; - uint8_t *model_data = nullptr; - - Status ret = DavinciModelParser::ParseModelContent(model, model_data, model_len); - - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0)); return ret, "Parse model failed"); - - auto *file_header = reinterpret_cast(model.model_data); - - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_header == nullptr, GE_CHK_RT(rtDeviceReset(0)); - return PARAM_INVALID, "file_header is null."); - - model_info.version = file_header->version; - model_info.is_encrypt = false; - GE_IF_BOOL_EXEC(ENCRYPTED == file_header->is_encrypt, model_info.is_encrypt = true); - - std::shared_ptr 
davinci_model = - std::shared_ptr(new (std::nothrow) DavinciModel(model.priority, nullptr)); - - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(davinci_model == nullptr, GE_CHK_RT(rtDeviceReset(0)); - return PARAM_INVALID, "davinci_model is null."); - - GE_MAKE_GUARD(davinci_model, [&] { davinci_model = nullptr; }); - - ModelHelper model_helper; - ret = model_helper.LoadModel(model); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((ret != SUCCESS), GE_CHK_RT(rtDeviceReset(0)); return FAILED, "load model failed"); - - ret = davinci_model->Assign(model_helper.GetGeModel()); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0)); - return ret, "Parse davinci model data failed"); - - ret = davinci_model->Init(); - - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0)); - return ret, "Davinci model init failed"); - - vector input_list; - vector output_list; - - ret = davinci_model->GetInputOutputDescInfo(input_list, output_list); - - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0)); - return ret, "Davinci model GetInputOutputDescInfo failed"); - - for (const auto &desc : input_list) { - model_info.input_desc.push_back(desc.shape_info); - } - for (const auto &desc : output_list) { - model_info.output_desc.push_back(desc.shape_info); - } - - model_info.name = davinci_model->Name(); - } catch (...) { - DOMI_LOGE("OM model parser failed, some exceptions occur !"); - GE_CHK_RT(rtDeviceReset(0)); - return FAILED; - } - - GE_CHK_RT(rtDeviceReset(0)); - - return SUCCESS; -} - DavinciModelParser::DavinciModelParser() {} DavinciModelParser::~DavinciModelParser() {} diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index b2cce73a..22fddf86 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -995,16 +995,6 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector &dynami return SUCCESS; } -Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, vector &input_desc, - vector &output_desc, - std::vector &inputFormats, - std::vector &outputFormats) { - std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); - return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); -} - /// /// @ingroup ge /// @brief Get AIPP info @@ -1013,11 +1003,11 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, /// @param [out] aipp_info /// @return execute result /// -Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { +Status ModelManager::GetAippInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "GetAIPPInfo failed, invalid model_id is %u.", model_id); - return davinci_model->GetAIPPInfo(index, aipp_info); + return davinci_model->GetAippInfo(index, aipp_info); } Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index 500cad31..418bae62 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ 
b/ge/graph/load/new_model_manager/model_manager.h @@ -239,24 +239,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// @param [out] aipp_info /// @return execute result /// - ge::Status GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); + ge::Status GetAippInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); - /// - /// @ingroup domi_ome - /// @brief set model input and output size zero copy - /// @param [in] model_id model id - /// @param [out] input_shape input tensor - /// @param [out] output_shape output tensor - /// @return SUCCESS success - /// @return PARAM_INVALID parameter invalid - /// - ge::Status GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, std::vector &input_desc, - std::vector &output_desc, - std::vector &inputFormats, - std::vector &outputFormats); - ge::Status GetCurShape(const uint32_t model_id, std::vector &batch_info, int32_t &dynamic_type); ge::Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 1b78860d..3136e172 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -157,9 +157,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); - ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector &input_desc, - std::vector &output_desc); - ge::Status CommandHandle(const ge::Command &command); ge::Status SetDump(const DumpConfig &dump_config); diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 9e51585b..fe7c70c9 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -139,13 +139,14 @@ TEST_F(UtestDavinciModel, init_data_op) { model.runtime_param_.mem_size = 5120000; ComputeGraphPtr graph = make_shared("default"); - OpDescPtr op_input = CreateOpDesc("data", DATA); GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_input = CreateOpDesc("data", DATA); op_input->AddInputDesc(tensor); op_input->AddOutputDesc(tensor); op_input->SetInputOffset({1024}); - op_input->SetOutputOffset({5120}); + op_input->SetOutputOffset({1024}); NodePtr node_input = graph->AddNode(op_input); OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); @@ -168,12 +169,14 @@ TEST_F(UtestDavinciModel, init_data_op_subgraph) { model.runtime_param_.mem_size = 5120000; ComputeGraphPtr graph = make_shared("default"); - OpDescPtr op_input = CreateOpDesc("data", DATA); GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_input = CreateOpDesc("data", DATA); op_input->AddInputDesc(tensor); op_input->AddOutputDesc(tensor); op_input->SetInputOffset({1024}); - op_input->SetOutputOffset({5120}); + op_input->SetOutputOffset({1024}); NodePtr node = graph->AddNode(op_input); uint32_t data_op_index = 0; @@ -192,8 +195,10 @@ TEST_F(UtestDavinciModel, init_netoutput_op_subgraph) { model.runtime_param_.mem_size = 5120000; ComputeGraphPtr graph = make_shared("default"); - OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 
512); + + OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); op_output->AddInputDesc(tensor); op_output->SetInputOffset({1024}); op_output->SetSrcName( { "data" } ); @@ -426,4 +431,332 @@ TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ3) { EXPECT_EQ(ret, SUCCESS); } +TEST_F(UtestDavinciModel, init_data_aipp_info) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); + + GeAttrValue::NAMED_ATTRS aipp_attr; + aipp_attr.SetAttr("aipp_mode", GeAttrValue::CreateFrom(domi::AippOpParams::dynamic)); + aipp_attr.SetAttr("related_input_rank", GeAttrValue::CreateFrom(0)); + aipp_attr.SetAttr("max_src_image_size", GeAttrValue::CreateFrom(2048)); + aipp_attr.SetAttr("support_rotation", GeAttrValue::CreateFrom(1)); + EXPECT_TRUE(AttrUtils::SetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr)); + + AippConfigInfo aipp_info; + EXPECT_EQ(model.GetAippInfo(0, aipp_info), ACL_ERROR_GE_AIPP_NOT_EXIST); + EXPECT_EQ(model.InitNodes(graph), SUCCESS); + EXPECT_EQ(model.GetAippInfo(0, aipp_info), SUCCESS); + EXPECT_EQ(aipp_info.aipp_mode, domi::AippOpParams::dynamic); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 1); +} + +TEST_F(UtestDavinciModel, init_data_aipp_static) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); + + AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "static_aipp"); + + InputAippType aipp_type; + size_t aipp_index = 0; + EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); + EXPECT_EQ(model.InitNodes(graph), SUCCESS); + EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); + EXPECT_EQ(aipp_type, DATA_WITH_STATIC_AIPP); + EXPECT_EQ(aipp_index, 0xFFFFFFFFu); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 1); +} + +TEST_F(UtestDavinciModel, init_data_aipp_dynamic) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + 
op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index 0 + AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp"); + AttrUtils::SetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, "releated_aipp"); + + InputAippType aipp_type; + size_t aipp_index = 0; + EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); + EXPECT_EQ(model.InitNodes(graph), SUCCESS); + EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 1); +} + +TEST_F(UtestDavinciModel, init_data_aipp_releated) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + { + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index 0 + AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp"); + AttrUtils::SetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, "releated_aipp"); + } + { + OpDescPtr op_desc = CreateOpDesc("releated_aipp", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index 1 + } + + InputAippType aipp_type; + size_t aipp_index = 0; + EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); + EXPECT_EQ(model.InitNodes(graph), SUCCESS); + EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); + EXPECT_EQ(aipp_type, DATA_WITH_DYNAMIC_AIPP); + EXPECT_EQ(aipp_index, 1); + + EXPECT_EQ(model.input_addrs_list_.size(), 2); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 2); +} + +TEST_F(UtestDavinciModel, init_data_aipp_dynamic_conf) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index 0 + AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp_conf"); + + InputAippType aipp_type; + size_t aipp_index = 0; + EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); + EXPECT_EQ(model.InitNodes(graph), SUCCESS); + EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); + EXPECT_EQ(aipp_type, DYNAMIC_AIPP_NODE); + EXPECT_EQ(aipp_index, 0xFFFFFFFFU); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 1); +} + +TEST_F(UtestDavinciModel, init_data_aipp_dynamic_invalid) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for 
CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index 0 + AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp_invalid"); + + InputAippType aipp_type; + size_t aipp_index = 0; + EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); + EXPECT_EQ(model.InitNodes(graph), ACL_ERROR_GE_AIPP_MODE_INVALID); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 1); +} + +TEST_F(UtestDavinciModel, init_data_aipp_input_info_empty) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index 0 + + vector inputs = {}; + AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs); + vector outputs = {}; + AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs); + + OriginInputInfo orig_input_info; + EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST); + EXPECT_EQ(model.InitNodes(graph), SUCCESS); + EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), SUCCESS); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 1); +} + +TEST_F(UtestDavinciModel, init_data_aipp_input_info_normal) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index 0 + + vector inputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" }; + AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs); + vector outputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" }; + AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs); + + OriginInputInfo orig_input_info; + EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST); + EXPECT_EQ(model.InitNodes(graph), SUCCESS); + EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), SUCCESS); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 1); +} + +TEST_F(UtestDavinciModel, 
init_data_aipp_input_info_invalid) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index 0 + + vector inputs = { "NCHW:DT_FLOAT:TensorName" }; // Invalid + AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs); + vector outputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" }; + AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs); + + OriginInputInfo orig_input_info; + EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST); + EXPECT_EQ(model.InitNodes(graph), ACL_ERROR_GE_AIPP_MODE_INVALID); + EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 1); +} + +TEST_F(UtestDavinciModel, init_data_aipp_input_dims_normal) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index 0 + + vector inputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" }; + AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs); + vector outputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" }; + AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs); + + vector input_dims; + vector output_dims; + EXPECT_EQ(model.GetAllAippInputOutputDims(0, input_dims, output_dims), ACL_ERROR_GE_AIPP_NOT_EXIST); + EXPECT_EQ(model.InitNodes(graph), SUCCESS); + EXPECT_EQ(model.GetAllAippInputOutputDims(0, input_dims, output_dims), SUCCESS); + EXPECT_EQ(input_dims.size(), 1); + EXPECT_EQ(output_dims.size(), 1); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 1); +} } // namespace ge diff --git a/tests/ut/ge/graph/load/kernel_task_info_unittest.cc b/tests/ut/ge/graph/load/kernel_task_info_unittest.cc index 43abc54b..fe886b49 100644 --- a/tests/ut/ge/graph/load/kernel_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/kernel_task_info_unittest.cc @@ -1120,7 +1120,6 @@ TEST_F(UtestKernelTaskInfo, kernel_task_info_init_success) { op_desc->AddOutputDesc(descout); op_desc->SetId(0); - model.data_op_list_.push_back(op_desc); model.op_list_[0] = op_desc; domi::TaskDef task_def; From 2697519926d43c97bca67bd31f434d2411ff12c8 Mon Sep 17 00:00:00 2001 From: zhengyuanhua Date: Mon, 4 Jan 2021 21:28:14 +0800 Subject: [PATCH 36/54] profiling training trace --- ge/common/profiling/profiling_manager.cc | 2 + ge/common/types.cc | 3 + 
ge/graph/build/graph_builder.cc | 52 +++++ ge/graph/build/graph_builder.h | 1 + ge/graph/build/task_generator.cc | 113 +++++++--- ge/graph/build/task_generator.h | 7 +- .../load/new_model_manager/davinci_model.cc | 14 +- .../load/new_model_manager/davinci_model.h | 2 + ge/hybrid/executor/worker/execution_engine.cc | 2 + ge/hybrid/model/hybrid_model_builder.cc | 196 +++++++++++++++++- ge/hybrid/model/hybrid_model_builder.h | 6 + .../node_executor/rts/rts_node_executor.cc | 33 +++ .../node_executor/rts/rts_node_executor.h | 13 ++ ge/hybrid/node_executor/task_context.h | 2 +- inc/framework/common/ge_types.h | 2 + inc/framework/common/types.h | 3 + 16 files changed, 420 insertions(+), 31 deletions(-) diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 92417286..aad2bbe3 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -302,6 +302,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin } data.append(" model_id:").append(std::to_string(model_id)); + data.append(" task_id:").append(std::to_string(graph.task_id)); + data.append(" stream_id:").append(std::to_string(graph.stream_id)); data.append("\n"); GraphDescReport(device_id, data); diff --git a/ge/common/types.cc b/ge/common/types.cc index 1cc70347..268e7caa 100644 --- a/ge/common/types.cc +++ b/ge/common/types.cc @@ -480,6 +480,9 @@ REGISTER_OPTYPE_DEFINE(HVDWAIT, "HorovodWait"); // aicpu op for online_infer dynamic_dims REGISTER_OPTYPE_DEFINE(GETDYNAMICDIMS, "GetDynamicDims"); +// profiling training trace node +REGISTER_OPTYPE_DEFINE(PROFILINGTRAININGTRACE, "ProfilingTrainingTrace"); + const std::string MODEL_ATTR_TASKS = "tasks"; const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR = "task_gen_base_addr"; const std::string MODEL_ATTR_TASK_GEN_WEIGHT_ADDR = "task_gen_weight_addr"; diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index dce40c3e..143d5550 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -421,6 +421,52 @@ static Status GenerateTaskForConstant(const std::shared_ptr &graph return SUCCESS; } +Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { + bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag(); + com_graph->SetGraphUnknownFlag(false); + + GELOGD("Start to mark profiling task attr for fp and bp."); + TaskGenerator task_generator; + ProfilingPoint profiling_point; + std::vector all_reduce_node_index; + Status ret = task_generator.FindProfilingNodeIndex(com_graph, profiling_point, all_reduce_node_index); + com_graph->SetGraphUnknownFlag(original_unknown_shape_flag); + if (ret != SUCCESS) { + GELOGW("Find profiling node index failed."); + } + if (profiling_point.fp_index == 0 || profiling_point.bp_index == 0 || profiling_point.end_index.empty()) { + GELOGD("No need to mark fp bp profiling task attr."); + return SUCCESS; + } + // mark profiling task attr for node + uint32_t node_index = 0; + for (const auto &node : com_graph->GetAllNodes()) { + OpDescPtr op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(node->GetOpDesc()); + node_index++; + if (profiling_point.fp_index == node_index) { + GELOGI("The first fp node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); + (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, true); + } + if (profiling_point.bp_index == node_index) { + GELOGI("The bp node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), 
node_index); + (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true); + } + for (size_t i = 0; i < all_reduce_node_index.size(); i++) { + if (all_reduce_node_index[i] == node_index) { + GELOGI("The all reduce node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); + (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true); + continue; + } + } + if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end()) { + GELOGI("The end node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); + (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, true); + } + } + return SUCCESS; +} + Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector &subgraph_ptr_list, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, @@ -437,6 +483,12 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, } } + // Set fp bp profiling task attr for graph + if (MarkFpBpProfilingTaskAttr(comp_graph) != SUCCESS) { + GELOGE(FAILED, "Set fp bp profiling task attr for graph."); + return FAILED; + } + auto all_graphs = comp_graph->GetAllSubgraphs(); if (all_graphs.empty()) { all_graphs.push_back(comp_graph); diff --git a/ge/graph/build/graph_builder.h b/ge/graph/build/graph_builder.h index b828a80d..524b60e0 100644 --- a/ge/graph/build/graph_builder.h +++ b/ge/graph/build/graph_builder.h @@ -60,6 +60,7 @@ class GraphBuilder { Status UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr); Status CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc); Status SecondPartition(ge::ComputeGraphPtr &comp_graph, vector &subgraph_ptr_list); + Status MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph); Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector &subgraph_ptr_list, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index 7e45ad61..21e82d11 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -274,6 +274,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra }; GE_MAKE_GUARD(release, callback); + uint64_t all_reduce_node_idx = 0; for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { OpDescPtr op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -292,7 +293,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra // Part2: Call auto fusion_task_info = FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib, - ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes}; + ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes, all_reduce_node_idx}; GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen), "Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str()); // continue directly @@ -316,7 +317,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra type.c_str()); // Profiling task size_t task_list_size_before = task_def_list.size(); - GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); + GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, + node_index, task_def_list, 
all_reduce_node_idx)); int64_t op_id = op_desc->GetId(); // Compatible with dynamic shape scenes, the default is 0 int64_t stream_id = 0; @@ -336,8 +338,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra return ret; } // Profiling task - GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); - + GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, + node_index, task_def_list, all_reduce_node_idx)); size_t task_list_size_after = task_def_list.size(); // If tasks is reduced if (task_list_size_after < task_list_size_before) { @@ -380,6 +382,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info auto &op_name_map = fusion_task_info.op_name_map; auto &profiling_point = fusion_task_info.profiling_point; auto &all_reduce_nodes = fusion_task_info.all_reduce_nodes; + auto &all_reduce_idx = fusion_task_info.all_reduce_node_idx; // If op_desc have this attr, call nodes with same group key in a stream together if (ge::AttrUtils::GetInt(fusion_op_desc, ATTR_NAME_FUSION_GROUP_KEY, group_key) && (fusion_nodes_seen.count(node.get()) == 0)) { @@ -426,7 +429,8 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info return INTERNAL_ERROR; } // profiling task - (void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list); + (void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, + node_index, task_def_list, all_reduce_idx); run_context.stream = run_context.graphStreamList[stream_id]; GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id); @@ -439,7 +443,8 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info return ret; } // profiling task - (void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list); + (void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, + node_index, task_def_list, all_reduce_idx); size_t task_list_size_after = task_def_list.size(); // if tasks is reduced if (task_list_size_after < task_list_size_before) { @@ -830,6 +835,11 @@ Status TaskGenerator::GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint return SUCCESS; } +Status TaskGenerator::FindProfilingNodeIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, + std::vector &all_reduce_nodes) { + return FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes); +} + Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, vector &all_reduce_nodes) const { GE_CHECK_NOTNULL(graph); @@ -840,7 +850,6 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi GELOGD("Profiling is not open."); return SUCCESS; } - GELOGI("Start get FP/BP index."); std::string fp_point_str; std::string bp_point_str; @@ -878,18 +887,27 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi return SUCCESS; } - Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, vector &all_reduce_nodes, uint32_t node_index, - vector &task_def_list) { + vector &task_def_list, uint64_t &all_reduce_node_idx) { const char *profiling_mode = std::getenv(kProfilingMode); bool is_profiling = (profiling_mode != nullptr) 
|| ProfilingManager::Instance().ProfilingOn() || ProfilingManager::Instance().ProfilingTrainingTraceOn(); - if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || - (profiling_point.end_index.empty())) { + bool is_insert_fp_profiling_task = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task); + bool is_insert_bp_profiling_task = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task); + bool no_insert_profiling_task = ((profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || + (profiling_point.end_index.empty())) && + (!(is_insert_fp_profiling_task || is_insert_bp_profiling_task)); + if (!is_profiling || no_insert_profiling_task) { return SUCCESS; } - if (profiling_point.fp_index == node_index) { + GELOGD("Insert fp profiling task: %d, insert bp profiling task: %d, fp index: %u, bp index: %u, end index size: %zu", + is_insert_fp_profiling_task, is_insert_bp_profiling_task, profiling_point.fp_index, profiling_point.bp_index, + profiling_point.end_index.size()); + + if ((profiling_point.fp_index == node_index) || is_insert_fp_profiling_task) { uint64_t jobid_log_id = ge::GetContext().TraceId(); GELOGI("The first FP operator is %s, idx %u, job_id %lu", op_desc->GetName().c_str(), node_index, jobid_log_id); @@ -913,22 +931,40 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const task_def_list.emplace_back(fp_task_def); } - for (size_t i = 0; i < all_reduce_nodes.size(); i++) { - if (all_reduce_nodes[i] != node_index) { - continue; + bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); + uint64_t all_reduce_task_idx = 0; + bool is_insert_all_reduce_task = false; + if (is_all_reduce && is_insert_bp_profiling_task) { + all_reduce_task_idx = all_reduce_node_idx; + is_insert_all_reduce_task = true; + } + if (is_all_reduce) { + all_reduce_node_idx++; + } + if (!is_insert_all_reduce_task) { + for (size_t i = 0; i < all_reduce_nodes.size(); i++) { + if (all_reduce_nodes[i] == node_index) { + all_reduce_task_idx = i; + is_insert_all_reduce_task = true; + break; + } } + } + + if (is_insert_all_reduce_task) { GELOGI("The start allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index); TaskDef ar_task_def; ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); ar_task_def.set_stream_id(op_desc->GetStreamId()); LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); if (ar_log_def != nullptr) { - GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), + GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep), GELOGE(FAILED, "Multiply result is out of range."); return FAILED); - auto log_id = i * kProfilingArStep + kProfilingArStartLogid; + auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArStartLogid; ar_log_def->set_logid(log_id); ar_log_def->set_notify(false); + (void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); } task_def_list.push_back(ar_task_def); } @@ -937,16 +973,27 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, vector &all_reduce_nodes, uint32_t node_index, - vector &task_def_list) { + vector &task_def_list, uint64_t all_reduce_node_idx) { GE_CHECK_NOTNULL(op_desc); const char 
@@ -937,16 +973,27 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const
 Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
                                                vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
-                                               vector<domi::TaskDef> &task_def_list) {
+                                               vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx) {
   GE_CHECK_NOTNULL(op_desc);
   const char *profiling_mode = std::getenv(kProfilingMode);
   bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() ||
                       ProfilingManager::Instance().ProfilingTrainingTraceOn();
-  if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) ||
-      (profiling_point.end_index.empty())) {
+  bool is_insert_bp_profiling_task = false;
+  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
+  bool is_insert_end_profiling_task = false;
+  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, is_insert_end_profiling_task);
+  bool no_insert_profiling_task = ((profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) ||
+                                   (profiling_point.end_index.empty())) &&
+                                  (!(is_insert_bp_profiling_task || is_insert_end_profiling_task));
+  if (!is_profiling || no_insert_profiling_task) {
     return SUCCESS;
   }
-  if (profiling_point.bp_index == node_index) {
+  GELOGD("Insert bp profiling task: %d, insert end profiling task: %d, fp index: %u, bp index: %u, end index size: %zu",
+         is_insert_bp_profiling_task, is_insert_end_profiling_task, profiling_point.fp_index, profiling_point.bp_index,
+         profiling_point.end_index.size());
+
+  bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
+  if ((profiling_point.bp_index == node_index) || (!is_all_reduce && is_insert_bp_profiling_task)) {
     GELOGI("The last BP operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
     TaskDef bp_task_def;
     bp_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
@@ -957,7 +1004,9 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P
     bp_log_def->set_notify(false);
     task_def_list.emplace_back(bp_task_def);
   }
-  if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end()) {
+
+  if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end() ||
+      is_insert_end_profiling_task) {
     GELOGI("The iteration end operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
     TaskDef end_task_def;
     end_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
@@ -969,20 +1018,32 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P
     task_def_list.emplace_back(end_task_def);
   }
 
+  uint32_t all_reduce_task_idx = 0;
+  bool is_insert_all_reduce_task = false;
+  if (is_all_reduce && is_insert_bp_profiling_task) {
+    all_reduce_task_idx = all_reduce_node_idx;
+    is_insert_all_reduce_task = true;
+  }
+
   for (size_t i = 0; i < all_reduce_nodes.size(); i++) {
-    if (all_reduce_nodes[i] != node_index) {
-      continue;
+    if (all_reduce_nodes[i] == node_index) {
+      all_reduce_task_idx = i;
+      is_insert_all_reduce_task = true;
+      break;
     }
+  }
+
+  if (is_insert_all_reduce_task) {
     GELOGI("The end allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
     TaskDef ar_task_def;
     ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
     ar_task_def.set_stream_id(op_desc->GetStreamId());
     LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp();
     GE_CHECK_NOTNULL(ar_log_def);
-    GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep),
+    GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep),
                     GELOGE(FAILED, "Multiply result is out of range.");
                     return FAILED);
-    auto log_id = i * kProfilingArStep + kProfilingArEndLogid;
+    auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArEndLogid;
     ar_log_def->set_logid(log_id);
     ar_log_def->set_notify(false);
     task_def_list.emplace_back(ar_task_def);
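
Both hooks now fire either on a matching profiling-point index (the static-shape path) or on per-op boolean attributes, so a pass that knows the FP/BP/end positions can mark nodes ahead of time; dynamic-shape graphs have no stable node_index to compare against. The marking itself is not part of this patch; it would look roughly like:

// Hypothetical upstream marking; the attribute names are the ones read above.
(void)ge::AttrUtils::SetBool(fp_op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, true);
(void)ge::AttrUtils::SetBool(bp_op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true);
(void)ge::AttrUtils::SetBool(end_op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, true);
// With any flag set, trace tasks are emitted even when the profiling_point
// indexes were never resolved (fp_index / bp_index left at 0).
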
diff --git a/ge/graph/build/task_generator.h b/ge/graph/build/task_generator.h
index c93b2007..5970954c 100755
--- a/ge/graph/build/task_generator.h
+++ b/ge/graph/build/task_generator.h
@@ -51,6 +51,7 @@ struct FusionTaskInfo {
   std::map<uint32_t, string> &op_name_map;
   ProfilingPoint &profiling_point;
   vector<uint32_t> all_reduce_nodes;
+  uint64_t all_reduce_node_idx;
 };
 
 class TaskGenerator {
@@ -76,6 +77,8 @@ class TaskGenerator {
   ///
   Status GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t session_id, RunContext &run_context);
 
+  Status FindProfilingNodeIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point,
+                                std::vector<uint32_t> &all_reduce_nodes);
  private:
   Status UpdateAnchorStatus(const NodePtr &node);
@@ -126,10 +129,10 @@ class TaskGenerator {
                                std::vector<uint32_t> &all_reduce_nodes) const;
   Status InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
                                    std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
-                                   std::vector<domi::TaskDef> &task_def_list);
+                                   std::vector<domi::TaskDef> &task_def_list, uint64_t &all_reduce_node_idx);
   Status InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
                                   std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
-                                  std::vector<domi::TaskDef> &task_def_list);
+                                  std::vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx);
 
   static bool IsProfPoint(const OpDescPtr &op, const std::string &name);
diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc
index 706d4b3b..ad5ee49b 100755
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -3113,6 +3113,8 @@ Status DavinciModel::DistributeTask() {
       task_desc_info.stream_id = task->GetStreamId();
       task_desc_info.shape_type = "static";
       task_desc_info.cur_iter_num = 0;
+      profiler_report_op_info_[task_desc_info.op_name] =
+          std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
       task_desc_info_.emplace_back(task_desc_info);
       if (flag) {
         if (task->GetSktTaskID() != 0xFFFFFFFF) {
@@ -3120,6 +3122,8 @@
           string op_name = "super_kernel_" + to_string(task_index);
           task_desc_info.op_name = op_name;
           task_desc_info.task_id = task->GetSktTaskID();
+          profiler_report_op_info_[task_desc_info.op_name] =
+              std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
           task_desc_info_.emplace_back(task_desc_info);
         }
       }
@@ -3991,7 +3995,15 @@ Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_des
     compute_graph_info.output_format = op_desc.output_format;
     compute_graph_info.output_shape = op_desc.output_shape;
     compute_graph_info.output_data_type = op_desc.output_data_type;
-
+    uint32_t task_id = 0;
+    uint32_t stream_id = 0;
+    auto iter = profiler_report_op_info_.find(op_desc.op_name);
+    if (iter != profiler_report_op_info_.end()) {
+      task_id = iter->second.first;
+      stream_id = iter->second.second;
+    }
+    compute_graph_info.task_id = task_id;
+    compute_graph_info.stream_id = stream_id;
     graph_desc_info.emplace_back(compute_graph_info);
   }
   return SUCCESS;
diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h
index 9ff59d4e..893dfc2a 100755
--- a/ge/graph/load/new_model_manager/davinci_model.h
+++ b/ge/graph/load/new_model_manager/davinci_model.h
@@ -978,6 +978,8 @@ class DavinciModel {
   // for profiling task and graph info
   vector<TaskDescInfo> task_desc_info_;
 
+  std::map<std::string, std::pair<uint32_t, uint32_t>> profiler_report_op_info_;
+
   int64_t maxDumpOpNum_;
   // for data dump
   DataDumper data_dumper_;
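
With the template arguments restored, profiler_report_op_info_ is a plain name-to-(task_id, stream_id) map filled in DistributeTask and read back in GetComputeGraphInfo. A minimal round trip (the op name and id values below are made up for illustration):

std::map<std::string, std::pair<uint32_t, uint32_t>> profiler_report_op_info_;

// DistributeTask(): remember where each op's task landed.
profiler_report_op_info_["conv1"] = std::pair<uint32_t, uint32_t>(12U, 3U);

// GetComputeGraphInfo(): fall back to 0/0 for ops that never produced a task.
uint32_t task_id = 0;
uint32_t stream_id = 0;
auto iter = profiler_report_op_info_.find("conv1");
if (iter != profiler_report_op_info_.end()) {
  task_id = iter->second.first;     // 12
  stream_id = iter->second.second;  // 3
}
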
diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc
index 21dd8e4b..e9c6ef29 100755
--- a/ge/hybrid/executor/worker/execution_engine.cc
+++ b/ge/hybrid/executor/worker/execution_engine.cc
@@ -221,6 +221,8 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel
       tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
       tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
     }
+    tmp_compute_graph_info.task_id = context_->GetTaskId();
+    tmp_compute_graph_info.stream_id = context_->GetStreamId();
    compute_graph_info.emplace_back(tmp_compute_graph_info);
     GELOGD("GetComputeGraphInfo of node [%s] end.", node->GetName().c_str());
   }
diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index 46c9c39b..32fc495a 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -35,11 +35,22 @@
 namespace ge {
 namespace hybrid {
+using domi::LogTimeStampDef;
+using domi::TaskDef;
 namespace {
 const uint32_t kSubgraphIndex = 0U;
 const uint32_t kVarOutputIndex = 0U;
+const uint64_t kProfilingFpStartLogid = 1U;
+const uint64_t kProfilingBpEndLogid = 2U;
+const uint64_t kProfilingIterEndLogid = 65535U;
 const int kBytes = 8;
 const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown";
+const char *const kProfilingGraph = "ProfilingGraph";
+const char *const kProfilingFpNode = "ProfilingFpNode";
+const char *const kProfilingBpNode = "ProfilingBpNode";
+const char *const kProfilingEndNode = "ProfilingEndNode";
+const char *const kProfilingArNode = "ProfilingAllReduceNode";
+const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE";
 
 Status SetOutputNameAttr(ComputeGraph &graph) {
   vector<string> output_names;
@@ -1531,6 +1542,188 @@ Status HybridModelBuilder::RecoverGraphUnknownFlag() {
   return SUCCESS;
 }
 
+Status HybridModelBuilder::GenerateFpProfilingTask(const OpDescPtr &op_desc, vector<TaskDef> &task_def_list) {
+  uint64_t jobid_log_id = ge::GetContext().TraceId();
+  GELOGD("The first FP operator is %s, job_id %lu", op_desc->GetName().c_str(), jobid_log_id);
+
+  TaskDef job_task_def;
+  job_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
+  job_task_def.set_stream_id(op_desc->GetStreamId());
+  LogTimeStampDef *job_log_def = job_task_def.mutable_log_timestamp();
+  if (job_log_def != nullptr) {
+    job_log_def->set_logid(jobid_log_id);
+    job_log_def->set_notify(false);
+  }
+  task_def_list.emplace_back(job_task_def);
+  TaskDef fp_task_def;
+  fp_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
+  fp_task_def.set_stream_id(op_desc->GetStreamId());
+  LogTimeStampDef *fp_log_def = fp_task_def.mutable_log_timestamp();
+  if (fp_log_def != nullptr) {
+    fp_log_def->set_logid(kProfilingFpStartLogid);
+    fp_log_def->set_notify(false);
+  }
+  task_def_list.emplace_back(fp_task_def);
+
+  return SUCCESS;
+}
+
+Status HybridModelBuilder::GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id,
+                                                   vector<TaskDef> &task_def_list) {
+  TaskDef ar_task_def;
+  ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
+  ar_task_def.set_stream_id(op_desc->GetStreamId());
+  LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp();
+  if (ar_log_def != nullptr) {
+    ar_log_def->set_logid(log_id);
+    ar_log_def->set_notify(false);
+  }
+  task_def_list.emplace_back(ar_task_def);
+
+  return SUCCESS;
+}
+
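// The Generate*ProfilingTask helpers above and below all emit the same shape of
// task: an RT_MODEL_TASK_PROFILER_TRACE TaskDef whose LogTimeStampDef carries the
// log id. Condensed to a sketch (the helper shown here is illustrative only):
//
//   TaskDef def;
//   def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
//   def.set_stream_id(op_desc->GetStreamId());
//   def.mutable_log_timestamp()->set_logid(log_id);   // 1 = FP, 2 = BP, 65535 = iteration end
//   def.mutable_log_timestamp()->set_notify(notify);  // true only for the iteration-end task
//   task_def_list.emplace_back(def);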
+Status HybridModelBuilder::GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<TaskDef> &task_def_list) {
+  TaskDef bp_task_def;
+  bp_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
+  bp_task_def.set_stream_id(op_desc->GetStreamId());
+  LogTimeStampDef *bp_log_def = bp_task_def.mutable_log_timestamp();
+  GE_CHECK_NOTNULL(bp_log_def);
+  bp_log_def->set_logid(kProfilingBpEndLogid);
+  bp_log_def->set_notify(false);
+  task_def_list.emplace_back(bp_task_def);
+
+  return SUCCESS;
+}
+
+Status HybridModelBuilder::GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<TaskDef> &task_def_list) {
+  TaskDef end_task_def;
+  end_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
+  end_task_def.set_stream_id(op_desc->GetStreamId());
+  LogTimeStampDef *end_log_def = end_task_def.mutable_log_timestamp();
+  GE_CHECK_NOTNULL(end_log_def);
+  end_log_def->set_logid(kProfilingIterEndLogid);
+  end_log_def->set_notify(true);
+  task_def_list.emplace_back(end_task_def);
+
+  return SUCCESS;
+}
+
+Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, const NodePtr &node) {
+  GE_CHECK_NOTNULL(node);
+  const OpDescPtr &op_desc = node->GetOpDesc();
+  GE_CHECK_NOTNULL(op_desc);
+  const auto &compute_graph = MakeShared<ComputeGraph>(kProfilingGraph);
+  GE_CHECK_NOTNULL(compute_graph);
+
+  NodePtr node_ptr = nullptr;
+  vector<TaskDef> task_def_list;
+  // create fp node
+  bool is_insert_fp_profiling_task = false;
+  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task);
+  if (is_insert_fp_profiling_task) {
+    (void)GenerateFpProfilingTask(op_desc, task_def_list);
+    auto fp_desc = MakeShared<OpDesc>(kProfilingFpNode, PROFILINGTRAININGTRACE);
+    GE_CHECK_NOTNULL(fp_desc);
+    fp_desc->SetOpKernelLibName(kEngineNameRts);
+    node_ptr = compute_graph->AddNode(fp_desc);
+    GELOGD("Create fp profiling node success before.");
+  }
+  // create all reduce start node
+  bool is_insert_bp_profiling_task = false;
+  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
+  bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
+  if (is_all_reduce && is_insert_bp_profiling_task) {
+    int64_t log_id = 0;
+    (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
+    GELOGD("All reduce node profiling task log id: %ld before", log_id);
+    (void)GenerateArProfilingTask(op_desc, log_id, task_def_list);
+    string op_name = string(kProfilingArNode) + std::to_string(log_id);
+    auto ar_desc_start = MakeShared<OpDesc>(op_name, PROFILINGTRAININGTRACE);
+    GE_CHECK_NOTNULL(ar_desc_start);
+    ar_desc_start->SetOpKernelLibName(kEngineNameRts);
+    node_ptr = compute_graph->AddNode(ar_desc_start);
+    GELOGD("Create all reduce start profiling node success before.");
+  }
+
+  if (node_ptr != nullptr) {
+    for (const auto &task_def : task_def_list) {
+      hybrid_model_.task_defs_[node_ptr].emplace_back(task_def);
+    }
+    NodeItem *node_item = nullptr;
+    GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item));
+    node_item->input_start = 0;
+    node_item->output_start = 0;
+    graph_item.node_items_.emplace_back(node_item);
+  } else {
+    GELOGD("No need to create profiling node before.");
+  }
+
+  return SUCCESS;
+}
+
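// Resulting node_items_ layout around one marked node N (sketch):
//   all-reduce N with BP attr: [ProfilingAllReduceNode<log_id>] [N] [ProfilingAllReduceNode<log_id + 1>]
//   FP-marked N:               [ProfilingFpNode] [N]        (trace-id task, then log id 1)
//   ordinary N with BP attr:   [N] [ProfilingBpNode]        (log id 2)
//   N with END attr:           [N] [ProfilingEndNode]       (log id 65535, notify == true)
// CreateProfilingNodeBefore produces the entries in front of N;
// CreateProfilingNodeAfter (below) produces the ones behind it.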
+Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const NodePtr &node) {
+  GE_CHECK_NOTNULL(node);
+  const OpDescPtr &op_desc = node->GetOpDesc();
+  GE_CHECK_NOTNULL(op_desc);
+  const auto &compute_graph = MakeShared<ComputeGraph>(kProfilingGraph);
+  GE_CHECK_NOTNULL(compute_graph);
+
+  NodePtr node_ptr = nullptr;
+  vector<TaskDef> task_def_list;
+  // Create all reduce end node
+  bool is_insert_bp_profiling_task = false;
+  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
+  bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
+  if (is_all_reduce && is_insert_bp_profiling_task) {
+    int64_t log_id = 0;
+    (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
+    GELOGD("All reduce node profiling task log id: %ld after", log_id);
+    (void)GenerateArProfilingTask(op_desc, log_id + 1, task_def_list);
+    string op_name = string(kProfilingArNode) + std::to_string(log_id + 1);
+    auto ar_desc_end = MakeShared<OpDesc>(op_name, PROFILINGTRAININGTRACE);
+    GE_CHECK_NOTNULL(ar_desc_end);
+    ar_desc_end->SetOpKernelLibName(kEngineNameRts);
+    node_ptr = compute_graph->AddNode(ar_desc_end);
+    GELOGD("Create all reduce end profiling node success after.");
+  }
+  // create bp node
+  if (!is_all_reduce && is_insert_bp_profiling_task) {
+    (void)GenerateBpProfilingTask(op_desc, task_def_list);
+    auto bp_op_desc = MakeShared<OpDesc>(kProfilingBpNode, PROFILINGTRAININGTRACE);
+    GE_CHECK_NOTNULL(bp_op_desc);
+    bp_op_desc->SetOpKernelLibName(kEngineNameRts);
+    node_ptr = compute_graph->AddNode(bp_op_desc);
+    GELOGD("Create bp profiling node success after.");
+  }
+  // create end node
+  bool is_insert_end_profiling_task = false;
+  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, is_insert_end_profiling_task);
+  if (is_insert_end_profiling_task) {
+    (void)GenerateEndProfilingTask(op_desc, task_def_list);
+    auto end_desc = MakeShared<OpDesc>(kProfilingEndNode, PROFILINGTRAININGTRACE);
+    GE_CHECK_NOTNULL(end_desc);
+    end_desc->SetOpKernelLibName(kEngineNameRts);
+    node_ptr = compute_graph->AddNode(end_desc);
+    GELOGD("Create end profiling node success after.");
+  }
+
+  if (node_ptr != nullptr) {
+    for (const auto &task_def : task_def_list) {
+      hybrid_model_.task_defs_[node_ptr].emplace_back(task_def);
+    }
+    NodeItem *node_item = nullptr;
+    GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item));
+    node_item->input_start = 0;
+    node_item->output_start = 0;
+    graph_item.node_items_.emplace_back(node_item);
+  } else {
+    GELOGD("No need to create profiling node after.");
+  }
+
+  return SUCCESS;
+}
+
 Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root_graph) {
   GELOGD("Start to load subgraph [%s]", graph.GetName().c_str());
   // for known partitioned call, load all nodes
@@ -1567,8 +1760,9 @@ Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root
       graph_item->output_node_ = node_item;
       GE_CHK_STATUS_RET_NOLOG(BuildOutputMapping(*graph_item, *node_item, is_root_graph));
     }
-
+    GE_CHK_STATUS_RET_NOLOG(CreateProfilingNodeBefore(*graph_item, node));
     graph_item->node_items_.emplace_back(node_item);
+    GE_CHK_STATUS_RET_NOLOG(CreateProfilingNodeAfter(*graph_item, node));
     // parse var outputs
     GE_CHK_STATUS_RET_NOLOG(ParseVarOutputs(*node_item));
     GELOGD("NodeItem created: %s", node_item->DebugString().c_str());
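
The synthetic profiling nodes carry no kernel of their own; their TaskDefs are parked in hybrid_model_.task_defs_ and fetched again when the RTS executor loads the node. Sketch of the hand-off (the consumer side appears further down in this patch):

// Producer (above): hybrid_model_.task_defs_[node_ptr].emplace_back(task_def);
// Consumer (RtsNodeExecutor::LoadTask):
const std::vector<domi::TaskDef> *task_defs = model.GetTaskDefs(node);
if (task_defs == nullptr || task_defs->empty()) {
  return INTERNAL_ERROR;  // a profiling node without stored tasks is a build bug
}
task = MakeShared<ProfilingTraceNodeTask>(*task_defs);
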
diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h
index a11faae2..55a19b6c 100644
--- a/ge/hybrid/model/hybrid_model_builder.h
+++ b/ge/hybrid/model/hybrid_model_builder.h
@@ -79,6 +79,12 @@ class HybridModelBuilder {
   Status LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item);
   Status RecoverGraphUnknownFlag();
   Status CheckAicpuOpList();
+  Status CreateProfilingNodeBefore(GraphItem &graph_item, const NodePtr &node);
+  Status CreateProfilingNodeAfter(GraphItem &graph_item, const NodePtr &node);
+  Status GenerateFpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list);
+  Status GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list);
+  Status GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list);
+  Status GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id, vector<domi::TaskDef> &task_def_list);
 
   const char* GetGraphName() const {
     return hybrid_model_.model_name_.c_str();
diff --git a/ge/hybrid/node_executor/rts/rts_node_executor.cc b/ge/hybrid/node_executor/rts/rts_node_executor.cc
index 18b875fd..90b623e0 100644
--- a/ge/hybrid/node_executor/rts/rts_node_executor.cc
+++ b/ge/hybrid/node_executor/rts/rts_node_executor.cc
@@ -18,6 +18,7 @@
 #include "common/debug/log.h"
 #include "common/ge/ge_util.h"
 #include "graph/utils/tensor_utils.h"
+#include "hybrid/model/hybrid_model.h"
 #include "runtime/rt.h"
 
 namespace ge {
@@ -79,12 +80,44 @@ Status IdentityNNodeTask::ExecuteAsync(TaskContext &context, std::function
+Status ProfilingTraceNodeTask::UpdateArgs(TaskContext &context) {
+  // profiling trace task has no args to update
+  return SUCCESS;
+}
+
+Status ProfilingTraceNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> done_callback) {
+  for (const auto &task_def : task_defs_) {
+    auto log_time_stamp_def = task_def.log_timestamp();
+    uint64_t log_id = log_time_stamp_def.logid();
+    bool notify = log_time_stamp_def.notify();
+    uint32_t flat = log_time_stamp_def.flat();
+
+    GELOGD("ProfilingTraceTask execute async start. logid = %lu, notify = %d.", log_id, notify);
+    rtError_t rt_ret = rtProfilerTrace(log_id, notify, flat, context.GetStream());
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+      return RT_ERROR_TO_GE_STATUS(rt_ret);
+    }
+    GELOGD("[%s] ProfilingTraceTask[%lu] execute success.", context.GetNodeName(), log_id);
+  }
+
+  return SUCCESS;
+}
+
 Status RtsNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const {
+  GE_CHECK_NOTNULL(node);
+  auto op_type = node->GetType();
   if (op_type == IDENTITY) {
     task = MakeShared<IdentityNodeTask>();
   } else if (op_type == IDENTITYN) {
     task = MakeShared<IdentityNNodeTask>();
+  } else if (op_type == PROFILINGTRAININGTRACE) {
+    auto *task_defs = model.GetTaskDefs(node);
+    if (task_defs == nullptr || task_defs->empty()) {
+      GELOGE(INTERNAL_ERROR, "Profiling node has no task to execute.");
+      return INTERNAL_ERROR;
+    }
+    task = MakeShared<ProfilingTraceNodeTask>(*task_defs);
   } else {
     GELOGE(INTERNAL_ERROR, "[%s] Unsupported RTS op type: %s", node->GetName().c_str(), op_type.c_str());
     return INTERNAL_ERROR;
diff --git a/ge/hybrid/node_executor/rts/rts_node_executor.h b/ge/hybrid/node_executor/rts/rts_node_executor.h
index 2576b73b..df487d6c 100644
--- a/ge/hybrid/node_executor/rts/rts_node_executor.h
+++ b/ge/hybrid/node_executor/rts/rts_node_executor.h
@@ -18,6 +18,7 @@
 #define GE_HYBRID_NODE_EXECUTOR_RTS_RTS_NODE_EXECUTOR_H_
 
 #include "hybrid/node_executor/node_executor.h"
+#include "proto/task.pb.h"
 
 namespace ge {
 namespace hybrid {
@@ -35,6 +36,18 @@ class IdentityNNodeTask : public IdentityNodeTask {
   Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override;
 };
 
+class ProfilingTraceNodeTask : public NodeTask {
+ public:
+  explicit ProfilingTraceNodeTask(const std::vector<domi::TaskDef> &task_defs) : task_defs_(task_defs) {}
+  ~ProfilingTraceNodeTask() override = default;
+
+  Status UpdateArgs(TaskContext &context) override;
+  Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override;
+
+ private:
+  std::vector<domi::TaskDef> task_defs_;
+};
+
 class RtsNodeExecutor : public NodeExecutor {
  public:
  Status LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const override;
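
At run time each stored TaskDef becomes one rtProfilerTrace call on the node's stream, so an FP node loaded with two TaskDefs (trace id, then log id 1) yields two trace records per iteration. A minimal replay loop mirroring ProfilingTraceNodeTask::ExecuteAsync above (rt_stream stands in for context.GetStream()):

for (const auto &task_def : task_defs) {
  const auto &ts = task_def.log_timestamp();
  rtError_t rt_ret = rtProfilerTrace(ts.logid(), ts.notify(), ts.flat(), rt_stream);
  if (rt_ret != RT_ERROR_NONE) {
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
}
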
diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h
index 0e85a8e3..8ba4fb90 100644
--- a/ge/hybrid/node_executor/task_context.h
+++ b/ge/hybrid/node_executor/task_context.h
@@ -123,7 +123,7 @@ class TaskContext {
   Status status_ = SUCCESS;
   std::vector<void *> workspaces_;
   uint64_t iteration_ = 0;
-  uint32_t task_id_= 0;
+  uint32_t task_id_ = 0;
   uint32_t stream_id_ = 0;
 };
 }  // namespace hybrid
diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h
index 4267aec4..685e03fd 100644
--- a/inc/framework/common/ge_types.h
+++ b/inc/framework/common/ge_types.h
@@ -263,6 +263,8 @@ struct ComputeGraphDescInfo {
   std::vector<Format> output_format;
   std::vector<std::vector<int64_t>> output_shape;
   std::vector<DataType> output_data_type;
+  uint32_t task_id;
+  uint32_t stream_id;
 };
 
 struct OpDescInfo {
diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h
index 99c2ea03..e3baa816 100644
--- a/inc/framework/common/types.h
+++ b/inc/framework/common/types.h
@@ -529,6 +529,9 @@ REGISTER_OPTYPE_DECLARE(HVDWAIT, "HorovodWait");
 // aicpu op for online_infer dynamic_dims
 REGISTER_OPTYPE_DECLARE(GETDYNAMICDIMS, "GetDynamicDims");
 
+// profiling training trace node
+REGISTER_OPTYPE_DECLARE(PROFILINGTRAININGTRACE, "ProfilingTrainingTrace");
+
 enum InputMode { INPUT = 0, CONST_INPUT };
 
 // Definition of the processing status enum of the process module

From 0ed8136d003d3455ed0fba51ae6334a685d19fdf Mon Sep 17 00:00:00 2001
From: zhaoxinxin
Date: Mon, 4 Jan 2021 22:49:19 +0800
Subject: [PATCH 37/54] modified: ge/graph/preprocess/graph_preprocess.cc

---
 ge/graph/preprocess/graph_preprocess.cc | 52 +++++++++++++------------
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc
index f6a9ea80..2ae39b3c 100644
--- a/ge/graph/preprocess/graph_preprocess.cc
+++ b/ge/graph/preprocess/graph_preprocess.cc
@@ -899,6 +899,23 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) {
   }
   return SUCCESS;
 }
+long StringToLongNoThrow(const string &str) {
+  try {
+    return std::stol(str);
+  } catch (const std::invalid_argument &) {
+    GELOGE(PARAM_INVALID,
+           "Parse shape range of input failed when transfer from string to int64. Given %s, while correct example: "
+           "\"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
+           str.c_str());
+    return PARAM_INVALID;
+  } catch (const std::out_of_range &) {
+    GELOGE(PARAM_INVALID,
+           "Parse shape range of input failed when transfer from string to int64. Given %s, while correct example: "
+           "\"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
+           str.c_str());
+    return PARAM_INVALID;
+  }
+}
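// Worked example for the parser below, following the new code path:
//   input: "[1~20,3,3~6,-1],[1~20,3,3~6,-1]"
//   split on ']' and trimmed -> "1~20,3,3~6,-1" per input
//   "1~20" -> (1, 20)   bounded range dim
//   "3"    -> (3, 3)    fixed dim
//   "3~6"  -> (3, 6)    bounded range dim
//   "-1"   -> (0, -1)   unbounded dim (a negative fixed value now maps to (0, v)
//                       instead of the old (1, v))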
 /**
  * Parse shape_range from string to vector
  * shape_range from option normally is "[1~20,3,3~6,-1],[1~20,3,3~6,-1]"
  */
 Status ParseDynamicInputShapeRange(const std::string &shape_range,
                                    std::vector<std::vector<std::pair<int64_t, int64_t>>> &range) {
   if (shape_range.size() < 2) {
     GELOGE(PARAM_INVALID, "Shape range %s is invalid.", shape_range.c_str());
     return PARAM_INVALID;
   }
-  // different shape_ragne of single input are split by ']'
+  // different shape_range of single input are split by ']'
   vector<string> shape_range_set = ge::StringUtils::Split(shape_range, ']');
   if (shape_range_set.empty()) {
     GELOGE(PARAM_INVALID, "Shape range %s is not valid. Correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
            shape_range.c_str());
     return PARAM_INVALID;
   }
   for (auto &shape_range_str : shape_range_set) {
     if (shape_range_str.empty()) {
-      GELOGE(PARAM_INVALID,
-             "Shape range of input is empty. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
-             shape_range.c_str());
-      return PARAM_INVALID;
+      continue;
     }
     // trim start bytes, after that, single input should be "1~20,3,3~6,-1"
     if (ge::StringUtils::StartWith(shape_range_str, "[")) {
       shape_range_str = shape_range_str.substr(1, shape_range_str.size());
-    } else if (ge::StringUtils::StartWith(shape_range_str, ",")) {
+    }
+    if (ge::StringUtils::StartWith(shape_range_str, ",")) {
       shape_range_str = shape_range_str.substr(2, shape_range_str.size());
-    } else {
-      GELOGE(PARAM_INVALID,
-             "Shape range of input is invalid. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
-             shape_range.c_str());
-      return PARAM_INVALID;
     }
+
     // parse shape_range of single input. eg. "1~20,3,3~6,-1"
     std::vector<std::pair<int64_t, int64_t>> range_of_single_input;
     vector<string> dim_range_set = ge::StringUtils::Split(shape_range_str, ',');
     for (const auto &range_pair_str : dim_range_set) {
       vector<string> range_pair_set = ge::StringUtils::Split(range_pair_str, '~');
       pair<int64_t, int64_t> range_pair;
       if (range_pair_set.size() == 1) {
         // fix dim
-        auto range_value = std::stol(range_pair_set.at(0).c_str());
+        auto range_value = StringToLongNoThrow(range_pair_set.at(0).c_str());
         if (range_value < 0) {
-          range_pair = std::make_pair(1, range_value);
+          range_pair = std::make_pair(0, range_value);
         } else {
           range_pair = std::make_pair(range_value, range_value);
         }
       } else if (range_pair_set.size() == 2) {
         // unknown dim, should get range.
-        try {
-          auto range_left = std::stol(range_pair_set.at(0).c_str());
-          auto range_right = std::stol(range_pair_set.at(1).c_str());
-          range_pair = std::make_pair(range_left, range_right);
-        } catch (const std::invalid_argument) {
-          GELOGE(
-              PARAM_INVALID,
-              "Parse shape range of input failed when transfer from string to int64. Given %s, while correct example: "
-              "\"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
-              shape_range.c_str());
-          return PARAM_INVALID;
-        }
+        auto range_left = StringToLongNoThrow(range_pair_set.at(0).c_str());
+        auto range_right = StringToLongNoThrow(range_pair_set.at(1).c_str());
+        range_pair = std::make_pair(range_left, range_right);
       } else {
         GELOGE(PARAM_INVALID,
                "Shape range of input is invalid. 
Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", From 50b8b31008c7619c7521eb1e318523c52dd0c917 Mon Sep 17 00:00:00 2001 From: zhaoxinxin Date: Mon, 4 Jan 2021 23:20:38 +0800 Subject: [PATCH 38/54] modified: ge/graph/preprocess/graph_preprocess.cc --- ge/graph/preprocess/graph_preprocess.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 2ae39b3c..9672c497 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1032,7 +1032,7 @@ Status UpdateDynamicInputShapeRange(const ge::GeAttrValue::INT index, desc.SetShape(origin_shape); desc.SetShapeRange(current_shape_range_vec); - /*int64_t dynamic_shape_size = 1; + int64_t dynamic_shape_size = 1; for (const auto range_pair : range_vec.at(index)) { FMK_INT64_MULCHECK(dynamic_shape_size, range_pair.second); dynamic_shape_size *= range_pair.second; @@ -1046,7 +1046,7 @@ Status UpdateDynamicInputShapeRange(const ge::GeAttrValue::INT index, FMK_INT64_MULCHECK(dynamic_shape_size, data_type_size); dynamic_shape_size *= data_type_size; GELOGI("In dynamic_execute mode ,set input %s shape range size %ld", op->GetName().c_str(), dynamic_shape_size); - ge::TensorUtils::SetSize(desc, dynamic_shape_size);*/ + ge::TensorUtils::SetSize(desc, dynamic_shape_size); graphStatus graph_ret = op->UpdateInputDesc(0, desc); GE_CHK_STATUS_RET(graph_ret, "UpdateInputDesc fail, graph ret: %u", graph_ret); graph_ret = op->UpdateOutputDesc(0, desc); From 7f7b662750642446f060c2580259fca56fb1c883 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Tue, 5 Jan 2021 10:17:40 +0800 Subject: [PATCH 39/54] Delete useless kOutputNum --- ge/graph/load/new_model_manager/davinci_model.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 78f6f8bf..37a39308 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -75,7 +75,6 @@ namespace ge { namespace { const uint32_t kDataIndex = 0; -const uint32_t kOutputNum = 1; const uint32_t kTrueBranchStreamNum = 1; const uint32_t kGetDynamicDimsCount = 1; const uint32_t kThreadNum = 16; From 5f68aaa0c7f7c702ef2eaa0e5e84bc711b0ce2c3 Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 30 Dec 2020 10:11:34 +0800 Subject: [PATCH 40/54] rm compile macro --- ge/CMakeLists.txt | 8 +-- ge/executor/CMakeLists.txt | 2 +- ge/ge_local_engine/engine/host_cpu_engine.cc | 38 ----------- ge/graph/manager/graph_manager.cc | 6 -- ge/graph/manager/graph_mem_allocator.cc | 8 --- ge/graph/manager/graph_mem_allocator.h | 6 -- ge/graph/manager/host_mem_allocator.h | 2 +- ge/graph/manager/host_mem_manager.cc | 9 --- ge/graph/manager/host_mem_manager.h | 4 -- ge/graph/optimize/graph_optimize.cc | 6 +- ge/graph/passes/assign_remove_pass.cc | 67 +------------------ ge/graph/passes/assign_remove_pass.h | 3 +- ge/graph/passes/constant_fuse_same_pass.cc | 6 -- ge/graph/passes/constant_fuse_same_pass.h | 13 ---- ge/graph/preprocess/graph_preprocess.cc | 11 --- ge/hybrid/common/npu_memory_allocator.cc | 10 --- ge/hybrid/model/hybrid_model_builder.cc | 14 ---- .../host_cpu/host_cpu_node_executor.cc | 15 ----- 18 files changed, 13 insertions(+), 215 deletions(-) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 8d9edb65..e94258ac 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -124,7 +124,7 @@ set(TRAIN_SRC_LIST 
"graph/manager/graph_var_manager.cc" "graph/manager/host_mem_manager.cc" "graph/manager/rdma_pool_allocator.cc" - $<$>:graph/manager/host_mem_allocator.cc> + "graph/manager/host_mem_allocator.cc" "graph/manager/memory_api.cc" "graph/manager/model_manager/event_manager.cc" "graph/manager/trans_var_data_utils.cc" @@ -166,7 +166,7 @@ set(TRAIN_SRC_LIST "graph/passes/hccl_group_pass.cc" "graph/passes/enter_pass.cc" "graph/passes/assign_remove_pass.cc" - $<$>:graph/passes/inplace_support_check_pass.cc> + "graph/passes/inplace_support_check_pass.cc" "graph/passes/flow_ctrl_pass.cc" "graph/passes/global_step_insert_pass.cc" "host_kernels/transpose_kernel.cc" @@ -409,7 +409,7 @@ set(INFER_SRC_LIST "graph/manager/graph_var_manager.cc" "graph/manager/host_mem_manager.cc" "graph/manager/rdma_pool_allocator.cc" - $<$>:graph/manager/host_mem_allocator.cc> + "graph/manager/host_mem_allocator.cc" "graph/manager/graph_mem_allocator.cc" "graph/manager/graph_caching_allocator.cc" "model/ge_model.cc" @@ -531,7 +531,7 @@ set(INFER_SRC_LIST "graph/passes/for_pass.cc" "graph/passes/enter_pass.cc" "graph/passes/assign_remove_pass.cc" - $<$>:graph/passes/inplace_support_check_pass.cc> + "graph/passes/inplace_support_check_pass.cc" "graph/passes/addn_pass.cc" "graph/passes/common_subexpression_elimination_pass.cc" "graph/passes/remove_same_const_pass.cc" diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index 4ca18864..ac4d4ebd 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -28,7 +28,7 @@ set(SRC_LIST "../graph/manager/trans_var_data_utils.cc" "../graph/manager/util/debug.cc" "../graph/manager/rdma_pool_allocator.cc" - $<$>:../graph/manager/host_mem_allocator.cc> + "../graph/manager/host_mem_allocator.cc" "../hybrid/node_executor/aicpu/aicpu_ext_info.cc" "../model/ge_model.cc" "../model/ge_root_model.cc" diff --git a/ge/ge_local_engine/engine/host_cpu_engine.cc b/ge/ge_local_engine/engine/host_cpu_engine.cc index 0f46b4cb..35ecfb2d 100755 --- a/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -26,7 +26,6 @@ #include "common/math/math_util.h" namespace { -#ifndef ONLY_COMPILE_OPEN_SRC #define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ case (DTYPE): { \ GeTensorPtr ge_tensor = nullptr; \ @@ -50,43 +49,6 @@ namespace { named_outputs.emplace(tensor_name, tensor); \ break; \ } -#else -#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ - case (DTYPE): { \ - GeTensorPtr ge_tensor = nullptr; \ - if (need_create_flag) { \ - GELOGI("node:%s allocate output %zu start, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE)); \ - std::unique_ptr buf(new (std::nothrow) TYPE[data_num]()); \ - if (buf == nullptr) { \ - GELOGE(MEMALLOC_FAILED, "New sizeof(T) * data_num(%zu) memory failed", \ - static_cast(sizeof(TYPE) * data_num)); \ - return MEMALLOC_FAILED; \ - } \ - ge_tensor = MakeShared(out_desc); \ - GE_CHECK_NOTNULL(ge_tensor); \ - GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\ - if (ge_tensor->SetData(reinterpret_cast(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \ - GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \ - return MEMALLOC_FAILED; \ - } \ - ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \ - ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \ - outputs.emplace_back(ge_tensor); \ - } else { \ - ge_tensor = outputs[i]; \ - GE_CHECK_NOTNULL(ge_tensor); \ - 
GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i); \ - } \ - auto tensor = TensorAdapter::AsTensor(*ge_tensor); \ - auto tensor_name = op_desc->GetOutputNameByIndex(i); \ - GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \ - op_desc->GetName().c_str(), i); \ - GELOGD("Successfully inserted output tensor. node = %s, index = %zu, output name = %s, addr = %p, size = %zu", \ - op_desc->GetName().c_str(), i, tensor_name.c_str(), tensor.GetData(), tensor.GetSize()); \ - named_outputs.emplace(tensor_name, tensor); \ - break; \ - } -#endif } namespace ge { diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index c4f91036..c0f084d8 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -38,10 +38,8 @@ #include "graph/partition/stage_partition.h" #include "graph/passes/addn_pass.h" #include "graph/passes/bitcast_pass.h" -#ifndef ONLY_COMPILE_OPEN_SRC #include "graph/passes/assign_remove_pass.h" #include "graph/passes/inplace_support_check_pass.h" -#endif #include "graph/passes/atomic_addr_clean_pass.h" #include "graph/passes/attach_stream_label_pass.h" #include "graph/passes/cast_remove_pass.h" @@ -2269,20 +2267,16 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { ReshapeRemovePass reshape_remove_pass; CondRemovePass condition_remove_pass; BitcastPass bitcast_pass; -#ifndef ONLY_COMPILE_OPEN_SRC AssignRemovePass assign_remove_pass; InplaceSupportCheckPass inplace_support_check_pass; -#endif names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass); names_to_passes.emplace_back("BitcastPass", &bitcast_pass); -#ifndef ONLY_COMPILE_OPEN_SRC if (GetContext().GetHostExecFlag()) { names_to_passes.emplace_back("AssignRemovePass", &assign_remove_pass); names_to_passes.emplace_back("InplaceSupportCheckPass", &inplace_support_check_pass); } -#endif GE_TIMESTAMP_START(names_to_passes); ret = GEPass(compute_graph).Run(names_to_passes); GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses"); diff --git a/ge/graph/manager/graph_mem_allocator.cc b/ge/graph/manager/graph_mem_allocator.cc index 4e31d835..f3037299 100755 --- a/ge/graph/manager/graph_mem_allocator.cc +++ b/ge/graph/manager/graph_mem_allocator.cc @@ -19,9 +19,7 @@ #include #include "graph/manager/graph_caching_allocator.h" #include "graph/manager/rdma_pool_allocator.h" -#ifndef ONLY_COMPILE_OPEN_SRC #include "graph/manager/host_mem_allocator.h" -#endif namespace ge { void MemoryAllocator::Initialize(uint32_t device_id) { GELOGI("MemoryAllocator::Initialize"); @@ -192,12 +190,10 @@ Status MemManager::Initialize(const std::vector &memory_type) { GELOGE(ge::INTERNAL_ERROR, "Create RdmaAllocator failed."); return ge::INTERNAL_ERROR; } -#ifndef ONLY_COMPILE_OPEN_SRC if (InitAllocator(memory_type, host_allocator_map_) != SUCCESS) { GELOGE(ge::INTERNAL_ERROR, "Create HostMemAllocator failed."); return ge::INTERNAL_ERROR; } -#endif return SUCCESS; } @@ -219,9 +215,7 @@ void MemManager::Finalize() noexcept { // caching and rdma allocator use memory allocator, so finalize them first FinalizeAllocatorMap(caching_allocator_map_); FinalizeAllocatorMap(rdma_allocator_map_); -#ifndef ONLY_COMPILE_OPEN_SRC FinalizeAllocatorMap(host_allocator_map_); -#endif FinalizeAllocatorMap(memory_allocator_map_); } @@ -250,9 +244,7 @@ 
CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) { RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) { return Instance().GetAllocator(memory_type, rdma_allocator_map_); } -#ifndef ONLY_COMPILE_OPEN_SRC HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) { return Instance().GetAllocator(memory_type, host_allocator_map_); } -#endif } // namespace ge diff --git a/ge/graph/manager/graph_mem_allocator.h b/ge/graph/manager/graph_mem_allocator.h index 6cdbd9b4..bd75dbb9 100644 --- a/ge/graph/manager/graph_mem_allocator.h +++ b/ge/graph/manager/graph_mem_allocator.h @@ -139,9 +139,7 @@ class MemoryAllocator { using MemoryAllocatorPtr = std::shared_ptr; class CachingAllocator; class RdmaPoolAllocator; -#ifndef ONLY_COMPILE_OPEN_SRC class HostMemAllocator; -#endif class MemManager { public: MemManager(); @@ -150,9 +148,7 @@ class MemManager { static MemoryAllocator *Instance(rtMemType_t memory_type); CachingAllocator &CachingInstance(rtMemType_t memory_type); RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); -#ifndef ONLY_COMPILE_OPEN_SRC HostMemAllocator &HostMemInstance(rtMemType_t memory_type); -#endif MemManager(const MemManager &) = delete; MemManager &operator=(const MemManager &) = delete; /// @@ -240,9 +236,7 @@ class MemManager { std::map memory_allocator_map_; std::map caching_allocator_map_; std::map rdma_allocator_map_; -#ifndef ONLY_COMPILE_OPEN_SRC std::map host_allocator_map_; -#endif std::recursive_mutex allocator_mutex_; }; } // namespace ge diff --git a/ge/graph/manager/host_mem_allocator.h b/ge/graph/manager/host_mem_allocator.h index b9dbdc4c..d10b2475 100644 --- a/ge/graph/manager/host_mem_allocator.h +++ b/ge/graph/manager/host_mem_allocator.h @@ -27,7 +27,7 @@ namespace ge { class HostMemAllocator { public: - explicit HostMemAllocator(rtMemType_t) {} + explicit HostMemAllocator(rtMemType_t) {} ~HostMemAllocator() = default; HostMemAllocator(const HostMemAllocator &) = delete; diff --git a/ge/graph/manager/host_mem_manager.cc b/ge/graph/manager/host_mem_manager.cc index c9a33f5c..60a7586d 100644 --- a/ge/graph/manager/host_mem_manager.cc +++ b/ge/graph/manager/host_mem_manager.cc @@ -43,29 +43,20 @@ Status SharedMemAllocator::Allocate(SharedMemInfo &mem_info) { return GE_GRAPH_MEMORY_ALLOC_FAILED; } mem_info.fd = output_para.fd; -#ifndef ONLY_COMPILE_OPEN_SRC mem_info.host_aligned_ptr = AlignedPtr::BuildFromAllocFunc([&output_para](std::unique_ptr &ptr) { ptr.reset(reinterpret_cast(output_para.ptr)); }, [](uint8_t *ptr) { ptr = nullptr; }); -#else - mem_info.host_address = reinterpret_cast(output_para.ptr); -#endif mem_info.device_address = reinterpret_cast(output_para.devPtr); return SUCCESS; } Status SharedMemAllocator::DeAllocate(SharedMemInfo &mem_info) { GELOGD("SharedMemAllocator::DeAllocate"); -#ifndef ONLY_COMPILE_OPEN_SRC rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd, mem_info.host_aligned_ptr->MutableGet(), mem_info.device_address}; -#else - rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd, - mem_info.host_address, mem_info.device_address}; -#endif rtError_t rt_ret = rtFreeHostSharedMemory(&free_para); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api(rtFreeHostSharedMemory) failed, ret: 0x%X.", rt_ret); diff --git a/ge/graph/manager/host_mem_manager.h b/ge/graph/manager/host_mem_manager.h index f204c9e4..be3237c3 100644 --- a/ge/graph/manager/host_mem_manager.h +++ 
b/ge/graph/manager/host_mem_manager.h @@ -42,11 +42,7 @@ struct SharedMemInfo { uint64_t mem_size = 0; int fd = 0; uint8_t *device_address = nullptr; -#ifndef ONLY_COMPILE_OPEN_SRC std::shared_ptr host_aligned_ptr = nullptr; -#else - uint8_t *host_address = nullptr; -#endif SharedMemInfo() = default; SharedMemInfo(string name, uint64_t size) : op_name(std::move(name)), mem_size(size) {} }; diff --git a/ge/graph/optimize/graph_optimize.cc b/ge/graph/optimize/graph_optimize.cc index cd80a956..8cca5b5d 100644 --- a/ge/graph/optimize/graph_optimize.cc +++ b/ge/graph/optimize/graph_optimize.cc @@ -127,6 +127,10 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std } Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) { + if (GetContext().GetHostExecFlag()) { + // graph exec on host, no need OptimizeOriginalGraph + return SUCCESS; + } if (compute_graph == nullptr) { GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeOriginalGraph]: compute_graph is nullptr."); return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL; @@ -162,7 +166,7 @@ Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) { Status GraphOptimize::OptimizeOriginalGraphJudgeInsert(ComputeGraphPtr &compute_graph) { GELOGD("OptimizeOriginalGraphJudgeInsert in"); if (GetContext().GetHostExecFlag()) { - // graph exec on host, no need OptimizeOriginalGraph + // graph exec on host, no need OptimizeOriginalGraphJudgeInsert return SUCCESS; } diff --git a/ge/graph/passes/assign_remove_pass.cc b/ge/graph/passes/assign_remove_pass.cc index 5029b9c3..51e6e006 100644 --- a/ge/graph/passes/assign_remove_pass.cc +++ b/ge/graph/passes/assign_remove_pass.cc @@ -29,7 +29,6 @@ static const std::set kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, } namespace ge { -#ifndef ONLY_COMPILE_OPEN_SRC Status AssignRemovePass::Run(NodePtr &node) { GELOGD("AssignRemovePass running"); @@ -145,71 +144,7 @@ Status AssignRemovePass::TransformAttr(NodePtr &node) { } return SUCCESS; } -#else -Status AssignRemovePass::Run(NodePtr &node) { - GELOGD("AssignRemovePass running"); - if (node->GetType() != ASSIGN) { - GELOGD("No need run AssignRemovePass on [%s, %s].", node->GetName().c_str(), node->GetType().c_str()); - return SUCCESS; - } - - const auto &ref_in_anchor = node->GetInDataAnchor(kAssignRefInputIndex); - const auto &value_in_anchor = node->GetInDataAnchor(kAssignValueInputIndex); - if ((ref_in_anchor == nullptr) || (value_in_anchor == nullptr)) { - GELOGE(FAILED, "In data anchor is null, node:%s", node->GetName().c_str()); - return FAILED; - } - const auto &ref_peer_anchor = ref_in_anchor->GetPeerOutAnchor(); - const auto &value_peer_anchor = value_in_anchor->GetPeerOutAnchor(); - if ((ref_peer_anchor == nullptr) || (value_peer_anchor == nullptr)) { - GELOGE(FAILED, "Peer data anchor is null, node:%s", node->GetName().c_str()); - return FAILED; - } - - if (IsCondMatch(node, ref_peer_anchor, value_peer_anchor)) { - /// - /// variable not-const not-const - /// \ / | - /// \ / | - /// Assign ----> variable - /// | | - /// | | - /// node node - /// - GELOGI("Optimization for assign_node %s start", node->GetName().c_str()); - if (IsolateAndDeleteNode(node, {kAssignRefInputIndex}) != SUCCESS) { - GELOGE(FAILED, "Isolate and delete assign_node %s failed.", node->GetName().c_str()); - return FAILED; - } - AddNodeDeleted(node); - - const auto &ref_input = ref_peer_anchor->GetOwnerNode()->GetOpDesc(); - const auto &value_input = value_peer_anchor->GetOwnerNode()->GetOpDesc(); - if ((ref_input == nullptr) 
|| (value_input == nullptr)) { - GELOGE(FAILED, "value input is null"); - return FAILED; - } - if (!AttrUtils::SetStr(value_input->MutableOutputDesc(value_peer_anchor->GetIdx()), ASSIGN_VAR_NAME, - ref_input->GetName())) { - GELOGE(FAILED, "Set attr ASSIGN_VAR_NAME failed."); - return FAILED; - } - // variable has and only has one input - if (ref_input->UpdateInputDesc(0, value_input->GetOutputDesc(value_peer_anchor->GetIdx())) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Update input_desc for variable %s failed.", ref_input->GetName().c_str()); - return FAILED; - } - if (GraphUtils::AddEdge(value_peer_anchor, ref_peer_anchor->GetOwnerNode()->GetInDataAnchor(0)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Add data edge %s->%s failed", value_input->GetName().c_str(), ref_input->GetName().c_str()); - return FAILED; - } - } - - GELOGD("AssignRemovePass success"); - return SUCCESS; -} -#endif /// /// @brief Check if need optimize for assign_node /// @param [in] assign_node @@ -218,7 +153,7 @@ Status AssignRemovePass::Run(NodePtr &node) { /// @return Status /// bool AssignRemovePass::IsCondMatch(const NodePtr &node, const OutDataAnchorPtr &ref_peer_anchor, - const OutDataAnchorPtr &value_peer_anchor) { + const OutDataAnchorPtr &value_peer_anchor) { GELOGD("Check if assign_node %s match optimization condition, ref_input: %s, value_input: %s", node->GetName().c_str(), ref_peer_anchor->GetOwnerNode()->GetName().c_str(), value_peer_anchor->GetOwnerNode()->GetName().c_str()); diff --git a/ge/graph/passes/assign_remove_pass.h b/ge/graph/passes/assign_remove_pass.h index f8ef2e13..6588df7b 100644 --- a/ge/graph/passes/assign_remove_pass.h +++ b/ge/graph/passes/assign_remove_pass.h @@ -25,7 +25,6 @@ class AssignRemovePass : public BaseNodePass { Status Run(NodePtr &node) override; private: -#ifndef ONLY_COMPILE_OPEN_SRC /// /// @brief Optimize for assign_node /// @param [in] assign_node @@ -39,7 +38,7 @@ class AssignRemovePass : public BaseNodePass { /// @return Status /// Status TransformAttr(NodePtr &node); -#endif + /// /// @brief Check if need optimize for assign_node /// @param [in] assign_node diff --git a/ge/graph/passes/constant_fuse_same_pass.cc b/ge/graph/passes/constant_fuse_same_pass.cc index 8ee89648..eb8b3470 100644 --- a/ge/graph/passes/constant_fuse_same_pass.cc +++ b/ge/graph/passes/constant_fuse_same_pass.cc @@ -115,21 +115,15 @@ void ConstantFuseSamePass::GetFuseConstNodes(ComputeGraphPtr &graph, TypeUtils::DataTypeToSerialString(data_type).c_str()); continue; } -#ifndef ONLY_COMPILE_OPEN_SRC if ((type_size != 0) && (weight->MutableData().GetAlignedPtr() == nullptr)) { GELOGW("aligned_ptr is null while size is not 0"); continue; } -#endif ++insert_const_nums; SameConstKey map_key; map_key.data_size = type_size; -#ifndef ONLY_COMPILE_OPEN_SRC map_key.aligned_ptr = weight->MutableData().GetAlignedPtr(); -#else - map_key.data = weight->GetData().GetData(); -#endif map_key.data_type = data_type; map_key.format = output_tensor->GetFormat(); map_key.shape = output_tensor->GetShape().GetDims(); diff --git a/ge/graph/passes/constant_fuse_same_pass.h b/ge/graph/passes/constant_fuse_same_pass.h index ae39c707..3ff2d6b7 100755 --- a/ge/graph/passes/constant_fuse_same_pass.h +++ b/ge/graph/passes/constant_fuse_same_pass.h @@ -21,20 +21,14 @@ #include #include #include -#ifndef ONLY_COMPILE_OPEN_SRC #include "graph/aligned_ptr.h" -#endif #include "graph/types.h" #include "inc/graph_pass.h" namespace ge { struct SameConstKey { int data_size; -#ifndef ONLY_COMPILE_OPEN_SRC std::shared_ptr aligned_ptr; -#else - 
const uint8_t *data; -#endif DataType data_type; Format format; std::vector shape; @@ -44,19 +38,12 @@ struct SameConstKey { if (data_size != key.data_size) { return data_size < key.data_size; } -#ifndef ONLY_COMPILE_OPEN_SRC if (data_size != 0) { int ret = memcmp(aligned_ptr->Get(), key.aligned_ptr->Get(), data_size); if (ret != 0) { return ret < 0; } } -#else - int ret = memcmp(data, key.data, data_size); - if (ret != 0) { - return ret < 0; - } -#endif if (data_type != key.data_type) { return data_type < key.data_type; } diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index f94633a1..0bfec241 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -38,9 +38,6 @@ #include "graph/passes/aicpu_constant_folding_pass.h" #include "graph/passes/assert_pass.h" #include "ge/ge_api_types.h" -#ifdef ONLY_COMPILE_OPEN_SRC -#include "graph/passes/assign_remove_pass.h" -#endif #include "graph/passes/common_subexpression_elimination_pass.h" #include "graph/passes/cond_pass.h" #include "graph/passes/cond_remove_pass.h" @@ -1865,9 +1862,6 @@ Status GraphPrepare::PrepareOptimize() { VarIsInitializedOpPass var_is_initialized_pass; ParallelConcatStartOpPass parallel_concat_start_op_pass; IdentityPass identity_pass(false); -#ifdef ONLY_COMPILE_OPEN_SRC - AssignRemovePass assign_remove_pass; -#endif SnapshotPass snapshot_pass; if (!options_.train_graph_flag) { names_to_passes.emplace_back("DropOutPass", &dropout_pass); @@ -1882,11 +1876,6 @@ Status GraphPrepare::PrepareOptimize() { names_to_passes.emplace_back("VarIsInitializedOpPass", &var_is_initialized_pass); names_to_passes.emplace_back("ParallelConcatStartOpPass", ¶llel_concat_start_op_pass); names_to_passes.emplace_back("IdentityPass", &identity_pass); -#ifdef ONLY_COMPILE_OPEN_SRC - if (GetContext().GetHostExecFlag()) { - names_to_passes.emplace_back("AssignRemovePass", &assign_remove_pass); - } -#endif GE_TIMESTAMP_START(names_to_passes); ret = ge_passes.Run(names_to_passes); GE_TIMESTAMP_END(names_to_passes, "GraphPrepare::NamesToPasses"); diff --git a/ge/hybrid/common/npu_memory_allocator.cc b/ge/hybrid/common/npu_memory_allocator.cc index c2602f37..ccd6a624 100644 --- a/ge/hybrid/common/npu_memory_allocator.cc +++ b/ge/hybrid/common/npu_memory_allocator.cc @@ -20,9 +20,7 @@ #include "graph/manager/graph_caching_allocator.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/rdma_pool_allocator.h" -#ifndef ONLY_COMPILE_OPEN_SRC #include "graph/manager/host_mem_allocator.h" -#endif namespace ge { namespace hybrid { @@ -67,11 +65,7 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) { if (mem_type == RDMA_HBM) { buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(allocate_size, device_id_); } else if (mem_type == HOST_DDR) { -#ifndef ONLY_COMPILE_OPEN_SRC buffer = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(allocate_size); -#else - buffer = malloc(allocate_size); -#endif } else { if (allocate_size > kMaxHbmMemorySize) { GELOGE(PARAM_INVALID, "Invalid HBM memory size: %zu", allocate_size); @@ -108,11 +102,7 @@ void NpuMemoryAllocator::Deallocate(void *data, MemStorageType mem_type) { if (mem_type == RDMA_HBM) { MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(reinterpret_cast(data), device_id_); } else if (mem_type == HOST_DDR) { -#ifndef ONLY_COMPILE_OPEN_SRC MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Free(data); -#else - free(data); -#endif } else { 
MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(reinterpret_cast(data), device_id_); } diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index 32fc495a..d1f61985 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -25,10 +25,8 @@ #include "graph/manager/graph_var_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" -#ifndef ONLY_COMPILE_OPEN_SRC #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/host_mem_allocator.h" -#endif #include "graph/utils/graph_utils.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/node_executor/node_executor.h" @@ -865,7 +863,6 @@ Status HybridModelBuilder::InitConstantOps() { std::unique_ptr var_tensor; if (GetContext().GetHostExecFlag()) { -#ifndef ONLY_COMPILE_OPEN_SRC GE_CHECK_NOTNULL(ge_tensor); // Address for eigen kernel should be aligned with 16 bytes // Tensors return by api GetWeights share data with proto, whose addr is not confirmed to be aligned @@ -878,11 +875,6 @@ Status HybridModelBuilder::InitConstantOps() { } var_tensor.reset(new(std::nothrow)TensorValue(aligned_tensor.MutableData().data(), aligned_tensor.GetData().size())); -#else - auto buffer = ge_tensor->MutableData(); - GELOGD("Init tensor with host constant. size = %zu", buffer.GetSize()); - var_tensor.reset(new(std::nothrow)TensorValue(buffer.GetData(), buffer.GetSize())); -#endif } else { GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor)); GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize()); @@ -937,7 +929,6 @@ Status HybridModelBuilder::InitVariableTensors() { GELOGE(GE_GRAPH_MALLOC_FAILED, "Host variable [%s] malloc failed.", it.first.c_str()); return GE_GRAPH_MALLOC_FAILED; } -#ifndef ONLY_COMPILE_OPEN_SRC if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(mem_info.host_aligned_ptr, tensor_size) == nullptr) { GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed."); @@ -947,11 +938,6 @@ Status HybridModelBuilder::InitVariableTensors() { std::unique_ptr tensor(new (std::nothrow) TensorValue(mem_info.host_aligned_ptr->MutableGet(), tensor_size)); -#else - GELOGD("Host variable [%s] malloc success.", it.first.c_str()); - - std::unique_ptr tensor(new (std::nothrow) TensorValue(mem_info.host_address, tensor_size)); -#endif GE_CHECK_NOTNULL(tensor); hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor)); } diff --git a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc index 32522fe8..0cc635e4 100755 --- a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc +++ b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc @@ -18,10 +18,8 @@ #include "hybrid/node_executor/host_cpu/kernel_factory.h" #include "graph/passes/folding_pass.h" #include "hybrid/model/hybrid_model.h" -#ifndef ONLY_COMPILE_OPEN_SRC #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/host_mem_allocator.h" -#endif #include "ge_local_engine/engine/host_cpu_engine.h" namespace ge { @@ -54,18 +52,11 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { auto input_desc_ptr = context.GetInputDesc(i); GE_CHECK_NOTNULL(input_desc_ptr); const auto &input_desc = *input_desc_ptr; -#ifndef ONLY_COMPILE_OPEN_SRC auto tensor = context.GetInput(i); GE_CHECK_NOTNULL(tensor); auto item = 
MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); GE_CHECK_NOTNULL(item.second); auto in_tensor = MakeShared(input_desc, item.second, item.first); -#else - GE_CHECK_NOTNULL(context.GetInput(i)); - auto in_tensor = MakeShared(input_desc, - reinterpret_cast(context.GetInput(i)->GetData()), - context.GetInput(i)->GetSize()); -#endif GE_CHECK_NOTNULL(in_tensor); in_tensor->MutableTensorDesc().SetDataType(input_desc.GetDataType()); in_tensor->MutableTensorDesc().SetShape(input_desc.GetShape()); @@ -84,15 +75,9 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { } auto tensor = context.GetOutput(i); GE_CHECK_NOTNULL(tensor); -#ifndef ONLY_COMPILE_OPEN_SRC auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); GE_CHECK_NOTNULL(item.second); auto out_tensor = MakeShared(output_desc, item.second, item.first); -#else - auto out_tensor = MakeShared(output_desc, - reinterpret_cast(tensor->GetData()), - tensor->GetSize()); -#endif GE_CHECK_NOTNULL(out_tensor); out_tensor->MutableTensorDesc().SetDataType(output_desc.GetDataType()); out_tensor->MutableTensorDesc().SetShape(output_desc.GetShape()); From 6fdd3de6704a35435fd88ec2008f5aadaff7b56b Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 30 Dec 2020 16:23:13 +0800 Subject: [PATCH 41/54] rm compile macro --- ge/CMakeLists.txt | 2 -- ge/common/CMakeLists.txt | 3 -- ge/executor/CMakeLists.txt | 2 -- ge/ge_local_engine/CMakeLists.txt | 5 ---- ge/ge_runtime/CMakeLists.txt | 1 - ge/graph/build/memory/CMakeLists.txt | 44 ++++++++++++++++++++++++++++ ge/host_cpu_engine/CMakeLists.txt | 5 ---- ge/offline/CMakeLists.txt | 3 -- ge/plugin/engine/CMakeLists.txt | 1 - 9 files changed, 44 insertions(+), 22 deletions(-) create mode 100644 ge/graph/build/memory/CMakeLists.txt diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index e94258ac..5181bb61 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -718,7 +718,6 @@ target_compile_definitions(ge_compiler PRIVATE FMK_HOST_INFER COMPILE_OMG_PACKAGE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(ge_compiler PRIVATE @@ -806,7 +805,6 @@ endif() target_compile_definitions(opensrc_ascendcl PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(opensrc_ascendcl PRIVATE diff --git a/ge/common/CMakeLists.txt b/ge/common/CMakeLists.txt index d2b8c8e7..bb08570a 100755 --- a/ge/common/CMakeLists.txt +++ b/ge/common/CMakeLists.txt @@ -73,7 +73,6 @@ target_compile_definitions(ge_common PRIVATE FMK_SUPPORT_DUMP OS_CENTOS google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(ge_common PRIVATE @@ -133,7 +132,6 @@ target_compile_definitions(ge_common_static PRIVATE $,OS_TYPE=WIN,OS_TYPE=0> $<$:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(ge_common_static PRIVATE @@ -182,7 +180,6 @@ target_compile_definitions(ge_common PRIVATE FMK_SUPPORT_DUMP OS_CENTOS google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(ge_common PRIVATE diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index ac4d4ebd..755bdf97 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -175,7 +175,6 @@ target_compile_definitions(ge_executor PRIVATE $,OS_TYPE=WIN,OS_TYPE=0> $<$:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(ge_executor PRIVATE @@ -218,7 +217,6 @@ target_compile_definitions(ge_executor_shared PRIVATE 
PROTOBUF_INLINE_NOT_IN_HEADERS=0 DAVINCI_SUPPORT_PROFILING google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(ge_executor_shared PRIVATE diff --git a/ge/ge_local_engine/CMakeLists.txt b/ge/ge_local_engine/CMakeLists.txt index f963730b..7189e8ff 100755 --- a/ge/ge_local_engine/CMakeLists.txt +++ b/ge/ge_local_engine/CMakeLists.txt @@ -31,7 +31,6 @@ target_compile_options(ge_local_engine PRIVATE target_compile_definitions(ge_local_engine PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(ge_local_engine PRIVATE @@ -73,7 +72,6 @@ target_compile_options(atc_ge_local_engine PRIVATE target_compile_definitions(atc_ge_local_engine PRIVATE COMPILE_OMG_PACKAGE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(atc_ge_local_engine PRIVATE @@ -119,7 +117,6 @@ target_compile_options(ge_local_opskernel_builder PRIVATE target_compile_definitions(ge_local_opskernel_builder PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(ge_local_opskernel_builder PRIVATE @@ -161,7 +158,6 @@ target_compile_options(atc_ge_local_opskernel_builder PRIVATE target_compile_definitions(atc_ge_local_opskernel_builder PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(atc_ge_local_opskernel_builder PRIVATE @@ -209,7 +205,6 @@ target_compile_options(ge_local_opskernel_builder_static PRIVATE target_compile_definitions(ge_local_opskernel_builder_static PRIVATE google=ascend_private LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(ge_local_opskernel_builder_static PRIVATE diff --git a/ge/ge_runtime/CMakeLists.txt b/ge/ge_runtime/CMakeLists.txt index ca770b15..ce1b89ea 100644 --- a/ge/ge_runtime/CMakeLists.txt +++ b/ge/ge_runtime/CMakeLists.txt @@ -27,7 +27,6 @@ target_compile_options(ge_runtime PRIVATE target_compile_definitions(ge_runtime PRIVATE PROTOBUF_INLINE_NOT_IN_HEADERS=0 - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(ge_runtime PRIVATE diff --git a/ge/graph/build/memory/CMakeLists.txt b/ge/graph/build/memory/CMakeLists.txt new file mode 100644 index 00000000..126e0187 --- /dev/null +++ b/ge/graph/build/memory/CMakeLists.txt @@ -0,0 +1,44 @@ +set(SRC_LIST + "memory_assigner.cc" + "graph_mem_assigner.cc" + "binary_block_mem_assigner.cc" + "block_mem_assigner.cc" + "hybrid_mem_assigner.cc" + "max_block_mem_assigner.cc" + "var_mem_assign_util.cc" +) + +############ libge_memory.a ############ +add_library(ge_memory STATIC ${SRC_LIST}) + +target_compile_options(ge_memory PRIVATE + -Werror + -O2 + -fno-common +) + +target_compile_definitions(ge_memory PRIVATE + google=ascend_private + LOG_CPP +) + +target_link_libraries(ge_memory PRIVATE + $ + ascend_protobuf + c_sec +) + +target_include_directories(ge_memory PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${GE_CODE_DIR}/inc/framework + #### yellow zone #### + ${GE_CODE_DIR}/../inc + #### blue zone #### + ${GE_CODE_DIR}/third_party/fwkacllib/inc +) diff --git a/ge/host_cpu_engine/CMakeLists.txt b/ge/host_cpu_engine/CMakeLists.txt index f20f810e..cbd0bd8b 100644 --- a/ge/host_cpu_engine/CMakeLists.txt +++ b/ge/host_cpu_engine/CMakeLists.txt @@ -25,7 +25,6 @@ target_compile_options(host_cpu_engine PRIVATE target_compile_definitions(host_cpu_engine PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) 
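# Context for the removals in this patch: every deleted line here is one and
# the same pattern, a compile definition injected through a CMake generator
# expression so it only took effect when the open-source build switch was on
# (ENABLE_OPEN_SRC is the switch used by the top-level CMakeLists.txt; the
# exact expression shape below is an assumption, a sketch rather than a quote):
option(ENABLE_OPEN_SRC "Build the open-source variant" OFF)
target_compile_definitions(host_cpu_engine PRIVATE
    $<$<BOOL:${ENABLE_OPEN_SRC}>:ONLY_COMPILE_OPEN_SRC>
)
# Once the #ifndef ONLY_COMPILE_OPEN_SRC blocks are removed from the sources
# by the later patches in this series, the guarded definition is dead weight,
# so each target keeps only its unconditional definitions such as
# google=ascend_private.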
target_include_directories(host_cpu_engine PRIVATE @@ -66,7 +65,6 @@ target_compile_options(atc_host_cpu_engine PRIVATE target_compile_definitions(atc_host_cpu_engine PRIVATE COMPILE_OMG_PACKAGE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(atc_host_cpu_engine PRIVATE @@ -111,7 +109,6 @@ target_compile_options(host_cpu_opskernel_builder PRIVATE target_compile_definitions(host_cpu_opskernel_builder PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(host_cpu_opskernel_builder PRIVATE @@ -152,7 +149,6 @@ target_compile_options(atc_host_cpu_opskernel_builder PRIVATE target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(atc_host_cpu_opskernel_builder PRIVATE @@ -199,7 +195,6 @@ target_compile_options(host_cpu_opskernel_builder_static PRIVATE target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE google=ascend_private LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(host_cpu_opskernel_builder_static PRIVATE diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt index cb6a3a50..d195e06f 100644 --- a/ge/offline/CMakeLists.txt +++ b/ge/offline/CMakeLists.txt @@ -30,7 +30,6 @@ target_compile_definitions(atc PRIVATE COMPILE_OMG_PACKAGE google=ascend_private LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(atc PRIVATE @@ -93,7 +92,6 @@ target_compile_definitions(atc_atc.bin PRIVATE COMPILE_OMG_PACKAGE google=ascend_private LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(atc_atc.bin PRIVATE @@ -154,7 +152,6 @@ target_compile_options(fwk_atc.bin PRIVATE -O2 -Wno-deprecated-declarations -fno-common - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_definitions(fwk_atc.bin PRIVATE diff --git a/ge/plugin/engine/CMakeLists.txt b/ge/plugin/engine/CMakeLists.txt index 65d5a8a1..f6353231 100644 --- a/ge/plugin/engine/CMakeLists.txt +++ b/ge/plugin/engine/CMakeLists.txt @@ -14,7 +14,6 @@ target_compile_options(engine PRIVATE target_compile_definitions(engine PRIVATE REUSE_MEMORY=1 PROTOBUF_INLINE_NOT_IN_HEADERS=0 - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(engine PRIVATE From f95efe48a3e16db8c6973327600eb1072eeece9e Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 30 Dec 2020 16:25:13 +0800 Subject: [PATCH 42/54] rm compile macro --- tests/ut/common/graph/CMakeLists.txt | 1 - tests/ut/ge/CMakeLists.txt | 9 --------- 2 files changed, 10 deletions(-) diff --git a/tests/ut/common/graph/CMakeLists.txt b/tests/ut/common/graph/CMakeLists.txt index e2490150..99b21182 100644 --- a/tests/ut/common/graph/CMakeLists.txt +++ b/tests/ut/common/graph/CMakeLists.txt @@ -110,7 +110,6 @@ target_compile_options(ut_libgraph PRIVATE target_compile_definitions(ut_libgraph PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ut_libgraph diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index db725dfb..9af3719b 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -898,10 +898,6 @@ target_compile_options(ut_libge_others_utest PRIVATE -g --coverage -fprofile-arcs -ftest-coverage ) -target_compile_definitions(ut_libge_others_utest PRIVATE - $<$:ONLY_COMPILE_OPEN_SRC> -) - target_link_libraries(ut_libge_others_utest $ ge_load_common ge_execute_common ge_ut_common gtest gtest_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl -lgcov @@ -919,10 +915,6 @@ target_compile_options(ut_libge_kernel_utest PRIVATE 
-g --coverage -fprofile-arcs -ftest-coverage ) -target_compile_definitions(ut_libge_kernel_utest PRIVATE - $<$:ONLY_COMPILE_OPEN_SRC> -) - target_link_libraries(ut_libge_kernel_utest $ ge_load_common ge_ut_common gtest gtest_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl -lgcov @@ -943,7 +935,6 @@ target_compile_options(ut_libge_distinct_load_utest PRIVATE target_compile_definitions(ut_libge_distinct_load_utest PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ut_libge_distinct_load_utest From 94bdb8e280fc6e44a1ad0a5877d86ac6918bda5f Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 30 Dec 2020 16:34:52 +0800 Subject: [PATCH 43/54] rm compile macro --- ge/graph/passes/assign_remove_pass.cc | 3 +-- ge/graph/passes/inplace_support_check_pass.cc | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ge/graph/passes/assign_remove_pass.cc b/ge/graph/passes/assign_remove_pass.cc index 51e6e006..72e108c3 100644 --- a/ge/graph/passes/assign_remove_pass.cc +++ b/ge/graph/passes/assign_remove_pass.cc @@ -19,6 +19,7 @@ #include "graph/utils/graph_utils.h" #include "graph/debug/ge_attr_define.h" +namespace ge { namespace { constexpr uint32_t kValidInputNodeOutputNum = 1; constexpr int32_t kAssignRefInputIndex = 0; @@ -27,8 +28,6 @@ static const std::set kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::CONSTANT, ge::CONSTANTOP, ge::VARIABLE, ge::VARIABLEV2 }; } - -namespace ge { Status AssignRemovePass::Run(NodePtr &node) { GELOGD("AssignRemovePass running"); diff --git a/ge/graph/passes/inplace_support_check_pass.cc b/ge/graph/passes/inplace_support_check_pass.cc index 73cc7f3b..44ad8361 100644 --- a/ge/graph/passes/inplace_support_check_pass.cc +++ b/ge/graph/passes/inplace_support_check_pass.cc @@ -19,6 +19,7 @@ #include "graph/utils/graph_utils.h" #include "graph/debug/ge_attr_define.h" +namespace ge { namespace { constexpr uint32_t kInplaceSupportOutputIndex = 0; constexpr uint32_t kInplaceSupportOutputNum = 1; @@ -26,8 +27,6 @@ static const std::set kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge: ge::CONSTANT, ge::CONSTANTOP, ge::VARIABLE, ge::VARIABLEV2 }; } - -namespace ge { Status InplaceSupportCheckPass::Run(NodePtr &node) { GELOGD("InplaceSupportCheckPass running"); if (node->GetAllOutDataAnchorsSize() != kInplaceSupportOutputNum) { From be2a31e2289faccdb7712ff931ea60db3c037f3e Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Tue, 5 Jan 2021 19:28:51 +0800 Subject: [PATCH 44/54] rm macro --- ge/CMakeLists.txt | 1 - ge/client/ge_api.cc | 8 -------- inc/framework/common/ge_types.h | 3 --- inc/framework/omg/parser/model_parser.h | 2 -- tests/ut/ge/CMakeLists.txt | 5 ----- 5 files changed, 19 deletions(-) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 5181bb61..317ff00a 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -654,7 +654,6 @@ target_compile_definitions(ge_runner PRIVATE FMK_SUPPORT_DUMP DAVINCI_CLOUD google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(ge_runner PRIVATE diff --git a/ge/client/ge_api.cc b/ge/client/ge_api.cc index 75cc92d4..d65d7667 100644 --- a/ge/client/ge_api.cc +++ b/ge/client/ge_api.cc @@ -32,9 +32,7 @@ #include "graph/common/ge_call_wrapper.h" #include "register/op_registry.h" #include "common/ge/tbe_plugin_manager.h" -#ifndef ONLY_COMPILE_OPEN_SRC #include "toolchain/plog.h" -#endif using domi::OpRegistry; using std::map; @@ -132,11 +130,9 @@ Status GEInitializeImpl(const std::map &options) { // Initialize GE, prepare for execution, call GELib::Initialize 
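// The guards deleted in the hunks below made the plog calls open-source-only;
// after this patch both GEInitialize overloads call DlogReportInitialize and
// GEFinalize calls DlogReportFinalize unconditionally, so device log
// reporting is set up once per process before any GE work and torn down at
// shutdown. A minimal sketch of that pairing (the calls are taken verbatim
// from this diff; the helper names are illustrative only, not part of it):
static void InitDeviceLogReportSketch() {
  if (DlogReportInitialize() != SUCCESS) {  // from toolchain/plog.h
    GELOGW("Dlog report device log initialize failed.");
  }
}
static void FinalizeDeviceLogReportSketch() {
  if (DlogReportFinalize() != SUCCESS) {
    GELOGW("Dlog report device log finalize failed.");
  }
}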
Status GEInitialize(const std::map &options) { -#ifndef ONLY_COMPILE_OPEN_SRC if (DlogReportInitialize() != SUCCESS) { GELOGW("Dlog report device log initialize failed."); } -#endif return GEInitializeImpl(options); } @@ -151,11 +147,9 @@ Status GEInitialize(const std::map &options) { std::string val = option.second.GetString(); str_options[key] = val; } -#ifndef ONLY_COMPILE_OPEN_SRC if (DlogReportInitialize() != SUCCESS) { GELOGW("Dlog report device log initialize failed."); } -#endif return GEInitializeImpl(str_options); } @@ -200,11 +194,9 @@ Status GEFinalize() { // to avoid memory fragment, use malloc_trim to back free stack to system malloc_trim(0); -#ifndef ONLY_COMPILE_OPEN_SRC if (DlogReportFinalize() != SUCCESS) { GELOGW("Dlog report device log finalize failed."); } -#endif GELOGT(TRACE_STOP, "GEFinalize finished"); return ret; diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index d845654e..8327b72c 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -37,10 +37,7 @@ enum FrameworkType { MINDSPORE = 1, TENSORFLOW = 3, ANDROID_NN, -#ifndef ONLY_COMPILE_OPEN_SRC ONNX, -#endif - FRAMEWORK_RESERVED, }; enum OpEngineType { diff --git a/inc/framework/omg/parser/model_parser.h b/inc/framework/omg/parser/model_parser.h index 57cff9a7..9eda685d 100644 --- a/inc/framework/omg/parser/model_parser.h +++ b/inc/framework/omg/parser/model_parser.h @@ -65,7 +65,6 @@ class ModelParser { */ virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; -#ifndef ONLY_COMPILE_OPEN_SRC /** * @ingroup domi_omg * @brief Parse relevant data from memory and save it to graph @@ -77,7 +76,6 @@ class ModelParser { * @author */ virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0; -#endif /** * @ingroup domi_omg diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 9af3719b..3a06507c 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -723,7 +723,6 @@ add_library(ge_ut_common STATIC ${COMMON_SRC_FILES} ${PROTO_SRCS} ${PROTO_HDRS}) target_compile_definitions(ge_ut_common PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ge_ut_common PRIVATE @@ -738,7 +737,6 @@ add_library(ge_ut_common_format STATIC ${COMMON_SRC_FILES} ${COMMON_FORMAT_SRC_F target_compile_definitions(ge_ut_common_format PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ge_ut_common_format PRIVATE @@ -795,7 +793,6 @@ add_library(ge_load_common STATIC ${GRAPH_LOAD_COMMON_SRC_FILES} ${PROTO_SRCS} $ target_compile_definitions(ge_load_common PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ge_load_common PRIVATE @@ -810,7 +807,6 @@ add_library(ge_execute_common STATIC ${GRAPH_EXECUTE_COMMON_SRC_FILES} ${PROTO_S target_compile_definitions(ge_execute_common PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ge_execute_common PRIVATE @@ -825,7 +821,6 @@ add_library(ge_build_common STATIC ${GRAPH_BUILD_COMMON_SRC_FILES} ${PROTO_SRCS} target_compile_definitions(ge_build_common PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ge_build_common PRIVATE From f262eb8f2ce702b2ab33d0850fbc5f3df2c73009 Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Tue, 5 Jan 2021 19:41:10 +0800 Subject: [PATCH 45/54] rm macro --- ge/graph/build/memory/CMakeLists.txt | 44 ---------------------------- 1 file changed, 44 deletions(-) delete 
mode 100644 ge/graph/build/memory/CMakeLists.txt diff --git a/ge/graph/build/memory/CMakeLists.txt b/ge/graph/build/memory/CMakeLists.txt deleted file mode 100644 index 126e0187..00000000 --- a/ge/graph/build/memory/CMakeLists.txt +++ /dev/null @@ -1,44 +0,0 @@ -set(SRC_LIST - "memory_assigner.cc" - "graph_mem_assigner.cc" - "binary_block_mem_assigner.cc" - "block_mem_assigner.cc" - "hybrid_mem_assigner.cc" - "max_block_mem_assigner.cc" - "var_mem_assign_util.cc" -) - -############ libge_memory.a ############ -add_library(ge_memory STATIC ${SRC_LIST}) - -target_compile_options(ge_memory PRIVATE - -Werror - -O2 - -fno-common -) - -target_compile_definitions(ge_memory PRIVATE - google=ascend_private - LOG_CPP -) - -target_link_libraries(ge_memory PRIVATE - $ - ascend_protobuf - c_sec -) - -target_include_directories(ge_memory PRIVATE - ${CMAKE_CURRENT_LIST_DIR} - ${GE_CODE_DIR}/ge - ${GE_CODE_DIR}/inc - ${GE_CODE_DIR}/inc/external - ${METADEF_DIR}/inc - ${METADEF_DIR}/inc/external - ${METADEF_DIR}/inc/external/graph - ${GE_CODE_DIR}/inc/framework - #### yellow zone #### - ${GE_CODE_DIR}/../inc - #### blue zone #### - ${GE_CODE_DIR}/third_party/fwkacllib/inc -) From 202bb6bdd35d6ea6e31410e3ff788f1d7ca10802 Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Tue, 5 Jan 2021 21:03:14 +0800 Subject: [PATCH 46/54] rm macro --- CMakeLists.txt | 8 ++++---- cmake/intf_pub_linux.cmake | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9194f119..88ce15ff 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,7 +72,7 @@ if (ENABLE_OPEN_SRC) endif() set(GE_LIB_PATH ${GE_LIB_PATH}/${GE_SYS_ARCH}) set(STATIC_ACL_LIB ${GE_LIB_PATH}) - find_module(slog libslog.so ${GE_LIB_PATH}) + find_module(slog libalog.so ${GE_LIB_PATH}) find_module(static_mmpa libmmpa.a ${GE_LIB_PATH}) find_module(msprofiler_ext libmsprofiler.a ${GE_LIB_PATH}) find_module(hccl libhccl.so ${GE_LIB_PATH}) @@ -88,7 +88,7 @@ if (ENABLE_OPEN_SRC) elseif(ENABLE_GE_COV OR ENABLE_GE_UT) add_subdirectory(tests) else() - find_module(slog libslog.so ${ASCEND_ATC_DIR}) + find_module(slog libalog.so ${ASCEND_ATC_DIR}) find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) if(PLATFORM STREQUAL "train") @@ -154,7 +154,7 @@ elseif (ENABLE_D OR ENABLE_ACL) include(cmake/intf_pub_linux.cmake) # common libraries - find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH}) + find_module(slog libalog.so ${ASCEND_MS_DRIVER_PATH}) find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) @@ -174,7 +174,7 @@ elseif(ENABLE_MS_TESTCASES) include(cmake/intf_pub_linux.cmake) # common libraries - find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH}) + find_module(slog libalog.so ${ASCEND_MS_DRIVER_PATH}) find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) diff --git a/cmake/intf_pub_linux.cmake b/cmake/intf_pub_linux.cmake index 40c6bca9..61237d11 100755 --- a/cmake/intf_pub_linux.cmake +++ b/cmake/intf_pub_linux.cmake @@ -16,6 +16,7 @@ target_compile_definitions(intf_pub INTERFACE $<$:CFG_BUILD_DEBUG> WIN64=1 LINUX=0 + LOG_CPP ) target_link_options(intf_pub INTERFACE -Wl,-z,relro From 739849bc74e56d845546c117792381b302423d0f Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 6 Jan 2021 09:58:01 +0800 
Subject: [PATCH 47/54] rm macro --- metadef | 2 +- parser | 2 +- .../aicpu/aicpu_schedule/aicpu_op_type_list.h | 120 +++++++-------- third_party/fwkacllib/inc/hccl/hcom.h | 128 ++++------------ .../fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h | 3 +- .../fwkacllib/inc/mmpa/sub_inc/mmpa_win.h | 1 + .../inc/register/op_kernel_registry.h | 49 +++++++ .../fwkacllib/inc/register/op_registry.h | 96 ++++++++++++ third_party/fwkacllib/inc/runtime/base.h | 25 +--- third_party/fwkacllib/inc/runtime/config.h | 6 +- third_party/fwkacllib/inc/runtime/context.h | 2 +- third_party/fwkacllib/inc/runtime/dev.h | 2 +- third_party/fwkacllib/inc/runtime/kernel.h | 2 +- third_party/fwkacllib/inc/runtime/mem.h | 34 ++--- third_party/fwkacllib/inc/runtime/stream.h | 1 + .../fwkacllib/inc/soft_dp/ExternalSoftDp.h | 52 +++++++ third_party/fwkacllib/inc/toolchain/slog.h | 138 ++++++++++++++++++ 17 files changed, 460 insertions(+), 203 deletions(-) create mode 100644 third_party/fwkacllib/inc/register/op_kernel_registry.h create mode 100644 third_party/fwkacllib/inc/register/op_registry.h create mode 100644 third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h diff --git a/metadef b/metadef index fe37bc34..f08320a6 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit fe37bc343ea52c76d35e9e9ec83cea0151bfa900 +Subproject commit f08320a6d699f5b537bf66da572bf225b9cd330e diff --git a/parser b/parser index 336cd310..b2df31dc 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 336cd3107253d3fe41cfb9fec2db62b5f3d8a33b +Subproject commit b2df31dc5810283e2e483df5ba9517e2ece132a0 diff --git a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h index 7e0f94a8..8d16467c 100644 --- a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h +++ b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h @@ -1,60 +1,60 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef AICPU_OP_TYPE_LIST_H_ -#define AICPU_OP_TYPE_LIST_H_ - -enum OpKernelType { - TF_KERNEL, - CPU_KERNEL -}; - -enum ReturnCode { - OP_TYPE_NOT_SUPPORT, - FORMAT_NOT_SUPPORT, - DTYPE_NOT_SUPPORT -}; - -#pragma pack(push, 1) -//One byte alignment -struct SysOpInfo { - uint64_t opLen; - uint64_t opType; - OpKernelType kernelsType; -}; - -struct OpParamInfo { - uint64_t num; - uint64_t dtypeList; - uint64_t formatList; -}; - -struct SysOpCheckInfo { - uint64_t opListNum; - uint64_t offSetLen; - uint64_t sysOpInfoList; - uint64_t opParamInfoList; -}; - -struct SysOpCheckResp { - uint64_t opListNum; - bool isWithoutJson; - uint64_t returnCodeList; - uint64_t sysOpInfoList; - uint64_t opParamInfoList; -}; -#pragma pack(pop) -#endif // AICPU_OP_TYPE_LIST_H_ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef AICPU_OP_TYPE_LIST_H_ +#define AICPU_OP_TYPE_LIST_H_ + +enum OpKernelType { + TF_KERNEL, + CPU_KERNEL +}; + +enum ReturnCode { + OP_TYPE_NOT_SUPPORT, + FORMAT_NOT_SUPPORT, + DTYPE_NOT_SUPPORT +}; + +#pragma pack(push, 1) +//One byte alignment +struct SysOpInfo { + uint64_t opLen; + uint64_t opType; + OpKernelType kernelsType; +}; + +struct OpParamInfo { + uint64_t num; + uint64_t dtypeList; + uint64_t formatList; +}; + +struct SysOpCheckInfo { + uint64_t opListNum; + uint64_t offSetLen; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; + +struct SysOpCheckResp { + uint64_t opListNum; + bool isWithoutJson; + uint64_t returnCodeList; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; +#pragma pack(pop) +#endif // AICPU_OP_TYPE_LIST_H_ diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h index e491d43f..972f470c 100644 --- a/third_party/fwkacllib/inc/hccl/hcom.h +++ b/third_party/fwkacllib/inc/hccl/hcom.h @@ -33,15 +33,6 @@ extern "C" { -/** - * @brief Get the rank number in the group. - * - * @param group A string identifying the group name. - * @param rankSize A pointer identifying the rank number. - * @return HcclResult - */ -HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); - /** * @brief Get the rank number in the group. * @@ -51,15 +42,6 @@ HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); */ HcclResult HcomGetRankSize(const char *group, u32 *rankSize); -/** - * @brief Get the rank number of this rank's server within the group. - * - * @param group A string identifying the group name. - * @param localRankSize A pointer identifying the rank number. - * @return HcclResult - */ -HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); - /** * @brief Get the rank number of this rank's server within the group. * @@ -69,15 +51,6 @@ HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); */ HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize); -/** - * @brief Get the rank id of this rank. - * - * @param group A string identifying the group name. - * @param rankId A pointer identifying the rank id. - * @return HcclResult - */ -HcclResult hcom_get_rank_id(const char *group, u32 *rankId); - /** * @brief Get the rank id of this rank. * @@ -87,15 +60,6 @@ HcclResult hcom_get_rank_id(const char *group, u32 *rankId); */ HcclResult HcomGetRankId(const char *group, u32 *rankId); -/** - * @brief Get the local rank id of this rank's server within the group. - * - * @param group A string identifying the group name. - * @param localRankId A pointer identifying the local rank id. - * @return HcclResult - */ -HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); - /** * @brief Get the local rank id of this rank's server within the group. * @@ -105,16 +69,6 @@ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); */ HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId); -/** - * @brief Get the world rank id according to the group rank id. 
- * - * @param group A string identifying the group name. - * @param groupRank An integer(u32) identifying the group rank id. - * @param worldRank A pointer identifying the world rank id. - * @return HcclResult - */ -HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); - /** * @brief Get the world rank id according to the group rank id. * @@ -125,16 +79,6 @@ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, */ HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank); -/** - * @brief Get the group rank id according to the world rank id. - * - * @param worldRank An integer(u32) identifying the world rank id. - * @param group A string identifying the group name. - * @param groupRank A pointer identifying the group rank id. - * @return HcclResult - */ -HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank); - /** * @brief Get the group rank id according to the world rank id. * @@ -145,16 +89,6 @@ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, */ HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank); -/** - * @brief Create group. - * - * @param group A string identifying the group name. - * @param rankNum An integer(u32) identifying the number of ranks in the group. - * @param rankIds A list identifying the ranks in the group. - * @return HcclResult - */ -HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); - /** * @brief Create group. * @@ -165,14 +99,6 @@ HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); */ HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds); -/** - * @brief Destroy group - * - * @param group A string identifying the group name. - * @return HcclResult - */ -HcclResult hcom_destroy_group(const char *group); - /** * @brief Destroy group * @@ -189,46 +115,54 @@ HcclResult HcomDestroyGroup(const char *group); * @param IdxList A list identifying the index of end gradient in each segment. * @return HcclResult */ -extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); +extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); /** - * @brief Set the gradient split strategy with in the group, according to gradient index. + * @brief Set the gradient split strategy with in the group, according to gradient data size. * * @param group A string identifying the group name. * @param segmentNum An integer(u32) identifying the segments number of gradients. - * @param IdxList A list identifying the index of end gradient in each segment. + * @param sizeList A list identifying the percent of each segment. * @return HcclResult */ -extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); +extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); /** - * @brief Set the gradient split strategy with in the group, according to gradient data size. + * @brief Initialize hcom executor. * - * @param group A string identifying the group name. - * @param segmentNum An integer(u32) identifying the segments number of gradients. - * @param sizeList A list identifying the percent of each segment. 
+ * @param void * @return HcclResult */ -extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); +HcclResult HcomExecInitialize(); /** - * @brief Set the gradient split strategy with in the group, according to gradient data size. + * @brief Finalize hcom executor. * - * @param group A string identifying the group name. - * @param segmentNum An integer(u32) identifying the segments number of gradients. - * @param sizeList A list identifying the percent of each segment. + * @param void * @return HcclResult */ -extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); +HcclResult HcomExecFinalize(); /** - * @brief Register memories and init resources for remote access. + * @brief Put collective communication operation into hcom executor. * - * @param addrList memory addresses for remote access. - * @param count number of remote memory addresses. + * @param opInfo information about collective communication operation. + * @param callback callback after collective communication operation. * @return HcclResult */ -extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count); +HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function callback); + +/** + * @brief Put remote access operation into hcom executor. + * + * @param remoteAccessType operation type (read or write). + * @param addrInfos address information about collective communication operation. + * @param callback callback after collective communication operation. + * @return HcclResult + */ +HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, + const std::vector& addrInfos, + std::function callback); /** * @brief Register memories and init resources for remote access. 
@@ -239,16 +173,6 @@ extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrLis */ extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); -HcclResult HcomExecInitialize(); - -HcclResult HcomExecFinalize(); - -HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function callback); - -HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, - const std::vector& addrInfos, - std::function callback); - #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index ad48f70b..005014ed 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -279,8 +279,9 @@ typedef struct { #define M_NAME_MAX MAX_FNAME #define M_F_OK F_OK -#define M_R_OK R_OK +#define M_X_OK X_OK #define M_W_OK W_OK +#define M_R_OK R_OK #define MM_DT_DIR DT_DIR #define MM_DT_REG DT_REG diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h index cecdd4a7..49e97a5d 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h @@ -322,6 +322,7 @@ typedef VOID (*mmPf)(VOID); #define M_NAME_MAX _MAX_FNAME #define M_F_OK 0 +#define M_X_OK 1 #define M_W_OK 2 #define M_R_OK 4 diff --git a/third_party/fwkacllib/inc/register/op_kernel_registry.h b/third_party/fwkacllib/inc/register/op_kernel_registry.h new file mode 100644 index 00000000..5fed8960 --- /dev/null +++ b/third_party/fwkacllib/inc/register/op_kernel_registry.h @@ -0,0 +1,49 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_REGISTER_OP_KERNEL_REGISTRY_H_ +#define INC_REGISTER_OP_KERNEL_REGISTRY_H_ +#include +#include +#include "register/register_types.h" +#include "register.h" + +namespace ge { +class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpKernelRegistry { + public: + using CreateFn = HostCpuOp* (*)(); + ~OpKernelRegistry(); + + static OpKernelRegistry& GetInstance() { + static OpKernelRegistry instance; + return instance; + } + + bool IsRegistered(const std::string &op_type); + + void RegisterHostCpuOp(const std::string &op_type, CreateFn create_fn); + + std::unique_ptr CreateHostCpuOp(const std::string &op_type); + + private: + OpKernelRegistry(); + class OpKernelRegistryImpl; + /*lint -e148*/ + std::unique_ptr impl_; +}; +} // namespace ge + +#endif // INC_REGISTER_OP_KERNEL_REGISTRY_H_ diff --git a/third_party/fwkacllib/inc/register/op_registry.h b/third_party/fwkacllib/inc/register/op_registry.h new file mode 100644 index 00000000..318eb3ba --- /dev/null +++ b/third_party/fwkacllib/inc/register/op_registry.h @@ -0,0 +1,96 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_REGISTER_OP_REGISTRY_H_ +#define INC_REGISTER_OP_REGISTRY_H_ + +#include +#include +#include +#include +#include + +#include "register/register.h" + +namespace domi { +enum RemoveInputType { + OMG_MOVE_TYPE_DTYPE = 0, + OMG_MOVE_TYPE_VALUE, + OMG_MOVE_TYPE_SHAPE, + OMG_MOVE_TYPE_FORMAT, + OMG_MOVE_TYPE_AXIS, + OMG_MOVE_TYPE_SCALAR_VALUE, + OMG_REMOVE_TYPE_WITH_COND = 1000, + OMG_REMOVE_INPUT_WITH_ORIGINAL_TYPE, + OMG_INPUT_REORDER, +}; + +struct RemoveInputConfigure { + int inputIdx = INT_MAX; + std::string attrName; + RemoveInputType moveType; + bool attrValue = false; + std::string originalType; + std::vector input_order; +}; + +class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry { + public: + static OpRegistry *Instance(); + + std::vector registrationDatas; + + bool Register(const OpRegistrationData ®_data); + + domi::ImplyType GetImplyType(const std::string &op_type); + + void GetOpTypeByImplyType(std::vector &vec_op_type, const domi::ImplyType &imply_type); + + domi::ParseParamFunc GetParseParamFunc(const std::string &op_type, const std::string &ori_type); + + domi::ParseParamByOpFunc GetParseParamByOperatorFunc(const std::string &ori_type); + + domi::FusionParseParamFunc GetFusionParseParamFunc(const std::string &op_type, const std::string &ori_type); + + domi::FusionParseParamByOpFunc GetFusionParseParamByOpFunc(const std::string &op_type, + const std::string &ori_type); + + domi::ParseSubgraphFunc GetParseSubgraphPostFunc(const std::string &op_type); + + Status GetParseSubgraphPostFunc(const std::string &op_type, domi::ParseSubgraphFuncV2 &parse_subgraph_func); + + domi::ImplyType GetImplyTypeByOriOpType(const std::string &ori_optype); + + const std::vector &GetRemoveInputConfigure(const std::string &ori_optype) const; + + bool GetOmTypeByOriOpType(const std::string &ori_optype, std::string 
&om_type); + + ParseOpToGraphFunc GetParseOpToGraphFunc(const std::string &op_type, const std::string &ori_type); + + private: + std::unordered_map op_run_mode_map_; + std::unordered_map op_parse_params_fn_map_; + std::unordered_map parse_params_by_op_func_map_; + std::unordered_map fusion_op_parse_params_fn_map_; + std::unordered_map fusion_parse_params_by_op_fn_map_; + std::unordered_map op_types_to_parse_subgraph_post_func_; + std::unordered_map> remove_input_configure_map_; + std::unordered_map origin_type_to_om_type_; + std::unordered_map parse_op_to_graph_fn_map_; + std::unordered_map op_types_to_parse_subgraph_post_func_v2_; +}; +} // namespace domi +#endif // INC_REGISTER_OP_REGISTRY_H_ diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index b9b2cbe5..ebfc09f3 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -81,26 +81,17 @@ typedef enum tagRtLimitType { } rtLimitType_t; typedef struct rtExceptionInfo { - uint32_t taskid; - uint32_t streamid; - uint32_t tid; - uint32_t deviceid; + uint32_t taskid; + uint32_t streamid; + uint32_t tid; + uint32_t deviceid; + uint32_t retcode; } rtExceptionInfo; -typedef struct rtTaskFailInfo { - uint32_t taskid; - uint32_t streamid; - uint32_t tid; - uint32_t deviceid; - uint32_t retcode; -} rtTaskFailInfo; - typedef void (*rtErrorCallback)(rtExceptionType); typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); -typedef void (*rtTaskFailCallbackByModule)(rtTaskFailInfo *exceptionInfo); - typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); /** @@ -143,13 +134,13 @@ RTS_API rtError_t rtProfilerConfig(uint16_t type); * @ingroup profiling_base * @brief start rts profiler. */ -RTS_API rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList); +RTS_API rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t *deviceList); /** * @ingroup profiling_base * @brief stop rts profiler. 
*/ -RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList); +RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t *deviceList); /** * @ingroup profiling_base @@ -209,7 +200,7 @@ RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCal * @param [out] NA * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallbackByModule callback); +RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallback callback); /** * @ingroup dvrt_base diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index 12a407d7..8bfc9893 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -42,6 +42,7 @@ typedef enum tagRtChipType { CHIP_MDC, CHIP_LHISI, CHIP_DC, + CHIP_CLOUD_V2, CHIP_END, } rtChipType_t; @@ -62,6 +63,7 @@ typedef enum tagRtPlatformType { PLATFORM_LHISI_ES, PLATFORM_LHISI_CS, PLATFORM_DC, + PLATFORM_CLOUD_V2, PLATFORM_END, } rtPlatformType_t; @@ -119,7 +121,9 @@ typedef struct tagRtMemoryConfig { uint32_t compilerSize; } rtMemoryConfig_t; -typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t; +typedef struct tagRtPlatformConfig { + uint32_t platformConfig; +} rtPlatformConfig_t; /** * @ingroup diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index 4be49a8c..ee0d8f0a 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -47,7 +47,7 @@ typedef struct tagRtGroupInfo { uint32_t aivectorNum; uint32_t sdmaNum; uint32_t activeStreamNum; - void* extrPtr; + void *extrPtr; } rtGroupInfo_t; /** diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index d1a91a9b..d6ffbc9a 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -185,7 +185,7 @@ RTS_API rtError_t rtDisableP2P(uint32_t devIdDes, uint32_t phyIdSrc); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDeviceCanAccessPeer(int32_t* canAccessPeer, uint32_t device, uint32_t peerDevice); +RTS_API rtError_t rtDeviceCanAccessPeer(int32_t *canAccessPeer, uint32_t device, uint32_t peerDevice); /** * @ingroup dvrt_dev diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index 5f519442..f44b181c 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -387,7 +387,7 @@ typedef void *rtModel_t; * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ - RTS_API rtError_t rtDumpAddrSet(rtModel_t model, void *addr, uint32_t dumpSize, uint32_t flag); +RTS_API rtError_t rtDumpAddrSet(rtModel_t model, void *addr, uint32_t dumpSize, uint32_t flag); /** * @ingroup rt_kernel diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index e65d8604..32bd9e6b 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -159,11 +159,11 @@ typedef struct rtAiCoreMemorySize { * @ingroup dvrt_mem * @brief memory type */ -typedef enum tagRtMemoryType { - RT_MEMORY_TYPE_HOST = 1, - RT_MEMORY_TYPE_DEVICE = 2 , - RT_MEMORY_TYPE_SVM = 3, - RT_MEMORY_TYPE_DVPP = 4 +typedef enum tagRtMemoryType { + RT_MEMORY_TYPE_HOST = 1, + 
RT_MEMORY_TYPE_DEVICE = 2, + RT_MEMORY_TYPE_SVM = 3, + RT_MEMORY_TYPE_DVPP = 4 } rtMemoryType_t; /** @@ -179,23 +179,23 @@ typedef struct tagRtPointerAttributes { typedef struct rtMallocHostSharedMemoryIn { - const char* name; - const uint64_t size; - uint32_t flag; + const char *name; + const uint64_t size; + uint32_t flag; } rtMallocHostSharedMemoryIn; typedef struct rtMallocHostSharedMemoryOut { - int fd; - void* ptr; - void* devPtr; + int fd; + void *ptr; + void *devPtr; } rtMallocHostSharedMemoryOut; typedef struct rtFreeHostSharedMemoryIn { - const char* name; - const uint64_t size; - int fd; - void* ptr; - void* devPtr; + const char *name; + const uint64_t size; + int fd; + void *ptr; + void *devPtr; } rtFreeHostSharedMemoryIn; @@ -267,7 +267,7 @@ RTS_API rtError_t rtFreeHost(void *hostPtr); */ RTS_API rtError_t rtMallocHostSharedMemory(rtMallocHostSharedMemoryIn *in, - rtMallocHostSharedMemoryOut *out); + rtMallocHostSharedMemoryOut *out); /** * @ingroup dvrt_mem diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h index 388fd3c2..6b9f80ae 100644 --- a/third_party/fwkacllib/inc/runtime/stream.h +++ b/third_party/fwkacllib/inc/runtime/stream.h @@ -36,6 +36,7 @@ extern "C" { #define RT_STREAM_FORBIDDEN_DEFAULT (0x10) #define RT_STREAM_HEAD (0x20) #define RT_STREAM_PRIMARY_DEFAULT (0x40) +#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80) /** * @ingroup stream_type diff --git a/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h b/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h new file mode 100644 index 00000000..b642cbc8 --- /dev/null +++ b/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h @@ -0,0 +1,52 @@ +/** +* @file ExternalSoftDp.h +* +* Copyright (c) Huawei Technologies Co., Ltd. 2012-2018. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ + +#ifndef EXTERNALSOFTDP_H +#define EXTERNALSOFTDP_H + +#include + +extern "C" { +struct SoftDpProcsessInfo { + uint8_t* inputBuffer; + uint32_t inputBufferSize; + + uint8_t* outputBuffer; + uint32_t outputBufferSize; + + uint32_t outputWidth; + uint32_t outputHeight; + + uint32_t reserved; +}; + +struct DpCropInfo { + uint32_t left; + uint32_t right; + uint32_t up; + uint32_t down; +}; + +/* + * @brief decode and resize interface + * @param [in] SoftDpProcsessInfo& softDpProcsessInfo : soft dp struct + * @return success: return 0, fail: return error number + */ +uint32_t DecodeAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo); + +/* + * @brief decode crop and resize interface + * @param [in] SoftDpProcsessInfo& softDpProcsessInfo : soft dp struct + * @param [in] const DpCropInfo& cropInfo: crop struct + * @return success: return 0, fail: return error number + */ +uint32_t DecodeAndCropAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo, const DpCropInfo& cropInfo); +} +#endif // EXTERNALSOFTDP_H diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h index bce58f32..2ebce7d9 100644 --- a/third_party/fwkacllib/inc/toolchain/slog.h +++ b/third_party/fwkacllib/inc/toolchain/slog.h @@ -18,7 +18,9 @@ #define D_SYSLOG_H_ #ifdef __cplusplus +#ifndef LOG_CPP extern "C" { +#endif #endif // __cplusplus #ifndef LINUX @@ -105,6 +107,7 @@ extern "C" { #define SECURITY_LOG_MASK (0x00100000) #define RUN_LOG_MASK (0x01000000) #define OPERATION_LOG_MASK (0x10000000) +#define RESERVERD_LENGTH 52 typedef struct tagDCODE { const char *cName; @@ -116,6 +119,18 @@ typedef struct tagKV { char *value; } KeyValue; +typedef enum { + APPLICATION = 0, + SYSTEM +} ProcessType; + +typedef struct { + ProcessType type; + unsigned int pid; + unsigned int deviceId; + char reserved[RESERVERD_LENGTH]; +} LogAttr; + /** * @ingroup slog * @@ -228,6 +243,14 @@ DLL_EXPORT int dlog_setlevel(int moduleId, int level, int enableEvent); */ DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel); +/** + * @ingroup slog + * @brief DlogSetAttr: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION + * @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID) + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogSetAttr(LogAttr logAttr); + /** * @ingroup slog * @brief dlog_error: print error log @@ -367,6 +390,121 @@ void DlogInner(int moduleId, int level, const char *fmt, ...); void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); #ifdef __cplusplus +#ifndef LOG_CPP } +#endif // LOG_CPP #endif // __cplusplus + +#ifdef LOG_CPP +#ifdef __cplusplus +extern "C" { +#endif +/** + * @ingroup slog + * @brief DlogGetlevelForC: get module loglevel and enableEvent + * + * @param [in]moduleId: moudule id(see slog.h, eg: CCE), others: invalid + * @param [out]enableEvent: 1: enable; 0: disable + * @return: module level(0: debug, 1: info, 2: warning, 3: error, 4: null output) + */ +DLL_EXPORT int DlogGetlevelForC(int moduleId, int *enableEvent); + +/** + * @ingroup slog + * @brief DlogSetlevelForC: set module loglevel and enableEvent + * + * @param [in]moduleId: moudule id(see slog.h, eg: CCE), -1: all modules, others: invalid + * @param [in]level: log level(0: debug, 1: info, 2: warning, 3: error, 4: null output) + * @param [in]enableEvent: 1: enable; 0: disable, others:invalid + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int 
DlogSetlevelForC(int moduleId, int level, int enableEvent); + +/** + * @ingroup slog + * @brief CheckLogLevelForC: check module level enable or not + * users no need to call it because all dlog interface(include inner interface) has already called + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]logLevel: eg: DLOG_EVENT/DLOG_ERROR/DLOG_WARN/DLOG_INFO/DLOG_DEBUG + * @return: 1:enable, 0:disable + */ +DLL_EXPORT int CheckLogLevelForC(int moduleId, int logLevel); + +/** + * @ingroup slog + * @brief DlogSetAttrForC: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION + * @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID) + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr); + +/** + * @ingroup slog + * @brief DlogForC: print log, need caller to specify level + * call CheckLogLevelForC in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) + * @param [in]fmt: log content + */ +#define DlogForC(moduleId, level, fmt, ...) \ + do { \ + if(CheckLogLevelForC(moduleId, level) == 1) { \ + DlogInnerForC(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief DlogSubForC: print log, need caller to specify level and submodule + * call CheckLogLevelForC in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]submodule: eg: engine + * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) + * @param [in]fmt: log content + */ +#define DlogSubForC(moduleId, submodule, level, fmt, ...) \ + do { \ + if(CheckLogLevelForC(moduleId, level) == 1) { \ + DlogInnerForC(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief DlogWithKVForC: print log, need caller to specify level and other paramters + * call CheckLogLevelForC in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) + * @param [in]pstKVArray: key-value array + * @param [in]kvNum: key-value element num in array + * @param [in]fmt: log content + */ +#define DlogWithKVForC(moduleId, level, pstKVArray, kvNum, fmt, ...) 
\ + do { \ + if(CheckLogLevelForC(moduleId, level) == 1) { \ + DlogWithKVInnerForC(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief DlogFlushForC: flush log buffer to file + */ +DLL_EXPORT void DlogFlushForC(void); + +/** + * @ingroup slog + * @brief Internal log interface, other modules are not allowed to call this interface + */ +void DlogInnerForC(int moduleId, int level, const char *fmt, ...); +void DlogWithKVInnerForC(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); + +#ifdef __cplusplus +} +#endif +#endif // LOG_CPP #endif // D_SYSLOG_H_ From 9c4dc808d4765b1c46309879935833ed6c43f3b3 Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 30 Dec 2020 10:11:34 +0800 Subject: [PATCH 48/54] rm compile macro --- ge/graph/passes/assign_remove_pass.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/ge/graph/passes/assign_remove_pass.cc b/ge/graph/passes/assign_remove_pass.cc index 72e108c3..e198c2db 100644 --- a/ge/graph/passes/assign_remove_pass.cc +++ b/ge/graph/passes/assign_remove_pass.cc @@ -28,6 +28,7 @@ static const std::set kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::CONSTANT, ge::CONSTANTOP, ge::VARIABLE, ge::VARIABLEV2 }; } + Status AssignRemovePass::Run(NodePtr &node) { GELOGD("AssignRemovePass running"); From 99479830ce5d034d40a08631190a1892f1e626af Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 30 Dec 2020 16:23:13 +0800 Subject: [PATCH 49/54] rm compile macro --- ge/graph/build/memory/CMakeLists.txt | 44 ++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 ge/graph/build/memory/CMakeLists.txt diff --git a/ge/graph/build/memory/CMakeLists.txt b/ge/graph/build/memory/CMakeLists.txt new file mode 100644 index 00000000..126e0187 --- /dev/null +++ b/ge/graph/build/memory/CMakeLists.txt @@ -0,0 +1,44 @@ +set(SRC_LIST + "memory_assigner.cc" + "graph_mem_assigner.cc" + "binary_block_mem_assigner.cc" + "block_mem_assigner.cc" + "hybrid_mem_assigner.cc" + "max_block_mem_assigner.cc" + "var_mem_assign_util.cc" +) + +############ libge_memory.a ############ +add_library(ge_memory STATIC ${SRC_LIST}) + +target_compile_options(ge_memory PRIVATE + -Werror + -O2 + -fno-common +) + +target_compile_definitions(ge_memory PRIVATE + google=ascend_private + LOG_CPP +) + +target_link_libraries(ge_memory PRIVATE + $ + ascend_protobuf + c_sec +) + +target_include_directories(ge_memory PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${GE_CODE_DIR}/inc/framework + #### yellow zone #### + ${GE_CODE_DIR}/../inc + #### blue zone #### + ${GE_CODE_DIR}/third_party/fwkacllib/inc +) From 8b460476adffb7d74360e302bd4e3f2247f8a51c Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Tue, 5 Jan 2021 19:41:10 +0800 Subject: [PATCH 50/54] rm macro --- ge/graph/build/memory/CMakeLists.txt | 44 ---------------------------- 1 file changed, 44 deletions(-) delete mode 100644 ge/graph/build/memory/CMakeLists.txt diff --git a/ge/graph/build/memory/CMakeLists.txt b/ge/graph/build/memory/CMakeLists.txt deleted file mode 100644 index 126e0187..00000000 --- a/ge/graph/build/memory/CMakeLists.txt +++ /dev/null @@ -1,44 +0,0 @@ -set(SRC_LIST - "memory_assigner.cc" - "graph_mem_assigner.cc" - "binary_block_mem_assigner.cc" - "block_mem_assigner.cc" - "hybrid_mem_assigner.cc" - "max_block_mem_assigner.cc" - 
"var_mem_assign_util.cc" -) - -############ libge_memory.a ############ -add_library(ge_memory STATIC ${SRC_LIST}) - -target_compile_options(ge_memory PRIVATE - -Werror - -O2 - -fno-common -) - -target_compile_definitions(ge_memory PRIVATE - google=ascend_private - LOG_CPP -) - -target_link_libraries(ge_memory PRIVATE - $ - ascend_protobuf - c_sec -) - -target_include_directories(ge_memory PRIVATE - ${CMAKE_CURRENT_LIST_DIR} - ${GE_CODE_DIR}/ge - ${GE_CODE_DIR}/inc - ${GE_CODE_DIR}/inc/external - ${METADEF_DIR}/inc - ${METADEF_DIR}/inc/external - ${METADEF_DIR}/inc/external/graph - ${GE_CODE_DIR}/inc/framework - #### yellow zone #### - ${GE_CODE_DIR}/../inc - #### blue zone #### - ${GE_CODE_DIR}/third_party/fwkacllib/inc -) From 343c93d67036a8bafdda9b432075bf7c525eabb7 Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 6 Jan 2021 11:26:34 +0800 Subject: [PATCH 51/54] rm compile macro --- third_party/fwkacllib/inc/toolchain/plog.h | 59 ++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 third_party/fwkacllib/inc/toolchain/plog.h diff --git a/third_party/fwkacllib/inc/toolchain/plog.h b/third_party/fwkacllib/inc/toolchain/plog.h new file mode 100644 index 00000000..0d42e31d --- /dev/null +++ b/third_party/fwkacllib/inc/toolchain/plog.h @@ -0,0 +1,59 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _PLOG_H_ +#define _PLOG_H_ + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#ifndef LINUX +#define LINUX 0 +#endif // LINUX + +#ifndef WIN +#define WIN 1 +#endif + +#ifndef OS_TYPE +#define OS_TYPE 0 +#endif // OS_TYPE + +#if (OS_TYPE == LINUX) +#define DLL_EXPORT __attribute__((visibility("default"))) +#else +#define DLL_EXPORT _declspec(dllexport) +#endif + +/** + * @ingroup plog + * @brief DlogReportInitialize: init log in service process before all device setting. + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogReportInitialize(); + +/** + * @ingroup plog + * @brief DlogReportFinalize: release log resource in service process after all device reset. 
+ * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogReportFinalize(); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // D_PLOG_H_ From 04105fb40f512818d471c9a6c53e17adbd25d300 Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 6 Jan 2021 11:44:12 +0800 Subject: [PATCH 52/54] rm compile macro --- tests/ut/common/graph/CMakeLists.txt | 1 + tests/ut/ge/CMakeLists.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/ut/common/graph/CMakeLists.txt b/tests/ut/common/graph/CMakeLists.txt index 99b21182..1c64dce1 100644 --- a/tests/ut/common/graph/CMakeLists.txt +++ b/tests/ut/common/graph/CMakeLists.txt @@ -67,6 +67,7 @@ set(SRC_FILES "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" "${GE_CODE_DIR}/metadef/graph/attr_value.cc" "${GE_CODE_DIR}/metadef/graph/buffer.cc" + "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" "${GE_CODE_DIR}/metadef/graph/ge_attr_define.cc" "${GE_CODE_DIR}/metadef/graph/graph.cc" diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 3a06507c..72cbaf63 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -89,6 +89,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" "${GE_CODE_DIR}/metadef/graph/attr_value.cc" "${GE_CODE_DIR}/metadef/graph/buffer.cc" + "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" "${GE_CODE_DIR}/metadef/graph/graph.cc" "${GE_CODE_DIR}/metadef/graph/gnode.cc" From 46ea5518d1970968384e545d43cb072c88444b8e Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 6 Jan 2021 12:52:01 +0800 Subject: [PATCH 53/54] rm compile macro --- tests/depends/cce/CMakeLists.txt | 1 + tests/ut/ge/CMakeLists.txt | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/depends/cce/CMakeLists.txt b/tests/depends/cce/CMakeLists.txt index 85e69e6d..7550c63f 100644 --- a/tests/depends/cce/CMakeLists.txt +++ b/tests/depends/cce/CMakeLists.txt @@ -46,6 +46,7 @@ set(SRCS "${GE_CODE_DIR}/metadef/graph/anchor.cc" "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" "${GE_CODE_DIR}/metadef/graph/buffer.cc" + "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" "${GE_CODE_DIR}/metadef/graph/graph.cc" "${GE_CODE_DIR}/metadef/graph/model.cc" diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 72cbaf63..2ebe9fc9 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -89,7 +89,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" "${GE_CODE_DIR}/metadef/graph/attr_value.cc" "${GE_CODE_DIR}/metadef/graph/buffer.cc" - "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" + "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" "${GE_CODE_DIR}/metadef/graph/graph.cc" "${GE_CODE_DIR}/metadef/graph/gnode.cc" @@ -228,6 +228,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/for_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/enter_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/assign_remove_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/inplace_support_check_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/addn_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/common_subexpression_elimination_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/transop_symmetry_elimination_pass.cc" @@ -304,6 +305,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/common/local_context.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" + 
"${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" "${GE_CODE_DIR}/ge/common/dump/dump_op.cc" "${GE_CODE_DIR}/ge/common/model_saver.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" @@ -371,6 +373,7 @@ set(GRAPH_LOAD_COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" "${GE_CODE_DIR}/ge/common/thread_pool.cc" ) From 0ad4302f4e3e7d2cdd8714d1e43e04be1b68345a Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 6 Jan 2021 13:07:57 +0800 Subject: [PATCH 54/54] rm compile macro --- tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc b/tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc index 6d34ab59..5c75bd01 100644 --- a/tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc +++ b/tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc @@ -230,7 +230,7 @@ TEST_F(UtestGeTensor, test_tensor_invalid_null) { GeTensor tensor(msg_owner, nullptr); EXPECT_EQ(tensor.GetData().size(), 0); EXPECT_EQ(tensor.MutableData().size(), 0); - EXPECT_EQ(tensor.SetData(Buffer(100)), ge::GRAPH_PARAM_INVALID); + EXPECT_EQ(tensor.SetData(Buffer(100)), GRAPH_SUCCESS); TensorUtils::SetWeightSize(tensor.MutableTensorDesc(), 100); EXPECT_EQ(TensorUtils::GetWeightSize(tensor), 0);