| SHA1 | Message | Date |
|---|---|---|
| caf8b80936 | update RELEASE.md. | 4 years ago |
| 8770bfcdd7 | !1526 update commite id<br>From: @shenwei41 Reviewed-by: @liucunwei,@xsmq Signed-off-by: @liucunwei | 4 years ago |
| 329a144864 | Merge remote-tracking branch 'origin/r1.2' into code_sync_0415 | 4 years ago |
| da1a60bc02 | !1512 multi-thread online infer<br>From: @HW_KK Reviewed-by: @ji_chen,@wqtshg,@ji_chen Signed-off-by: @ji_chen | 4 years ago |
| 76c0c3a371 | multi-thread online infer | 4 years ago |
| 2d446b8def | !1503 change model_name for dump<br>From: @jiming6 Reviewed-by: @xchu42,@wqtshg Signed-off-by: @liyihan123,@ji_chen | 4 years ago |
| c144b4bb9e | !1507 Adaptation run package 0412<br>From: @shenwei41 Reviewed-by: @lilongfei15,@liucunwei Signed-off-by: @lilongfei15,@liucunwei | 4 years ago |
| 4928f86819 | update commit id | 4 years ago |
| b5a55e9ca9 | Merge remote-tracking branch 'origin/r1.2' into code_sync_0412 | 4 years ago |
| 48d7b6dc8b | fix | 4 years ago |
| daf8e56d25 | fix | 4 years ago |
| 46156bf04f | fix dump | 4 years ago |
| ee67c45a2b | !1487 Fix hccl control dependency<br>From: @xchu42 Reviewed-by: @ji_chen,@wqtshg,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 4b90851c68 | !1484 remove unused func InsertMemcpyNode<br>From: @lichun30 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 2e8d863a1e | !1482 ge static check<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 44415f12c8 | !1492 modify single op dump bug in c77<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 36f2c837bf | Fix hccl control dependency | 4 years ago |
| f49599b6c5 | modify single op dump bug | 4 years ago |
| 99e607c6d1 | !1490 fix optional input bug<br>From: @wan_xuelei Reviewed-by: @xchu42,@wqtshg Signed-off-by: @lbisdaddy | 4 years ago |
| d5f56ad31c | fix optional input bug | 4 years ago |
| c73a3c7b46 | fix sc check error | 4 years ago |
| f971f512e3 | static check modify | 4 years ago |
| 7f73eedb8a | !1478 Don't reset -2 when there is aicore op.<br>From: @zhao_zhixuan Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| ed941d6d87 | !1461 modify dump single op in c77<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 089b82e9bd | !1469 modify dynamic shape dump in c77<br>From: @zhou_chao1993 Reviewed-by: @ji_chen,@xchu42 Signed-off-by: @lbisdaddy | 4 years ago |
| e52c916f56 | Don't reset -2 when there is aicore op. | 4 years ago |
| 4c8e5f73c6 | !1476 Bugfix: Missing hccl execution dependency due to wrong attribute type of _parallel_group<br>From: @xchu42 Reviewed-by: @ji_chen,@wqtshg Signed-off-by: @ji_chen | 4 years ago |
| a4783ff468 | !1460 Reduce weight memory usage & Remove redundant memcpy<br>From: @xchu42 Reviewed-by: @wqtshg,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 19d1f804c7 | Bugfix: keep hccl control dependency | 4 years ago |
| c90cae1410 | modify dynamic shape dump | 4 years ago |
| 4c0d85693a | !1463 Save atomic kernel bin to model.<br>From: @zhao_zhixuan Reviewed-by: @xchu42,@ji_chen Signed-off-by: @ji_chen | 4 years ago |
| b48ecfe347 | Save atomic kernel bin to model. | 4 years ago |
| d7b607dc83 | !1464 fix aipp check<br>From: @wangxiaotian22 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 637bcc86d6 | modify dump single op | 4 years ago |
| 30743e1e59 | fix aipp check | 4 years ago |
| 24b2437361 | Fix dump for known-shaped subgraph | 4 years ago |
| 6d92a616ea | !1455 Synchronize latest Ascend software suite 06 Apr 2021<br>From: @nicholas_yhr Reviewed-by: @majorzhang,@lilongfei15 Signed-off-by: @majorzhang | 4 years ago |
| 03e87b5570 | Merge remote-tracking branch 'upstream/r1.2' into code_sync_0406 | 4 years ago |
| 3ef3f54d94 | Save atomic kernel bin to model. | 4 years ago |
| 34f09f4fc8 | !1447 LinkToPotentialPrecedenceNode<br>From: @dimitri_rose Reviewed-by: @sheng-nan,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 73e7c53f8a | !1448 Fix bug of const input index.<br>From: @zhao_zhixuan Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 494fa061a8 | !1444 modify dump content in c77<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| aeec1cb08b | !1446 modify set dump in c77<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 960cc1fd64 | Fix bug of const input index. | 4 years ago |
| 5f1e659fcd | LinkToPotentialPrecedenceNode | 4 years ago |
| b1822cc73c | modify set dump in c77 | 4 years ago |
| 4931c4fa1e | modify dump content | 4 years ago |
| 24d3b54ab8 | !1443 synchronize latest ascend softare suite 02 Apr 2021<br>From: @nicholas_yhr Reviewed-by: @lilongfei15,@ljl0711 Signed-off-by: @lilongfei15 | 4 years ago |
| 2fbf01c53f | Merge remote-tracking branch 'origin/r1.2' into code_sync_0402 | 4 years ago |
| 9d6aaa117c | !1419 Add GetOriginalType for support RefSwitch & RefMerge<br>From: @chen_yemeng Reviewed-by: @xchu42,@wqtshg Signed-off-by: @lbisdaddy | 4 years ago |
| 0da36c04e4 | !1421 fixed sc warning<br>From: @li-lei0106 Reviewed-by: @wqtshg,@xchu42 Signed-off-by: @ji_chen | 4 years ago |
| 2ac43d4033 | !1430 fix 1951 ts 4g bug<br>From: @wan_xuelei Reviewed-by: @xchu42,@wqtshg Signed-off-by: @lbisdaddy | 4 years ago |
| 2112a36e80 | !1415 support unknown while subgraph<br>From: @lichun30 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 68595a656a | fix ts 4g memory bug | 4 years ago |
| 890373c79c | fixed reviewbot warning | 4 years ago |
| 7a40a575f7 | Add GetOriginalType for support RefSwitch & RefMerge | 4 years ago |
| 701b0d6c1b | support unknown while subgraph | 4 years ago |
| da71533e55 | !1345 fixed sc warning<br>From: @li-lei0106 Reviewed-by: Signed-off-by: | 4 years ago |
| af83c480c5 | !1388 Feature: Tiger online inference support<br>From: @hugo1 Reviewed-by: @xchu42,@ji_chen,@wqtshg Signed-off-by: @ji_chen | 4 years ago |
| c936821629 | modified: metadef | 4 years ago |
| 971630a7d2 | !1400 Bugfix: While loop failed to restore original input after execution<br>From: @xchu42 Reviewed-by: @ji_chen,@wqtshg Signed-off-by: @liyihan123,@ji_chen | 4 years ago |
| 1735e1b1f3 | !1402 l2 buffer for f1.3.0<br>From: @youui Reviewed-by: @ji_chen,@xchu42 Signed-off-by: @liyihan123,@ji_chen | 4 years ago |
| 12cef9e9b9 | support unknown while subgraph | 4 years ago |
| 0679af1d75 | !1409 update include files 0330<br>From: @shenwei41 Reviewed-by: @lilongfei15,@wenkai_dist,@ljl0711 Signed-off-by: @lilongfei15 | 4 years ago |
| 5ddf2ac2b2 | update include files to 1.2 | 4 years ago |
| 7516130c7e | delete code | 4 years ago |
| de47249a72 | !1407 update commit id to r1.2 0330<br>From: @shenwei41 Reviewed-by: @xsmq,@lilongfei15 Signed-off-by: @lilongfei15 | 4 years ago |
| df1592e97a | Merge remote-tracking branch 'origin/r1.2' into code_sync_0330 | 4 years ago |
| 7ec6e4fe61 | r13_l2 | 4 years ago |
| 7ed03d0d0e | !1398 fix import<br>From: @youui Reviewed-by: @ljl0711,@liujunzhu Signed-off-by: @liujunzhu | 4 years ago |
| 1d0359d1c6 | fixed pclint warning | 4 years ago |
| e9868abe29 | fixed sc warning by wangxiaotian | 4 years ago |
| 4fe73f77bc | fixed sc warning | 4 years ago |
| 59a3e2e0ff | fix import | 4 years ago |
| 4a7f623b12 | while loop failed to restore input desc | 4 years ago |
| 8e0634323d | modified: ge/graph/passes/base_pass.h | 4 years ago |
| f19cd2fca9 | !1386 Adding dependencies by parallel groups<br>From: @xchu42 Reviewed-by: @ji_chen,@wqtshg Signed-off-by: @lbisdaddy | 4 years ago |
| c691f2a7d7 | !1385 Fix error of single_op memory free.<br>From: @zhao_zhixuan Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| e2f04ddabd | !1375 bugfix for atomic_addr_clean_pass<br>From: @yangyongqiang5033 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 50552c3631 | modified: ge/graph/passes/base_pass.cc<br>modified: ge/graph/passes/base_pass.h modified: ge/graph/passes/infershape_pass.cc modified: ge/hybrid/executor/hybrid_model_async_executor.cc modified: ge/hybrid/executor/subgraph_executor.cc modified: ge/hybrid/node_executor/aicore/aicore_op_task.cc | 4 years ago |
| 167621141b | hccl ops with same parallel group can not be execute parallelly | 4 years ago |
| aad154cdf1 | Fix error of single_op memory free. | 4 years ago |
| aead0be2d6 | !1372 online_inference c77<br>From: @dimitri_rose Reviewed-by: @ji_chen,@sheng-nan Signed-off-by: @ji_chen | 4 years ago |
| 2cf49ced1c | online_inference c77 | 4 years ago |
| b8621d9d0e | !1370 bugfix for auto find fp<br>From: @ni100die Reviewed-by: @xchu42,@wqtshg Signed-off-by: @lbisdaddy | 4 years ago |
| 9d34427af9 | bugfix for atomic_addr_clean_pass | 4 years ago |
| 37c928ed29 | bugfix for auto find fp | 4 years ago |
| 0901ca5581 | !1337 Fix bug of single_op inferdepend.<br>From: @zhao_zhixuan Reviewed-by: @xchu42,@xchu42,@ji_chen Signed-off-by: @liyihan123,@ji_chen | 4 years ago |
| 1224cdee8a | !1306 dump for unknownshape<br>From: @jiming6 Reviewed-by: @xchu42,@wqtshg Signed-off-by: @ji_chen | 4 years ago |
| 56007bea30 | !1351 sync runtime head<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@youui,@ji_chen Signed-off-by: @liyihan123,@ji_chen | 4 years ago |
| 168508b063 | !1354 update include file 0325<br>From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmq | 4 years ago |
| 23b471ca2b | update include file 0325 | 4 years ago |
| e2f929b761 | !1352 update graphengine_0325<br>From: @shenwei41 Reviewed-by: @majorzhang,@xsmq Signed-off-by: @xsmq | 4 years ago |
| 518ac24516 | update metdef | 4 years ago |
| a6bcb04c7f | Merge remote-tracking branch 'origin/r1.2' into code_sync_0325 | 4 years ago |
| 5367bbe395 | !1316 fixed compiled issue for proto files<br>From: @li-lei0106 Reviewed-by: @ji_chen,@wqtshg Signed-off-by: @ji_chen | 4 years ago |
| f0d897b0bb | fixed compiled issue for proto files | 4 years ago |
| 13ecbe405a | sync runtime head | 4 years ago |
| 3050d3984a | !1307 fix bug of dynamic shape load error<br>From: @wan_xuelei Reviewed-by: @ji_chen,@xchu42 Signed-off-by: @lbisdaddy | 4 years ago |
| 1ccd0dd9ee | !1341 modify profiing reporter data max len<br>From: @zhengyuanhua Reviewed-by: @ji_chen,@xchu42 Signed-off-by: @lbisdaddy | 4 years ago |
| 97d93adaa5 | modify profiling reporter data max len | 4 years ago |
| 4238e11e99 | !1339 refactor label manager<br>From: @zhoufeng54 Reviewed-by: @liujunzhu,@youui Signed-off-by: @youui | 4 years ago |
| ba2fcefa04 | refactor label manager<br>Signed-off-by: zhoufeng <zhoufeng54@huawei.com> | 4 years ago |
| 77d5468cf6 | Fix bug of single_op inferdepend. | 4 years ago |
| a89113e743 | fix bug of dynamic shape load error | 4 years ago |
| e3fbf4d860 | !1277 offline dynamic shape inference support<br>From: @lichun30 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 36eb9620d4 | !1304 label goto implentment modified to be same as label switch<br>From: @zhoufeng54 Reviewed-by: @liujunzhu,@majorzhang Signed-off-by: @majorzhang | 4 years ago |
| 179e10f36b | label switch<br>Signed-off-by: zhupuxu <zhupuxu@huawei.com> | 4 years ago |
| 745153a252 | !1302 update include headers 0318<br>From: @shenwei41 Reviewed-by: @xsmq,@liucunwei Signed-off-by: @liucunwei | 4 years ago |
| f87dd9d016 | update include headers | 4 years ago |
| d9d99c3cf5 | !1300 Update GE commit id<br>From: @shenwei41 Reviewed-by: @xsmq,@liucunwei Signed-off-by: @liucunwei | 4 years ago |
| dfb2f4b7af | update commit id format | 4 years ago |
| 4a18a6791d | Merge remote-tracking branch 'origin/r1.2' into code_sync_0318 | 4 years ago |
| e6d3c77e80 | offline dynamic shape inference support | 4 years ago |
| 81ac111f09 | !1259 Unique LabelGoto args addr<br>From: @zhangxiaokun9 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 40e5c42a12 | !1267 workspace of comm op can be reused<br>From: @zhoufeng54 Reviewed-by: @youui,@liujunzhu Signed-off-by: @youui | 4 years ago |
| d6308151e0 | reuse workspace memory of hccl op<br>Signed-off-by: zhoufeng <zhoufeng54@huawei.com> | 4 years ago |
| 6e874e8b87 | Unique LabelGoto args addr | 4 years ago |
| 4d6e7acc14 | !1252 update submodule<br>From: @wqtshg Reviewed-by: @xchu42,@ji_chen Signed-off-by: @ji_chen | 4 years ago |
| 67bdf03f4b | update submodule | 4 years ago |
| f65be61197 | !1250 update ge<br>From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmq | 4 years ago |
| 6a82dd1947 | update ge | 4 years ago |
| 6ce82eff9b | !1242 update ge Compile Error<br>From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmq | 4 years ago |
| 948000fe5b | fix ge Compile Error | 4 years ago |
| c0f3dcb4f4 | !1241 update include header files 0311<br>From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmq | 4 years ago |
| 54a48678ae | update include headers 0311 | 4 years ago |
| 8737b1843d | !1234 update ge_0311<br>From: @shenwei41 Reviewed-by: @xsmq Signed-off-by: | 4 years ago |
| 10faa7b052 | change | 4 years ago |
| 5dd83a9abd | Merge remote-tracking branch 'origin/release' into code_sync_0311 | 4 years ago |
| 3401ca857c | dump for unknownshape | 4 years ago |
| 92286b21ec | !1155 update metadef<br>From: @shenwei41 Reviewed-by: @xsmq,@lilongfei15 Signed-off-by: @lilongfei15 | 4 years ago |
| a1795d6554 | update metdef | 4 years ago |
| f37d94ba21 | !1153 update_headerfiles_0225<br>From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmq | 4 years ago |
| 3e5f9dac74 | update include headers_0225 | 4 years ago |
| d487b4781d | !1150 update graphengine_0225<br>From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmq | 4 years ago |
| 0ebcb55f3f | Merge remote-tracking branch 'origin/release' into code_sync_0225 | 4 years ago |
| 210a007d8e | !1139 update_headerfiles_0222<br>From: @shenwei41 Reviewed-by: @xsmq,@lilongfei15 Signed-off-by: @lilongfei15 | 4 years ago |
| cc456d5803 | update header files 0222 | 4 years ago |
| b9050a8c51 | !1133 update graphengine_0222<br>From: @shenwei41 Reviewed-by: @lilongfei15,@ljl0711 Signed-off-by: @ljl0711 | 4 years ago |
| 028f47826e | Merge remote-tracking branch 'origin/release' into code_sync_0222 | 4 years ago |
| 6ebc4745de | !1104 modify pkg lib<br>From: @changzherui Reviewed-by: @ljl0711,@liujunzhu Signed-off-by: @liujunzhu | 4 years ago |
| aa4045b31d | modify pkg lib | 4 years ago |
| 8dc712ca01 | !1096 update<br>From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmq | 4 years ago |
| de4224d3ba | update | 4 years ago |
| d28090d511 | !1095 update metadef<br>From: @shenwei41 Reviewed-by: @xsmq,@lilongfei15 Signed-off-by: @lilongfei15 | 4 years ago |
| 543fd0a0e8 | update metadef | 4 years ago |
| 06894dcd48 | !1093 update prebuild<br>From: @shenwei41 Reviewed-by: @xsmq,@lilongfei15 Signed-off-by: @lilongfei15 | 4 years ago |
| 29e380abf6 | update prebuild | 4 years ago |
| e7036bb984 | !1079 update include file<br>From: @shenwei41 Reviewed-by: @lilongfei15,@liucunwei Signed-off-by: @liucunwei | 4 years ago |
| cffc6b2e2e | update include file | 4 years ago |
| ea0e2eadad | !1071 update commit id<br>From: @shenwei41 Reviewed-by: @liujunzhu,@lilongfei15 Signed-off-by: @lilongfei15 | 4 years ago |
| c332519b2e | Merge remote-tracking branch 'origin/release' into code_sync_0203 | 4 years ago |
| 08e4e60663 | !1056 modify error_codes.h<br>From: @changzherui Reviewed-by: @lilongfei15,@liujunzhu Signed-off-by: @liujunzhu | 4 years ago |
| b4539d54cd | modify error_codes.h | 4 years ago |
| 2d95c17f11 | !1040 sync code 0126<br>From: @changzherui Reviewed-by: @liujunzhu,@lilongfei15 Signed-off-by: @liucunwei | 4 years ago |
| 46dbe7a2fc | Merge remote-tracking branch 'upstream/release' into code_sync_126 | 4 years ago |
| dfa4dd4acd | !1024 Synchronization code .h 0125<br>From: @changzherui Reviewed-by: @guoqi1024,@liujunzhu Signed-off-by: @guoqi1024 | 4 years ago |
| ed7e35f927 | sync code 0125 .h | 4 years ago |
| 7183c03452 | !1022 Synchronization code 0125<br>From: @changzherui Reviewed-by: @ljl0711,@guoqi1024 Signed-off-by: @guoqi1024 | 4 years ago |
| a8b7570e71 | Merge remote-tracking branch 'upstream/release' into code_sync_0125 | 4 years ago |
| 687d5f2ab1 | !966 sync code 0116 h<br>From: @changzherui Reviewed-by: @liujunzhu,@ljl0711 Signed-off-by: @ljl0711 | 4 years ago |
| e3b32cd2a0 | clang-format | 4 years ago |
| 411e71f1f3 | sync code h | 4 years ago |
| 50f17e37d9 | !963 code sync 0116<br>From: @changzherui Reviewed-by: @liujunzhu,@guoqi1024 Signed-off-by: @guoqi1024 | 4 years ago |
| 2b200b25ed | sync code 0116 | 4 years ago |
| 20a0326976 | !567 prioritize json downloading from gitee<br>From: @nicholas_yhr Reviewed-by: @liujunzhu,@youui Signed-off-by: @youui | 5 years ago |
| d77f36e017 | prioritize json downloading from gitee | 5 years ago |
| 75572bb987 | !562 Synchronize latest Ascend software suite 09 Dec 2020<br>From: @nicholas_yhr Reviewed-by: @liujunzhu,@ljl0711 Signed-off-by: @ljl0711 | 5 years ago |
| d5a82a7f98 | Synchronize latest Ascend software suite 09 Dec 2020 | 5 years ago |
| 24b53b9282 | !546 fix geruntime missing files and error codes<br>From: @nicholas_yhr Reviewed-by: @youui,@liujunzhu Signed-off-by: @liujunzhu | 5 years ago |
| d731918198 | fix geruntime missing files and error codes | 5 years ago |
| 8712387b3c | !533 update headers for release branch, for mindspore use<br>From: @nicholas_yhr Reviewed-by: @youui,@liujunzhu Signed-off-by: @liujunzhu | 5 years ago |
| 22ac2e8c14 | !537 find libraries from both atc and fwk paths<br>From: @nicholas_yhr Reviewed-by: @youui,@liujunzhu Signed-off-by: @liujunzhu | 5 years ago |
| fec2e70eda | find libraries from both atc and fwk paths | 5 years ago |
| 8011e1ea9a | update headers | 5 years ago |
```diff
@@ -1,8 +1,8 @@
 [submodule "parser"]
 	path = parser
 	url = https://gitee.com/ascend/parser.git
-	branch = master
+	branch = r1.3.0
 [submodule "metadef"]
 	path = metadef
 	url = https://gitee.com/ascend/metadef.git
-	branch = master
+	branch = r1.3.0
@@ -82,8 +82,8 @@ if (ENABLE_OPEN_SRC)
 elseif(ENABLE_GE_COV OR ENABLE_GE_UT)
   add_subdirectory(tests)
 else()
-  find_module(slog libalog.so ${ASCEND_ATC_DIR})
-  find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR})
+  find_module(slog libalog.so ${ASCEND_ATC_DIR} ${ASCEND_DRIVER_COMMON_DIR})
+  find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR})
   if(PLATFORM STREQUAL "train")
     find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
     find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
@@ -150,6 +150,7 @@ elseif(ENABLE_MS_TESTCASES)
 include(cmake/external_libs/protobuf_static.cmake)
 include(cmake/external_libs/protoc.cmake)
 include(cmake/external_libs/securec.cmake)
+include(cmake/external_libs/json.cmake)
 include(cmake/FindModule.cmake)
 include(cmake/intf_pub_linux.cmake)
@@ -1,3 +1,18 @@
+### Major Features and Improvements
+* Multiple parallel communication groups can be distinguished and the communication tasks of different parallel communication groups can be divided into different streams.
+* Parallel tasks are added for the entire map optimization. The execution sequence is optimized for parallel groups.
+* Dynamic shape is supported for single operator in inference scenarios.
+* Online inference supports concurrent execution of multiple threads.
+* Memory allocation supports address reuse in the buffer pool.
+* Supports Event resource reuse.
+* Supports the BF16 data type.
+## Thanks to our Contributors
+Thanks goes to these wonderful people: wuweikang,weiyang,yanghaorang,xutianchun,shibeiji,zhouchao, tanghuikang, zhoulili, liujunzhu, zhengyuanhua, taoxiangdong Contributions of any kind are welcome!
+Contributions of any kind are welcome!
 # Release 1.0.0
 ## Major Features and Improvements
@@ -229,7 +229,7 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
   rm -rf ${BASEPATH}/cov
   mkdir ${BASEPATH}/cov
   lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
-  lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
+  lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' '/usr/include/*' '*/metadef/*' '*/parser/*' -o cov/coverage.info
   cd ${BASEPATH}/cov
   genhtml coverage.info
 fi
@@ -9,10 +9,6 @@ if (GE_PB_PKG)
   set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip")
   set(MD5 "0dc903888211db3a0f170304cd9f3a89")
   set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
-#elseif (ENABLE_GITEE)
-#  set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip")
-#  set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7")
-#set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
 else()
   set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
   set(MD5 "0dc903888211db3a0f170304cd9f3a89")
@@ -31,6 +31,7 @@ set(PROTO_HEADER_LIST
 protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
 protobuf_generate(ge PROTO_CLIENT_SRCS PROTO_CLIENT_HDRS ${PROTO_CLIENT_LIST})
 protobuf_generate(ge PROTO_HEADER_SRCS PROTO_HEADER_HDRS ${PROTO_HEADER_LIST})
+protobuf_generate(ge_client PROTO_CLIENT_HEADER_SRCS PROTO_CLIENT_HEADER_HDRS ${PROTO_HEADER_LIST})
 if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
 ############ libge_proto_common.a ############
@@ -56,7 +57,7 @@ target_link_libraries(ge_proto_common PRIVATE
 ############ libge_proto_client.a ############
 add_library(ge_proto_client STATIC
-    ${PROTO_HEADER_HDRS}
+    ${PROTO_CLIENT_HEADER_HDRS}
     ${PROTO_CLIENT_SRCS}
 )
@@ -65,6 +66,11 @@ target_compile_definitions(ge_proto_client PRIVATE
     google=ascend_private
 )
+target_include_directories(ge_proto_client PRIVATE
+    ${CMAKE_BINARY_DIR}/proto/ge_client
+    ${CMAKE_BINARY_DIR}/proto/ge_client/proto
+)
 target_compile_options(ge_proto_client PRIVATE
     -O2
     -fno-common
@@ -221,7 +221,10 @@ ge::Status Analyzer::SaveAnalyzerDataToFile(uint64_t session_id, uint64_t graph_
   try {
     json_file_ << jsn.dump(kJsonDumpLevel) << std::endl;
   } catch (nlohmann::detail::type_error &e) {
-    GELOGE(FAILED, "[Json.dump][GraphInfo]json.dump to analyze file [%s] failed because [%s], session_id:%lu, graph_id:%lu", json_file_name_.c_str(), e.what(), session_id, graph_id);
+    GELOGE(FAILED,
+           "[Json.dump][GraphInfo]json.dump to analyze file [%s] failed because [%s],"
+           "session_id:%lu, graph_id:%lu",
+           json_file_name_.c_str(), e.what(), session_id, graph_id);
     ret_failed = true;
   }
   json_file_.close();
@@ -241,7 +244,9 @@ ge::Status Analyzer::DoAnalyze(DataInfo &data_info) {
   GE_CHECK_NOTNULL(graph_info);
   auto status = SaveOpInfo(desc, data_info, graph_info);
   if (status != SUCCESS) {
-    GELOGE(status, "[Check][SaveOpInfo]save op info: desc_name [%s] desc_type [%s] failed!", desc->GetName().c_str(), desc->GetType().c_str());
+    GELOGE(status,
+           "[Check][SaveOpInfo]save op info: desc_name [%s] desc_type [%s] failed!",
+           desc->GetName().c_str(), desc->GetType().c_str());
     return FAILED;
   }
   // create json file
@@ -16,6 +16,7 @@ set(PROTO_LIST
 )
 protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
+protobuf_generate(ge_static PROTO_STATIC_SRCS PROTO_STATIC_HDRS ${PROTO_LIST})
 set(SRC_LIST
     "context/ctx.cc"
@@ -127,7 +128,7 @@ target_link_libraries(ge_common PRIVATE
 )
 ############ libge_common.a ############
-add_library(ge_common_static STATIC ${SRC_LIST} ${PROTO_HDRS})
+add_library(ge_common_static STATIC ${SRC_LIST} ${PROTO_STATIC_HDRS})
 target_compile_definitions(ge_common_static PRIVATE
     PROTOBUF_INLINE_NOT_IN_HEADERS=0
     HOST_VISIBILITY
@@ -158,7 +159,7 @@ target_include_directories(ge_common_static PRIVATE
     ${METADEF_DIR}/inc/external/graph
     ${METADEF_DIR}/inc/graph
     ${CMAKE_BINARY_DIR}
-    ${CMAKE_BINARY_DIR}/proto/ge
+    ${CMAKE_BINARY_DIR}/proto/ge_static
     #### yellow zone ####
     ${GE_DEPEND_DIR}/inc
     ${GE_DEPEND_DIR}/inc/cce
@@ -96,7 +96,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf
   dump_mode = dump_config.dump_mode;
   GELOGI("Dump mode is %s", dump_mode.c_str());
   dump_properties.SetDumpMode(dump_mode);
-  dump_properties_map_.emplace(kInferSessionId, dump_properties);
+  dump_properties_map_[kInferSessionId] = dump_properties;
   return SUCCESS;
 }
```
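The `emplace` → `operator[]` swap in the hunk above is a behavioral fix, not a style one: `std::map::emplace` does nothing when the key is already present, so a second `SetDumpConf` call would silently keep the stale dump properties, whereas `operator[]` assigns unconditionally. A minimal standalone sketch of the difference (hypothetical values, not GE code):

```cpp
#include <iostream>
#include <map>
#include <string>

int main() {
  std::map<int, std::string> props;
  props.emplace(0, "old dump config");
  props.emplace(0, "new dump config");  // key 0 already exists: emplace is a no-op
  std::cout << props[0] << '\n';        // prints "old dump config"

  props[0] = "new dump config";         // operator[] overwrites the existing value
  std::cout << props[0] << '\n';        // prints "new dump config"
  return 0;
}
```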
```diff
@@ -20,6 +20,7 @@
 #include "common/ge/datatype_util.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/util.h"
+#include "framework/common/types.h"
 #include "graph/anchor.h"
 #include "graph/ge_tensor.h"
 #include "graph/op_desc.h"
@@ -55,8 +56,10 @@ void DumpOp::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond
   loop_cond_ = reinterpret_cast<uintptr_t>(loop_cond);
 }
-void DumpOp::SetDynamicModelInfo(const string &dynamic_model_name, uint32_t dynamic_model_id) {
+void DumpOp::SetDynamicModelInfo(const string &dynamic_model_name, const string &dynamic_om_name,
+                                 uint32_t dynamic_model_id) {
   dynamic_model_name_ = dynamic_model_name;
+  dynamic_om_name_ = dynamic_om_name;
   dynamic_model_id_ = dynamic_model_id;
 }
@@ -200,6 +203,32 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) {
   return SUCCESS;
 }
+Status DumpOp::SetDumpModelName(aicpu::dump::OpMappingInfo &op_mapping_info) {
+  if (dynamic_model_name_.empty() && dynamic_om_name_.empty()) {
+    GELOGI("Single op dump, no need set model name");
+    return SUCCESS;
+  }
+  std::set<std::string> model_list = dump_properties_.GetAllDumpModel();
+  bool not_find_by_omname = model_list.find(dynamic_om_name_) == model_list.end();
+  bool not_find_by_modelname = model_list.find(dynamic_model_name_) == model_list.end();
+  std::string dump_model_name = not_find_by_omname ? dynamic_model_name_ : dynamic_om_name_;
+  if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) {
+    if (not_find_by_omname && not_find_by_modelname) {
+      std::string model_list_str;
+      for (auto &model : model_list) {
+        model_list_str += "[" + model + "].";
+      }
+      GELOGW("Model %s will not be set to dump, dump list: %s", dump_model_name.c_str(), model_list_str.c_str());
+      return FAILED;
+    }
+  }
+  if (!dump_model_name.empty() && dump_properties_.IsDumpOpen()) {
+    GELOGD("Dump model name is %s", dump_model_name.c_str());
+    op_mapping_info.set_model_name(dump_model_name);
+  }
+  return SUCCESS;
+}
 Status DumpOp::LaunchDumpOp() {
   GELOGI("Start to launch dump op %s", op_desc_->GetName().c_str());
   int32_t device_id = 0;
```
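The new `SetDumpModelName` above decides which name a dump is recorded under and whether to dump at all. A self-contained sketch of that selection logic (hypothetical names; a plain `std::set` stands in for `dump_properties_.GetAllDumpModel()`, and the `DUMP_ALL_MODEL` wildcard case is omitted):

```cpp
#include <iostream>
#include <set>
#include <string>

int main() {
  // Models the user configured for dump; "resnet50.om" is a hypothetical entry.
  std::set<std::string> model_list = {"resnet50.om"};
  std::string om_name = "resnet50.om";  // dynamic_om_name_
  std::string model_name = "resnet50";  // dynamic_model_name_

  bool not_find_by_omname = model_list.find(om_name) == model_list.end();
  bool not_find_by_modelname = model_list.find(model_name) == model_list.end();
  // Prefer the om name when it is configured; otherwise fall back to the model name.
  std::string dump_model_name = not_find_by_omname ? model_name : om_name;

  if (not_find_by_omname && not_find_by_modelname) {
    // SetDumpModelName returns FAILED here, and LaunchDumpOp then returns
    // SUCCESS without launching the dump task (dump is skipped, not an error).
    std::cout << "model not in dump list, dump skipped\n";
  } else {
    std::cout << "dump recorded under: " << dump_model_name << "\n";
  }
  return 0;
}
```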
```diff
@@ -209,8 +238,7 @@ Status DumpOp::LaunchDumpOp() {
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
   if (device_id < 0) {
-    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
-           "Check device_id failed, device_id = %d, which should be not less than 0.",
+    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Check device_id failed, device_id = %d, which should be not less than 0.",
            device_id);
     return ACL_ERROR_GE_INTERNAL_ERROR;
   }
@@ -220,11 +248,12 @@ Status DumpOp::LaunchDumpOp() {
   op_mapping_info.set_flag(kAicpuLoadFlag);
   op_mapping_info.set_dump_step(dump_properties_.GetDumpStep());
   op_mapping_info.set_model_id(dynamic_model_id_);
-  if (!dynamic_model_name_.empty() && dump_properties_.IsDumpOpen()) {
-    op_mapping_info.set_model_name(dynamic_model_name_);
+  if (SetDumpModelName(op_mapping_info) != SUCCESS) {
+    return SUCCESS;
   }
   SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
-  GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(),
+  GELOGI("Dump step is %s ,dump path is %s in Launch dump op", dump_properties_.GetDumpStep().c_str(),
          dump_path.c_str());
   uint32_t task_id = 0;
   uint32_t stream_id = 0;
@@ -273,4 +302,4 @@ Status DumpOp::LaunchDumpOp() {
   }
   return SUCCESS;
 }
-}  // namesapce ge
+}  // namespace ge
```
```diff
@@ -34,12 +34,13 @@ class DumpOp {
                vector<uintptr_t> output_addrs, rtStream_t stream);
   Status LaunchDumpOp();
   void SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond);
-  void SetDynamicModelInfo(const string &dynamic_model_name, uint32_t dynamic_model_id);
+  void SetDynamicModelInfo(const string &dynamic_model_name, const string &dynamic_om_name, uint32_t dynamic_model_id);
 private:
   Status ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info);
   Status DumpOutput(aicpu::dump::Task &task);
   Status DumpInput(aicpu::dump::Task &task);
+  Status SetDumpModelName(aicpu::dump::OpMappingInfo &op_mapping_info);
   DumpProperties dump_properties_;
   OpDescPtr op_desc_;
@@ -54,6 +55,7 @@ class DumpOp {
   uintptr_t loop_cond_;
   std::string dynamic_model_name_;
+  std::string dynamic_om_name_;
   std::uint32_t dynamic_model_id_;
 };
 }  // namespace ge
```
```diff
@@ -35,14 +35,14 @@ const std::string kDumpStatusOpen = "on";
 const uint32_t kAicoreOverflow = (0x1 << 0);
 const uint32_t kAtomicOverflow = (0x1 << 1);
 const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow);
-}
+}  // namespace
 namespace ge {
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties::DumpProperties(const DumpProperties &other) {
   CopyFrom(other);
 }
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &DumpProperties::operator=(
-  const DumpProperties &other) {
+    const DumpProperties &other) {
   CopyFrom(other);
   return *this;
 }
@@ -97,7 +97,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti
 // The following is the new dump scenario of the fusion operator
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::AddPropertyValue(
-  const std::string &model, const std::set<std::string> &layers) {
+    const std::string &model, const std::set<std::string> &layers) {
   for (const std::string &layer : layers) {
     GELOGI("This model %s config to dump layer %s", model.c_str(), layer.c_str());
   }
@@ -138,7 +138,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpPrope
 }
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpProperties::GetPropertyValue(
-  const std::string &model) const {
+    const std::string &model) const {
   auto iter = model_dump_properties_map_.find(model);
   if (iter != model_dump_properties_map_.end()) {
     return iter->second;
@@ -147,8 +147,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpPrope
 }
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsLayerNeedDump(
-  const std::string &model, const std::string &om_name, const std::string &op_name) const {
+    const std::string &model, const std::string &om_name, const std::string &op_name) const {
   // if dump all
+  GELOGD("model name is %s om name is %s op is %s in layer need dump", model.c_str(), om_name.c_str(), op_name.c_str());
   if (model_dump_properties_map_.find(DUMP_ALL_MODEL) != model_dump_properties_map_.end()) {
     return true;
   }
@@ -203,7 +204,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperti
 }
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpOpSwitch(
-  const std::string &dump_op_switch) {
+    const std::string &dump_op_switch) {
   dump_op_switch_ = dump_op_switch;
 }
@@ -270,4 +271,4 @@ void DumpProperties::SetDumpDebugOptions() {
     GELOGI("ge.exec.enableDumpDebug is false or is not set.");
   }
 }
-}  // namespace
+}  // namespace ge
```
```diff
@@ -80,13 +80,11 @@ Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_de
   uint32_t debug_stream_id = 0;
   uint32_t debug_task_id = 0;
-#ifdef ONLY_COMPILE_OPEN_SRC
   auto rt_ret = rtDebugRegisterForStream(stream, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
   if (rt_ret != RT_ERROR_NONE) {
     GELOGE(RT_FAILED, "rtDebugRegisterForStream error, ret: 0x%X", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
-#endif
   GELOGD("debug_task_id:%u, debug_stream_id:%u in stream overflow.", debug_task_id, debug_stream_id);
   data_dumper.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, true);
   return SUCCESS;
@@ -94,7 +92,6 @@ Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_de
 void OpdebugRegister::UnregisterDebugForStream(rtStream_t stream) {
   rtError_t rt_ret = RT_ERROR_NONE;
-#ifdef ONLY_COMPILE_OPEN_SRC
   if (stream != nullptr) {
     GELOGD("start call rtDebugUnRegisterForStream in unknown shape over flow.");
     rt_ret = rtDebugUnRegisterForStream(stream);
@@ -102,8 +99,6 @@ void OpdebugRegister::UnregisterDebugForStream(rtStream_t stream) {
       GELOGW("rtDebugUnRegisterForStream failed, ret: 0x%X", rt_ret);
     }
   }
-#endif
   if (op_debug_addr_ != nullptr) {
     rt_ret = rtFree(op_debug_addr_);
     if (rt_ret != RT_ERROR_NONE) {
```
```diff
@@ -154,7 +154,8 @@ Status DataTypeTransfer::TransDataType(const CastArgs &args, TransResult &result
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to alloc the memory for dst buf %zu, data size %zu", total_size, args.src_data_size);
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to alloc the memory for dst buf %zu, data size %zu", total_size, args.src_data_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
   }
@@ -73,7 +73,8 @@ Status CheckArgsForC1hwncoc0ToHwcn(const TransArgs &args) {
 Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size, int64_t total_size) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -94,7 +94,8 @@ Status TransFormatDhwckToFz3D(const TransArgs &args, TransResult &result) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -122,7 +123,8 @@ Status TransFormatDhwckToFz3D(const TransArgs &args, TransResult &result) {
                      args.data + src_idx * data_size, static_cast<size_t>(data_size));
   }
   if (ret != EOK) {
-    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+           "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
            dst_offset, ret, pad_zero);
     return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
   }
@@ -95,7 +95,8 @@ Status TransFormatDhwncToFz3DTranspose(const TransArgs &args, TransResult &resul
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -123,7 +124,8 @@ Status TransFormatDhwncToFz3DTranspose(const TransArgs &args, TransResult &resul
                      args.data + src_idx * data_size, static_cast<size_t>(data_size));
   }
   if (ret != EOK) {
-    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+           "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
           dst_offset, ret, pad_zero);
     return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
   }
```
```diff
@@ -139,7 +139,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size](), std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -175,7 +176,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con
       auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                           static_cast<size_t>(size * w0));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -189,7 +191,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con
       auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                           static_cast<size_t>(size));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -210,7 +213,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -246,7 +250,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
       ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                      static_cast<size_t>(size * w0));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -260,7 +265,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
       ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                      static_cast<size_t>(size));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -274,14 +280,16 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
 Status FormatTransferFractalNz::TransFormat(const TransArgs &args, TransResult &result) {
   if (!IsDataTypeSupport(args.src_data_type)) {
-    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+           "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
     return ACL_ERROR_GE_DATATYPE_INVALID;
   }
   if (!CheckShape(args.src_format, args.src_shape) || !IsShapeValid(args.dst_shape)) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -325,7 +333,8 @@ Status FormatTransferFractalNz::TransShape(Format src_format, const ShapeVector
 Status FormatTransferFractalNzND::TransFormat(const TransArgs &args, TransResult &result) {
   if (!IsDataTypeSupport(args.src_data_type)) {
-    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+           "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -333,7 +342,8 @@ Status FormatTransferFractalNzND::TransFormat(const TransArgs &args, TransResult
   }
   if (!IsShapeValid(args.src_shape) || !CheckShape(args.dst_format, args.dst_shape)) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
```
```diff
@@ -127,7 +127,8 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
       dst == nullptr,
-      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+             "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
              TypeUtils::FormatToSerialString(args.src_format).c_str(),
              TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
       return ACL_ERROR_GE_MEMORY_ALLOCATION;);
@@ -173,8 +174,9 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) {
     }
   }
   if (ret != EOK) {
-    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d pad mode %d", offset,
-           ret, need_pad_zero);
+    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+           "Failed to operate the dst memory at offset %ld, error-code %d pad mode %d",
+           offset, ret, need_pad_zero);
     return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
   }
 }
@@ -213,7 +215,8 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
       dst == nullptr,
-      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+             "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
              TypeUtils::FormatToSerialString(args.src_format).c_str(),
             TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
       return ACL_ERROR_GE_MEMORY_ALLOCATION;);
@@ -235,7 +238,8 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) {
                          static_cast<size_t>(data_size));
       } else {
         if (protected_size < data_size) {
-          GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Failed to operate the dst memory, protected_size is %ld and size is %ld",
+          GELOGE(ACL_ERROR_GE_PARAM_INVALID,
+                 "Failed to operate the dst memory, protected_size is %ld and size is %ld",
                  protected_size, data_size);
           return ACL_ERROR_GE_PARAM_INVALID;
         }
@@ -247,7 +251,8 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) {
         }
       }
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
                dst_offset, ret, pad_zero);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
@@ -288,7 +293,8 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
       dst == nullptr,
-      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+             "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
              TypeUtils::FormatToSerialString(args.src_format).c_str(),
             TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
       return ACL_ERROR_GE_MEMORY_ALLOCATION;);
@@ -310,7 +316,8 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) {
                          static_cast<size_t>(data_size));
       } else {
         if (protected_size < data_size) {
-          GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Failed to operate the dst memory, protected_size is %ld and size is %ld",
+          GELOGE(ACL_ERROR_GE_PARAM_INVALID,
+                 "Failed to operate the dst memory, protected_size is %ld and size is %ld",
                  protected_size, data_size);
           return ACL_ERROR_GE_PARAM_INVALID;
         }
@@ -322,7 +329,8 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) {
         }
      }
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
                dst_offset, ret, pad_zero);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
```
```diff
@@ -140,7 +140,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size](), std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -179,7 +180,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con
       auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                           static_cast<size_t>(size * w0));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -195,7 +197,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con
       auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                           static_cast<size_t>(size));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -217,7 +220,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size](), std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -257,7 +261,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
       auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                           static_cast<size_t>(size * w0));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -273,7 +278,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
       auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                           static_cast<size_t>(size));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -288,14 +294,16 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
 Status FormatTransferFractalZz::TransFormat(const TransArgs &args, TransResult &result) {
   if (!IsDataTypeSupport(args.src_data_type)) {
-    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
+    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+           "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
     return ACL_ERROR_GE_DATATYPE_INVALID;
   }
   if (!CheckShape(args.src_format, args.src_shape) || !IsShapeValid(args.dst_shape)) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -339,7 +347,8 @@ Status FormatTransferFractalZz::TransShape(Format src_format, const ShapeVector
 Status FormatTransferFractalZzND::TransFormat(const TransArgs &args, TransResult &result) {
   if (!IsDataTypeSupport(args.src_data_type)) {
-    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
+    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+           "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -347,7 +356,8 @@ Status FormatTransferFractalZzND::TransFormat(const TransArgs &args, TransResult
   }
   if (!IsShapeValid(args.src_shape) || !CheckShape(args.dst_format, args.dst_shape)) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
```
| @@ -66,7 +66,7 @@ Status CheckArgsForFracZToHwcn(const TransArgs &args) { | |||
| FmtToStr(ShapeToString(dst_shape)); | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_SHAPE_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -74,7 +74,8 @@ Status CheckArgsForFracZToHwcn(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
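Every GetDstDataAfterTrans variant in these hunks allocates the destination with `new (std::nothrow)` wrapped in a shared_ptr carrying `std::default_delete<uint8_t[]>`: a failed allocation yields nullptr (reported as ACL_ERROR_GE_MEMORY_ALLOCATION) rather than throwing, and the array is released with delete[]. A minimal sketch of the idiom:

```cpp
#include <cstdint>
#include <iostream>
#include <memory>
#include <new>

// Allocate a byte buffer without throwing; returns nullptr on failure.
// default_delete<uint8_t[]> ensures delete[] is used, matching new[].
std::shared_ptr<uint8_t> AllocBuffer(int64_t total_size) {
  if (total_size <= 0) {
    return nullptr;
  }
  return std::shared_ptr<uint8_t>(new (std::nothrow) uint8_t[total_size],
                                  std::default_delete<uint8_t[]>());
}

int main() {
  auto buf = AllocBuffer(1024);
  std::cout << (buf != nullptr ? "allocated" : "alloc failed") << "\n";
}
```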
| @@ -59,9 +59,10 @@ Status CheckArgsForFracZToNchw(const TransArgs &args) { | |||
| } | |||
| int64_t c1 = Ceil(dst_shape.at(kNchwC), c0); | |||
| int64_t n0 = Ceil(dst_shape.at(kNchwN), static_cast<int64_t>(kNiSize)); | |||
| if (src_shape.at(kFracZHWC1) != dst_shape.at(kNchwH) * dst_shape.at(kNchwW) * c1 || src_shape.at(kFracZC0) != c0 || | |||
| src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) { | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| if (src_shape.at(kFracZHWC1) != dst_shape.at(kNchwH) * dst_shape.at(kNchwW) * c1 || | |||
| src_shape.at(kFracZC0) != c0 || src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) { | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, | |||
| "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| @@ -72,7 +73,8 @@ Status CheckArgsForFracZToNchw(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| @@ -59,9 +59,10 @@ Status CheckArgsForFracZToNhwc(const TransArgs &args) { | |||
| } | |||
| int64_t c1 = Ceil(dst_shape.at(kNhwcC), c0); | |||
| int64_t n0 = Ceil(dst_shape.at(kNhwcN), static_cast<int64_t>(kNiSize)); | |||
| if (src_shape.at(kFracZHWC1) != dst_shape.at(kNhwcH) * dst_shape.at(kNhwcW) * c1 || src_shape.at(kFracZC0) != c0 || | |||
| src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) { | |||
| GELOGE(PARAM_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| if (src_shape.at(kFracZHWC1) != dst_shape.at(kNhwcH) * dst_shape.at(kNhwcW) * c1 || | |||
| src_shape.at(kFracZC0) != c0 || src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) { | |||
| GELOGE(PARAM_INVALID, | |||
| "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| @@ -72,7 +73,8 @@ Status CheckArgsForFracZToNhwc(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size, int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(OUT_OF_MEMORY, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return OUT_OF_MEMORY; | |||
| @@ -140,7 +142,7 @@ Status FormatTransferFracZNhwc::TransFormat(const TransArgs &args, TransResult & | |||
| } | |||
| GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); | |||
| ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| GELOGD("Begin to trans format from FracZ to NHWC, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| @@ -91,7 +91,8 @@ Status CheckArgsForHwcnToC1hwncoc0(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| @@ -72,7 +72,8 @@ Status CheckArgsForNc1hwc0ToNchw(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(OUT_OF_MEMORY, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return OUT_OF_MEMORY; | |||
| @@ -61,7 +61,8 @@ Status CheckArgsForNc1hwc0ToNhwc(const TransArgs &args) { | |||
| if (src_shape.at(kNc1hwc0H) != dst_shape.at(kNhwcH) || src_shape.at(kNc1hwc0W) != dst_shape.at(kNhwcW) || | |||
| src_shape.at(kNc1hwc0N) != dst_shape.at(kNhwcN) || src_shape.at(kNc1hwc0C0) != c0 || | |||
| src_shape.at(kNc1hwc0C1) != (Ceil(dst_shape.at(kNhwcC), c0))) { | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, | |||
| "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| @@ -72,7 +73,8 @@ Status CheckArgsForNc1hwc0ToNhwc(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| @@ -125,7 +125,8 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) { | |||
| return ACL_ERROR_GE_INTERNAL_ERROR); | |||
| auto t1 = h_o * w_o; | |||
| auto t2 = n_o * c_o; | |||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2); | |||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), | |||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR); | |||
| int64_t total_ele_cnt = n_o * c_o * h_o * w_o; | |||
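Both reflowed GE_IF_BOOL_EXEC guards hinge on CheckInt64MulOverflow, which rejects a multiplication before it can wrap. A self-contained sketch of such a check; the real implementation lives in common/math/math_util.h and may differ in detail:

```cpp
#include <cstdint>
#include <iostream>
#include <limits>

// Returns true when a * b fits in int64_t, checking with division so the
// multiplication itself never overflows. A sketch of the guarded pattern.
bool CheckInt64MulOverflow(int64_t a, int64_t b) {
  if (a == 0 || b == 0) return true;
  if (a > 0) {
    if (b > 0) return a <= std::numeric_limits<int64_t>::max() / b;
    return b >= std::numeric_limits<int64_t>::min() / a;
  }
  if (b > 0) return a >= std::numeric_limits<int64_t>::min() / b;
  return b >= std::numeric_limits<int64_t>::max() / a;
}

int main() {
  std::cout << CheckInt64MulOverflow(1 << 20, 1 << 20) << "\n";                    // 1: fits
  std::cout << CheckInt64MulOverflow(int64_t{1} << 40, int64_t{1} << 40) << "\n";  // 0: overflows
}
```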
| @@ -140,7 +141,8 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| @@ -212,7 +214,8 @@ Status PaddingNC(const TransArgs &args, TransArgs &args_tmp, std::shared_ptr<uin | |||
| return ACL_ERROR_GE_INTERNAL_ERROR); | |||
| auto t1 = h_o * w_o; | |||
| auto t2 = n_o * c_o; | |||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2); | |||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,"int64 mul overflow.A[%ld], B[%ld]", t1, t2); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR); | |||
| int64_t total_ele_cnt = n_o * c_o * h_o * w_o; | |||
| @@ -228,7 +231,8 @@ Status PaddingNC(const TransArgs &args, TransArgs &args_tmp, std::shared_ptr<uin | |||
| dst.reset(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| @@ -275,7 +279,8 @@ Status FormatTransferNchwToFZC04::TransFormat(const TransArgs &args, TransResult | |||
| } | |||
| std::vector<int64_t> expect_shape; | |||
| ret = TransShape(args_tmp.src_format, args_tmp.src_shape, args_tmp.src_data_type, args_tmp.dst_format, expect_shape); | |||
| ret = TransShape(args_tmp.src_format, args_tmp.src_shape, args_tmp.src_data_type, | |||
| args_tmp.dst_format, expect_shape); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| @@ -92,7 +92,8 @@ Status CheckArgsForNhwcToNc1hwc0(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| @@ -87,12 +87,13 @@ Status ModelHelper::SaveSizeToModelDef(const GeModelPtr &ge_model) { | |||
| std::shared_ptr<ModelTaskDef> model_task_def = ge_model->GetModelTaskDefPtr(); | |||
| if (model_task_def == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create model task def ptr failed"); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| GELOGD("SaveSizeToModelDef task_info_size is 0."); | |||
| om_info.push_back(0); | |||
| } else { | |||
| size_t partition_task_size = model_task_def->ByteSizeLong(); | |||
| GELOGD("SaveSizeToModelDef task_info_size is %zu", partition_task_size); | |||
| om_info.push_back(partition_task_size); | |||
| } | |||
| size_t partition_task_size = model_task_def->ByteSizeLong(); | |||
| GELOGD("SaveSizeToModelDef task_info_size is %zu", partition_task_size); | |||
| om_info.push_back(partition_task_size); | |||
| GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(*(ge_model.get()), "om_info_list", om_info), | |||
| GELOGE(FAILED, "SetListInt of om_info_list failed."); | |||
| @@ -598,6 +599,7 @@ Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) { | |||
| is_first_model = false; | |||
| root_model_->SetRootGraph(GraphUtils::GetComputeGraph(cur_model->GetGraph())); | |||
| root_model_->SetModelId(cur_model->GetModelId()); | |||
| root_model_->SetModelName(cur_model->GetName()); | |||
| model_ = cur_model; | |||
| continue; | |||
| } | |||
| @@ -31,7 +31,7 @@ const char *const kFpPoint = "fp_point"; | |||
| const char *const kBpPoint = "bp_point"; | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| const size_t kReportMaxLen = 2048; | |||
| const size_t kReportMaxLen = 1024; | |||
| const int32_t kMaxDeviceNum = 256; | |||
| const uint32_t kInteval = 2; | |||
| const std::string kConfigNumsdev = "devNums"; | |||
| @@ -15,6 +15,8 @@ | |||
| */ | |||
| #include "common/tbe_kernel_store.h" | |||
| #include "graph/utils/attr_utils.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| namespace ge { | |||
| @@ -31,6 +33,15 @@ void TBEKernelStore::LoadTBEKernelBinToOpDesc(const std::shared_ptr<ge::OpDesc> | |||
| GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, kernel_bin), | |||
| GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for kernel_bin failed");) | |||
| GELOGI("Load tbe kernel:%s, %zu", kernel_bin->GetName().c_str(), kernel_bin->GetBinDataSize()); | |||
| std::string atomic_kernel_name; | |||
| (void) AttrUtils::GetStr(op_desc, ATOMIC_ATTR_TBE_KERNEL_NAME, atomic_kernel_name); | |||
| if (!atomic_kernel_name.empty()) { | |||
| GELOGI("Get atomic kernel name is %s.", atomic_kernel_name.c_str()); | |||
| auto atomic_kernel_bin = FindKernel(atomic_kernel_name); | |||
| GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(EXT_ATTR_ATOMIC_TBE_KERNEL, atomic_kernel_bin), | |||
| GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for atomic kernel_bin failed");) | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @@ -8,6 +8,7 @@ set(PROTO_LIST | |||
| ) | |||
| protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | |||
| protobuf_generate(ge_static PROTO_STATIC_SRCS PROTO_STATIC_HDRS ${PROTO_LIST}) | |||
| set(SRC_LIST | |||
| "ge_executor.cc" | |||
| @@ -162,7 +163,7 @@ set(SRC_LIST | |||
| ) | |||
| ######## libge_executor.a ######## | |||
| add_library(ge_executor STATIC ${SRC_LIST} ${PROTO_HDRS}) | |||
| add_library(ge_executor STATIC ${SRC_LIST} ${PROTO_STATIC_HDRS}) | |||
| target_compile_options(ge_executor PRIVATE | |||
| $<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common> | |||
| @@ -191,7 +192,7 @@ target_include_directories(ge_executor SYSTEM PRIVATE | |||
| ${METADEF_DIR}/inc/external/graph | |||
| ${METADEF_DIR}/inc/graph | |||
| ${CMAKE_BINARY_DIR} | |||
| ${CMAKE_BINARY_DIR}/proto/ge | |||
| ${CMAKE_BINARY_DIR}/proto/ge_static | |||
| #### yellow zone #### | |||
| ${GE_CODE_DIR}/../inc | |||
| ${GE_CODE_DIR}/../inc/cce | |||
| @@ -30,6 +30,8 @@ | |||
| #include "single_op/single_op_manager.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "opskernel_manager/ops_kernel_builder_manager.h" | |||
| #include "graph/opsproto_manager.h" | |||
| #include "ge_local_engine/engine/host_cpu_engine.h" | |||
| using std::string; | |||
| using std::vector; | |||
| @@ -199,6 +201,33 @@ bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims, | |||
| namespace ge { | |||
| bool GeExecutor::isInit_ = false; | |||
| static void InitOpsProtoManager() { | |||
| string opsproto_path; | |||
| const char *path_env = std::getenv("ASCEND_OPP_PATH"); | |||
| if (path_env != nullptr) { | |||
| string path = path_env; | |||
| string file_path = RealPath(path.c_str()); | |||
| if (file_path.empty()) { | |||
| GELOGE(FAILED, "[Check][EnvPath]ASCEND_OPP_PATH path [%s] is invalid.", path.c_str()); | |||
| REPORT_INPUT_ERROR("E68016", {"ASCEND_OPP_PATH", path}); | |||
| return; | |||
| } | |||
| opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/"); | |||
| GELOGI("Get opsproto so path from env : %s", path.c_str()); | |||
| } else { | |||
| string path_base = PluginManager::GetPath(); | |||
| GELOGI("path_base is %s", path_base.c_str()); | |||
| path_base = path_base.substr(0, path_base.rfind('/')); | |||
| path_base = path_base.substr(0, path_base.rfind('/') + 1); | |||
| opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); | |||
| } | |||
| GELOGI("Get opsproto path is %s", opsproto_path.c_str()); | |||
| OpsProtoManager *manager = OpsProtoManager::Instance(); | |||
| map<string, string> option_tmp; | |||
| option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path)); | |||
| (void)manager->Initialize(option_tmp); | |||
| } | |||
| GeExecutor::GeExecutor() {} | |||
| Status GeExecutor::Initialize() { | |||
| @@ -208,6 +237,16 @@ Status GeExecutor::Initialize() { | |||
| return ge::SUCCESS; | |||
| } | |||
| OpTilingManager::GetInstance().LoadSo(); | |||
| Status init_hostcpu_engine_status = HostCpuEngine::GetInstance().Initialize(); | |||
| if (init_hostcpu_engine_status != SUCCESS) { | |||
| GELOGE(init_hostcpu_engine_status, "Failed to initialize HostCpuEngine"); | |||
| return init_hostcpu_engine_status; | |||
| } | |||
| InitOpsProtoManager(); | |||
| std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM); | |||
| mem_type.push_back(RT_MEMORY_P2P_DDR); | |||
| auto ret = MemManager::Instance().Initialize(mem_type); | |||
| @@ -20,6 +20,8 @@ set(OPS_KERNEL_SRC_LIST | |||
| ) | |||
| protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | |||
| protobuf_generate(ge_ops_shared PROTO_OPS_SHARED_SRCS PROTO_OPS_SHARED_HDRS ${PROTO_LIST}) | |||
| protobuf_generate(ge_ops_static PROTO_OPS_STATIC_SRCS PROTO_OPS_STATIC_HDRS ${PROTO_LIST}) | |||
| ############ libge_local_engine.so ############ | |||
| add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | |||
| @@ -119,7 +121,7 @@ set_target_properties(atc_ge_local_engine PROPERTIES | |||
| ) | |||
| ############ libge_local_opskernel_builder.so ############ | |||
| add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) | |||
| add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_OPS_SHARED_HDRS}) | |||
| target_compile_options(ge_local_opskernel_builder PRIVATE | |||
| -Werror | |||
| @@ -143,7 +145,7 @@ target_include_directories(ge_local_opskernel_builder PRIVATE | |||
| ${METADEF_DIR}/inc/external/graph | |||
| ${METADEF_DIR}/inc/graph | |||
| ${CMAKE_BINARY_DIR} | |||
| ${CMAKE_BINARY_DIR}/proto/ge | |||
| ${CMAKE_BINARY_DIR}/proto/ge_ops_shared | |||
| #### yellow zone #### | |||
| ${GE_CODE_DIR}/../inc | |||
| #### blue zone #### | |||
| @@ -166,7 +168,7 @@ target_link_libraries(ge_local_opskernel_builder PRIVATE | |||
| ) | |||
| ############ atclib/libge_local_opskernel_builder.so ############ | |||
| add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) | |||
| add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_OPS_SHARED_HDRS}) | |||
| target_compile_options(atc_ge_local_opskernel_builder PRIVATE | |||
| -Werror | |||
| @@ -190,7 +192,7 @@ target_include_directories(atc_ge_local_opskernel_builder PRIVATE | |||
| ${METADEF_DIR}/inc/external/graph | |||
| ${METADEF_DIR}/inc/graph | |||
| ${CMAKE_BINARY_DIR} | |||
| ${CMAKE_BINARY_DIR}/proto/ge | |||
| ${CMAKE_BINARY_DIR}/proto/ge_ops_shared | |||
| #### yellow zone #### | |||
| ${GE_CODE_DIR}/../inc | |||
| #### blue zone #### | |||
| @@ -218,7 +220,7 @@ set_target_properties(atc_ge_local_opskernel_builder PROPERTIES | |||
| ) | |||
| ############ libge_local_opskernel_builder.a ############ | |||
| add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) | |||
| add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_OPS_STATIC_HDRS}) | |||
| target_compile_options(ge_local_opskernel_builder_static PRIVATE | |||
| -Werror | |||
| @@ -243,7 +245,7 @@ target_include_directories(ge_local_opskernel_builder_static PRIVATE | |||
| ${METADEF_DIR}/inc/external/graph | |||
| ${METADEF_DIR}/inc/graph | |||
| ${CMAKE_BINARY_DIR} | |||
| ${CMAKE_BINARY_DIR}/proto/ge | |||
| ${CMAKE_BINARY_DIR}/proto/ge_ops_static | |||
| #### yellow zone #### | |||
| ${GE_CODE_DIR}/../inc | |||
| #### blue zone #### | |||
| @@ -16,6 +16,7 @@ set(GE_SRC_LIST | |||
| "task/label_goto_task.cc" | |||
| "task/label_set_task.cc" | |||
| "task/label_switch_task.cc" | |||
| "task/label_manager.cc" | |||
| ) | |||
| add_library(ge_runtime SHARED ${GE_SRC_LIST}) | |||
| @@ -21,6 +21,7 @@ | |||
| #include "common/ge_inner_error_codes.h" | |||
| #include "common/types.h" | |||
| #include "common/util.h" | |||
| #include "common/math/math_util.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/op/op_parser_util.h" | |||
| #include "graph/types.h" | |||
| @@ -52,15 +52,7 @@ HcclTask::HcclTask(const ModelContext &model_context, const std::shared_ptr<Hccl | |||
| } | |||
| } | |||
| HcclTask::~HcclTask() { | |||
| if (workspace_mem_ != nullptr) { | |||
| rtError_t rt_ret = rtFree(workspace_mem_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "rtFree workspace_mem_ failed! ret: 0x%X.", rt_ret); | |||
| } | |||
| workspace_mem_ = nullptr; | |||
| } | |||
| } | |||
| HcclTask::~HcclTask() {} | |||
| bool HcclTask::Distribute() { | |||
| // Ops kernel info store | |||
| @@ -79,11 +71,7 @@ bool HcclTask::Distribute() { | |||
| SetSecondaryStream(); | |||
| if (task_info_->workspace_size() > 0) { | |||
| rtError_t rt_ret = rtMalloc(&workspace_mem_, task_info_->workspace_size(), RT_MEMORYINFO_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return false; | |||
| } | |||
| workspace_mem_ = task_info_->workspace_addr(); | |||
| } | |||
| GELOGI("HcclTaskInfo Distribute Start. begin to call function LoadTask in hccl."); | |||
| @@ -16,99 +16,83 @@ | |||
| #include "ge_runtime/task/label_goto_task.h" | |||
| #include "ge_runtime/task/task_factory.h" | |||
| #include "framework/common/util.h" | |||
| namespace ge { | |||
| namespace model_runner { | |||
| LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::shared_ptr<LabelGotoTaskInfo> &task_info) | |||
| : TaskRepeater<LabelGotoTaskInfo>(model_context, task_info), task_info_(task_info) { | |||
| : TaskRepeater<LabelGotoTaskInfo>(model_context, task_info), | |||
| task_info_(task_info), | |||
| stream_(nullptr), | |||
| index_value_(nullptr) { | |||
| if (task_info_ == nullptr) { | |||
| GELOGW("task_info_ is null!"); | |||
| return; | |||
| } | |||
| auto stream_list = model_context.stream_list(); | |||
| auto label_list = model_context.label_list(); | |||
| rt_model_handle_ = model_context.rt_model_handle(); | |||
| uint32_t stream_id = task_info->stream_id(); | |||
| uint32_t label_id = task_info->label_id(); | |||
| label_id_ = task_info->label_id(); | |||
| GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id); | |||
| GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id); | |||
| if (stream_id >= stream_list.size() || label_id >= label_list.size()) { | |||
| GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id_); | |||
| if (stream_id >= stream_list.size() || label_id_ >= label_list.size()) { | |||
| GELOGW("Stream/Label id invalid."); | |||
| return; | |||
| } | |||
| stream_ = stream_list[stream_id]; | |||
| label_ = label_list[label_id]; | |||
| label_manager_ = LabelManager::GetInstance(); | |||
| if (label_manager_ == nullptr) { | |||
| GELOGW("Get label manager instance failed."); | |||
| return; | |||
| } | |||
| label_info_ = label_manager_->GetLabelInfo(rt_model_handle_, {label_id_}, label_list); | |||
| } | |||
| LabelGotoTask::~LabelGotoTask() { | |||
| GE_FREE_RT_LOG(label_info_); | |||
| GE_FREE_RT_LOG(index_value_); | |||
| if (index_value_ != nullptr) { | |||
| rtError_t rt_ret = rtFree(index_value_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "rtFree index_value_ failed! ret: 0x%X.", rt_ret); | |||
| } | |||
| index_value_ = nullptr; | |||
| } | |||
| } | |||
| bool LabelGotoTask::Distribute() { | |||
| GELOGI("LabelGotoTask Distribute start."); | |||
| if (!CheckParamValid()) { | |||
| return false; | |||
| } | |||
| const std::vector<void *> label_list = { label_ }; | |||
| rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
| return false; | |||
| } | |||
| uint64_t branch_index = 0; | |||
| rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &branch_index, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
| return false; | |||
| } | |||
| uint32_t label_info_size = sizeof(rtLabelDevInfo) * label_list.size(); | |||
| rt_ret = rtMalloc(&label_info_, label_info_size, RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
| return false; | |||
| } | |||
| rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info_, label_info_size); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
| return false; | |||
| } | |||
| rt_ret = rtLabelSwitchByIndex(index_value_, label_list.size(), label_info_, stream_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
| return false; | |||
| } | |||
| GELOGI("DistributeTask end."); | |||
| return true; | |||
| } | |||
| bool LabelGotoTask::CheckParamValid() { | |||
| if (stream_ == nullptr) { | |||
| GELOGE(PARAM_INVALID, "stream is null!"); | |||
| return false; | |||
| } | |||
| if (label_ == nullptr) { | |||
| GELOGE(PARAM_INVALID, "label is null!"); | |||
| if (label_info_ == nullptr) { | |||
| GELOGE(PARAM_INVALID, "label info is null!"); | |||
| return false; | |||
| } | |||
| if (label_info_ != nullptr) { | |||
| GELOGE(PARAM_INVALID, "label_info_ has dirty data."); | |||
| return false; | |||
| if (index_value_ == nullptr) { | |||
| rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return false; | |||
| } | |||
| uint64_t index = 0; | |||
| rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &index, sizeof(index), RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return false; | |||
| } | |||
| } | |||
| if (index_value_ != nullptr) { | |||
| GELOGE(PARAM_INVALID, "index_value_ has dirty data."); | |||
| void *label_info = label_info_->GetLabelInfo(); | |||
| rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, 1, label_info, stream_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return false; | |||
| } | |||
| GELOGI("DistributeTask end."); | |||
| return true; | |||
| } | |||
| @@ -18,7 +18,11 @@ | |||
| #define GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_ | |||
| #include <memory> | |||
| #include <vector> | |||
| #include <map> | |||
| #include <mutex> | |||
| #include "ge_runtime/task/task.h" | |||
| #include "ge_runtime/task/label_manager.h" | |||
| namespace ge { | |||
| namespace model_runner { | |||
| @@ -31,13 +35,13 @@ class LabelGotoTask : public TaskRepeater<LabelGotoTaskInfo> { | |||
| bool Distribute() override; | |||
| private: | |||
| bool CheckParamValid(); | |||
| std::shared_ptr<LabelGotoTaskInfo> task_info_; | |||
| void *stream_{nullptr}; | |||
| void *label_{nullptr}; | |||
| void *label_info_{nullptr}; | |||
| void *index_value_{nullptr}; | |||
| void *stream_; | |||
| std::shared_ptr<LabelGuard> label_info_; | |||
| void *index_value_; | |||
| uint32_t label_id_; | |||
| rtModel_t rt_model_handle_; | |||
| std::shared_ptr<LabelManager> label_manager_; | |||
| }; | |||
| } // namespace model_runner | |||
| } // namespace ge | |||
| @@ -0,0 +1,119 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "ge_runtime/task/label_manager.h" | |||
| #include <algorithm> | |||
| #include <string> | |||
| #include "runtime/mem.h" | |||
| #include "runtime/rt_model.h" | |||
| #include "common/ge_inner_error_codes.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| namespace ge { | |||
| namespace model_runner { | |||
| std::weak_ptr<LabelManager> LabelManager::instance_; | |||
| std::mutex LabelManager::instance_mutex_; | |||
| template <class T> | |||
| static std::string GetVectorString(const std::vector<T> &vec) { | |||
| std::string ret; | |||
| for (size_t i = 0; i < vec.size(); ++i) { | |||
| if (i != 0) { | |||
| ret.push_back(','); | |||
| } | |||
| ret += std::to_string(vec[i]); | |||
| } | |||
| return ret; | |||
| } | |||
| LabelGuard::~LabelGuard() { | |||
| void *label_info = GetLabelInfo(); | |||
| if (label_info != nullptr) { | |||
| rtError_t rt_ret = rtFree(label_info); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "rtFree label_info failed! ret: 0x%X.", rt_ret); | |||
| } | |||
| } | |||
| } | |||
| std::shared_ptr<LabelManager> LabelManager::GetInstance() { | |||
| std::lock_guard<std::mutex> lock(instance_mutex_); | |||
| auto instance = instance_.lock(); | |||
| if (instance != nullptr) { | |||
| return instance; | |||
| } | |||
| instance = std::make_shared<LabelManager>(); | |||
| instance_ = instance; | |||
| return instance; | |||
| } | |||
| std::shared_ptr<LabelGuard> LabelManager::GetLabelInfo(rtModel_t model, const std::vector<uint32_t> &label_ids, | |||
| const std::vector<void *> &all_label) { | |||
| std::lock_guard<std::mutex> lock(model_info_mapping_mutex_); | |||
| rtError_t rt_ret; | |||
| auto model_iter = model_info_mapping_.find(model); | |||
| if (model_iter == model_info_mapping_.end()) { | |||
| model_info_mapping_.emplace(model, std::map<std::string, std::weak_ptr<LabelGuard>>()); | |||
| model_iter = model_info_mapping_.find(model); | |||
| } | |||
| std::string label_id_str = GetVectorString(label_ids); | |||
| auto &label_map = model_iter->second; | |||
| auto label_iter = label_map.find(label_id_str); | |||
| if (label_iter != label_map.end()) { | |||
| auto label_guard = label_iter->second.lock(); | |||
| if (label_guard != nullptr) { | |||
| GELOGI("model %p find same label id %s.", model, label_id_str.c_str()); | |||
| return label_guard; | |||
| } | |||
| } | |||
| GELOGI("Alloc label id %s for model %p.", label_id_str.c_str(), model); | |||
| void *label_info; | |||
| std::vector<void *> label_list; | |||
| bool status = true; | |||
| std::transform(label_ids.begin(), label_ids.end(), std::back_inserter(label_list), | |||
| [&all_label, &status](uint32_t idx) -> void * { | |||
| if (idx >= all_label.size()) { | |||
| GELOGE(PARAM_INVALID, "Invalid label id %u, all label list size %zu.", idx, all_label.size()); | |||
| status = false; | |||
| return nullptr; | |||
| } | |||
| return all_label[idx]; | |||
| }); | |||
| if (!status) { | |||
| GELOGE(PARAM_INVALID, "Get label info failed."); | |||
| return nullptr; | |||
| } | |||
| uint32_t label_info_size = sizeof(rtLabelDevInfo) * label_list.size(); | |||
| rt_ret = rtMalloc(&label_info, label_info_size, RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return nullptr; | |||
| } | |||
| rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info, label_info_size); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return nullptr; | |||
| } | |||
| auto label_guard = std::make_shared<LabelGuard>(label_info); | |||
| label_map.emplace(label_id_str, label_guard); | |||
| return label_guard; | |||
| } | |||
| } // namespace model_runner | |||
| } // namespace ge | |||
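GetLabelInfo deduplicates label tables per model: the outer map is keyed by model handle, the inner by the joined id string, and the values are weak_ptrs, so live guards are shared across tasks while expired entries are simply re-created. A compact sketch of that cache shape, with string keys and an int payload standing in for rtModel_t and LabelGuard:

```cpp
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <string>

// Two-level weak cache keyed by model then by id-string, mirroring
// LabelManager::model_info_mapping_ above.
class WeakCache {
 public:
  std::shared_ptr<int> Get(const std::string &model, const std::string &key) {
    std::lock_guard<std::mutex> lock(mutex_);
    auto &per_model = cache_[model];  // creates the inner map on first use
    auto it = per_model.find(key);
    if (it != per_model.end()) {
      if (auto alive = it->second.lock()) {
        return alive;  // reuse the existing live entry
      }
    }
    auto fresh = std::make_shared<int>(42);  // stands in for rtMalloc + rtLabelListCpy
    per_model[key] = fresh;
    return fresh;
  }

 private:
  std::mutex mutex_;
  std::map<std::string, std::map<std::string, std::weak_ptr<int>>> cache_;
};

int main() {
  WeakCache cache;
  auto a = cache.Get("model0", "3,1,7");
  auto b = cache.Get("model0", "3,1,7");
  std::cout << (a == b) << "\n";  // 1: second lookup reuses the first entry
}
```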
| @@ -0,0 +1,54 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_ | |||
| #define GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <mutex> | |||
| #include <map> | |||
| #include <runtime/base.h> | |||
| namespace ge { | |||
| namespace model_runner { | |||
| class LabelGuard { | |||
| public: | |||
| explicit LabelGuard(void *label_info) : label_info_(reinterpret_cast<uintptr_t>(label_info)) {} | |||
| ~LabelGuard(); | |||
| void *GetLabelInfo() { return reinterpret_cast<void *>(label_info_); } | |||
| private: | |||
| uintptr_t label_info_; | |||
| }; | |||
| class LabelManager { | |||
| public: | |||
| static std::shared_ptr<LabelManager> GetInstance(); | |||
| std::shared_ptr<LabelGuard> GetLabelInfo(rtModel_t model, const std::vector<uint32_t> &label_ids, | |||
| const std::vector<void *> &all_label); | |||
| private: | |||
| std::mutex model_info_mapping_mutex_; | |||
| std::map<rtModel_t, std::map<std::string, std::weak_ptr<LabelGuard>>> model_info_mapping_; | |||
| static std::weak_ptr<LabelManager> instance_; | |||
| static std::mutex instance_mutex_; | |||
| }; | |||
| } // namespace model_runner | |||
| } // namespace ge | |||
| #endif // GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_ | |||
| @@ -24,14 +24,14 @@ LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context, | |||
| : TaskRepeater<LabelSwitchTaskInfo>(model_context, task_info), | |||
| task_info_(task_info), | |||
| stream_(nullptr), | |||
| all_label_resource_(), | |||
| label_info_(nullptr) { | |||
| if (task_info_ == nullptr) { | |||
| GELOGW("task_info_ is null!"); | |||
| return; | |||
| } | |||
| all_label_resource_ = model_context.label_list(); | |||
| rt_model_handle_ = model_context.rt_model_handle(); | |||
| auto all_label_resource = model_context.label_list(); | |||
| auto stream_list = model_context.stream_list(); | |||
| uint32_t stream_id = task_info->stream_id(); | |||
| GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id); | |||
| @@ -40,52 +40,24 @@ LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context, | |||
| return; | |||
| } | |||
| stream_ = stream_list[stream_id]; | |||
| } | |||
| LabelSwitchTask::~LabelSwitchTask() { | |||
| if (label_info_ != nullptr) { | |||
| rtError_t rt_ret = rtFree(label_info_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "rtFree fwkOpBuf failed! ret: 0x%X.", rt_ret); | |||
| } | |||
| label_info_ = nullptr; | |||
| label_manager_ = LabelManager::GetInstance(); | |||
| if (label_manager_ == nullptr) { | |||
| GELOGW("Get label manager instance failed."); | |||
| return; | |||
| } | |||
| label_info_ = label_manager_->GetLabelInfo(rt_model_handle_, task_info_->label_list(), all_label_resource); | |||
| } | |||
| LabelSwitchTask::~LabelSwitchTask() {} | |||
| bool LabelSwitchTask::Distribute() { | |||
| GELOGI("LabelSwitchTask Distribute start."); | |||
| if (!CheckParamValid()) { | |||
| return false; | |||
| } | |||
| const std::vector<uint32_t> &label_index_list = task_info_->label_list(); | |||
| std::vector<void *> label_list(task_info_->label_size(), nullptr); | |||
| for (size_t i = 0; i < task_info_->label_size(); ++i) { | |||
| uint32_t label_index = label_index_list[i]; | |||
| if (label_index >= all_label_resource_.size()) { | |||
| GELOGE(PARAM_INVALID, "label %zu index is %u, but there are %zu labels in total.", i, label_index, | |||
| all_label_resource_.size()); | |||
| return false; | |||
| } | |||
| label_list[i] = all_label_resource_[label_index]; | |||
| GELOGI("Case %zu: label id %zu.", i, label_index); | |||
| } | |||
| uint32_t label_info_size = sizeof(rtLabelDevInfo) * task_info_->label_size(); | |||
| rtError_t rt_ret = rtMalloc(&label_info_, label_info_size, RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return false; | |||
| } | |||
| rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info_, label_info_size); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return false; | |||
| } | |||
| rt_ret = rtLabelSwitchByIndex(task_info_->cond(), label_list.size(), label_info_, stream_); | |||
| void *label_info = label_info_->GetLabelInfo(); | |||
| rtError_t rt_ret = rtLabelSwitchByIndex(task_info_->cond(), task_info_->label_size(), label_info, stream_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return false; | |||
| @@ -117,8 +89,8 @@ bool LabelSwitchTask::CheckParamValid() { | |||
| return false; | |||
| } | |||
| if (label_info_ != nullptr) { | |||
| GELOGE(PARAM_INVALID, "label_info_ has dirty data."); | |||
| if (label_info_ == nullptr) { | |||
| GELOGE(PARAM_INVALID, "CopyLabelList failed, label info is null."); | |||
| return false; | |||
| } | |||
| @@ -126,6 +98,5 @@ bool LabelSwitchTask::CheckParamValid() { | |||
| } | |||
| REGISTER_TASK(TaskInfoType::LABEL_SWITCH, LabelSwitchTask, LabelSwitchTaskInfo); | |||
| } // namespace model_runner | |||
| } // namespace ge | |||
| @@ -19,6 +19,7 @@ | |||
| #include <memory> | |||
| #include "ge_runtime/task/task.h" | |||
| #include "ge_runtime/task/label_manager.h" | |||
| namespace ge { | |||
| namespace model_runner { | |||
| @@ -35,8 +36,9 @@ class LabelSwitchTask : public TaskRepeater<LabelSwitchTaskInfo> { | |||
| std::shared_ptr<LabelSwitchTaskInfo> task_info_; | |||
| void *stream_; | |||
| std::vector<void *> all_label_resource_; | |||
| void *label_info_; | |||
| rtModel_t rt_model_handle_; | |||
| std::shared_ptr<LabelGuard> label_info_; | |||
| std::shared_ptr<LabelManager> label_manager_; | |||
| }; | |||
| } // namespace model_runner | |||
| } // namespace ge | |||
| @@ -67,6 +67,9 @@ bool ContainsDynamicInpus(const ge::OpDesc &op_desc) { | |||
| } | |||
| return false; | |||
| } | |||
| bool IsOptional(const ge::GeTensorDesc &tensor_desc) { | |||
| return tensor_desc.GetFormat() == ge::FORMAT_RESERVED && tensor_desc.GetDataType() == ge::DT_UNDEFINED; | |||
| } | |||
| } // namespace | |||
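IsOptional treats a tensor desc carrying FORMAT_RESERVED and DT_UNDEFINED as an unconnected optional input; RemoveConst further down uses it, together with the const attribute, to decide which inputs become Data nodes. A sketch of the predicate over simplified descriptors (the enums here are stand-ins for ge::Format and ge::DataType):

```cpp
#include <iostream>

// Simplified stand-ins for the ge::Format / ge::DataType sentinel values.
enum Format { FORMAT_NCHW, FORMAT_RESERVED };
enum DataType { DT_FLOAT, DT_UNDEFINED };

struct TensorDesc {
  Format format;
  DataType dtype;
};

// An optional input that was never connected carries both sentinels.
bool IsOptional(const TensorDesc &desc) {
  return desc.format == FORMAT_RESERVED && desc.dtype == DT_UNDEFINED;
}

int main() {
  std::cout << IsOptional({FORMAT_RESERVED, DT_UNDEFINED}) << "\n";  // 1
  std::cout << IsOptional({FORMAT_NCHW, DT_FLOAT}) << "\n";          // 0
}
```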
| namespace ge { | |||
| @@ -154,7 +157,7 @@ static Status CheckEngineTypeSupport(const NodePtr &node, OpEngineType engine_ty | |||
| } | |||
| static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const GeTensorDesc &tensor, int32_t index, | |||
| bool attr) { | |||
| bool attr, int32_t &data_index) { | |||
| GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); | |||
| GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); | |||
| @@ -197,9 +200,10 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const | |||
| "[Add][InputDesc]fail for node:%s", data_op->GetName().c_str()); | |||
| GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, | |||
| "[Add][OutputDesc]fail for node:%s", data_op->GetName().c_str()); | |||
| if (attr) { | |||
| GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, index), return FAILED, | |||
| if (attr && !is_const) { | |||
| GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, data_index), return FAILED, | |||
| "[Set][Attr:%s]fail for node:%s", ATTR_NAME_INDEX.c_str(), data_op->GetName().c_str()); | |||
| ++data_index; | |||
| } | |||
| ge::NodePtr arg_node = graph->AddNode(data_op); | |||
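The AddInputs change splits the counters: arg_index still numbers every wired input, while data_index only advances for non-const ones, so ATTR_NAME_INDEX stays dense over the real Data nodes once constants are skipped. A sketch of the two counters running over a mixed input list:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

struct Input { bool is_const; };

// arg_index counts all inputs; data_index only counts the ones that become
// Data nodes, matching the `if (attr && !is_const) ... ++data_index` hunk.
void NumberInputs(const std::vector<Input> &inputs) {
  int32_t data_index = 0;
  for (int32_t arg_index = 0; arg_index < static_cast<int32_t>(inputs.size()); ++arg_index) {
    if (!inputs[arg_index].is_const) {
      std::cout << "arg " << arg_index << " -> data index " << data_index << "\n";
      ++data_index;
    } else {
      std::cout << "arg " << arg_index << " -> const, no data index\n";
    }
  }
}

int main() {
  NumberInputs({{false}, {true}, {false}});  // data indices 0 and 1; the const is skipped
}
```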
| @@ -565,6 +569,44 @@ bool GeGenerator::Impl::SetOmSystemInfo(AttrHolder &obj) { | |||
| return true; | |||
| } | |||
| Status GeGenerator::SetModelNameForDump(const GeRootModelPtr &ge_root_model) { | |||
| bool is_unknown_shape = false; | |||
| Status ret = ge_root_model->CheckIsUnknownShape(is_unknown_shape); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(FAILED, "[Check][IsUnknownShape]Check root model is unknown shape failed, model id:%u", | |||
| ge_root_model->GetModelId()); | |||
| REPORT_CALL_ERROR("E19999", "Check root model is unknown shape failed, model id:%zu", | |||
| ge_root_model->GetModelId()); | |||
| return FAILED; | |||
| } | |||
| GeModelPtr model_root = nullptr; | |||
| if (is_unknown_shape) { | |||
| model_root = MakeShared<GeModel>(); | |||
| GE_CHECK_NOTNULL(model_root); | |||
| model_root->SetGraph(GraphUtils::CreateGraphFromComputeGraph(ge_root_model->GetRootGraph())); | |||
| ge_root_model->SetSubgraphInstanceNameToModel(ge_root_model->GetRootGraph()->GetName(), model_root); | |||
| } | |||
| ModelHelper model_helper; | |||
| string model_name; | |||
| GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | |||
| Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), | |||
| model_name); | |||
| if (name_ret != SUCCESS) { | |||
| ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"}); | |||
| GELOGE(FAILED, "[Check][GetModelNameStep]Get model_name failed. Param --output is invalid, root graph name: %s", | |||
| ge_root_model->GetRootGraph()->GetName().c_str()); | |||
| REPORT_CALL_ERROR("E19999", "Get model_name failed. Param --output is invalid, root graph name: %s", | |||
| ge_root_model->GetRootGraph()->GetName().c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); | |||
| GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; | |||
| GE_CHECK_NOTNULL(ge_model); | |||
| ge_model->SetName(model_name); | |||
| return SUCCESS; | |||
| } | |||
| Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs, | |||
| ModelBufferData &model, bool is_offline) { | |||
| rtContext_t ctx = nullptr; | |||
| @@ -599,20 +641,10 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr | |||
| } | |||
| GE_CHECK_NOTNULL(ge_root_model); | |||
| GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | |||
| ModelHelper model_helper; | |||
| string model_name = ""; | |||
| Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), | |||
| model_name); | |||
| if (name_ret != SUCCESS) { | |||
| ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"}); | |||
| GELOGE(FAILED, "Get model_name failed. Param --output is invalid."); | |||
| return PARAM_INVALID; | |||
| ret = SetModelNameForDump(ge_root_model); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); | |||
| GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; | |||
| GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null"); | |||
| ge_model->SetName(model_name); | |||
| ret = impl_->SaveRootModel(file_name_prefix, ge_root_model, model); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Save model failed"); | |||
| @@ -663,6 +695,34 @@ namespace { | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| bool CheckNoAicore(const ComputeGraphPtr &graph) { | |||
| for (const auto &node : graph->GetDirectNode()) { | |||
| if (node == nullptr) { | |||
| continue; | |||
| } | |||
| auto op_desc = node->GetOpDesc(); | |||
| if (op_desc == nullptr) { | |||
| continue; | |||
| } | |||
| if (op_desc->GetOpEngineName() == kAIcoreEngine) { | |||
| return false; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| } | |||
| void GeGenerator::RemoveConst(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) { | |||
| for (auto &input : inputs) { | |||
| GeTensorDesc input_desc = input.GetTensorDesc(); | |||
| bool is_const = false; | |||
| (void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const); | |||
| bool is_optional = IsOptional(input_desc); | |||
| if (!is_optional && !is_const) { | |||
| outputs.emplace_back(input); | |||
| } | |||
| } | |||
| } | |||
| Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | |||
| @@ -729,7 +789,9 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| GELOGI("ATC parser success in single op build."); | |||
| GeRootModelPtr ge_root_model = nullptr; | |||
| GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, ge_root_model)); | |||
| vector<GeTensor> data_inputs; | |||
| RemoveConst(inputs, data_inputs); | |||
| GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, data_inputs, ge_root_model)); | |||
| map<string, GeAttrValue> op_attrs = op_desc_tmp->GetAllAttrs(); | |||
| GE_CHECK_NOTNULL(ge_root_model); | |||
| GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | |||
| @@ -745,7 +807,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| bool all_shape = false; | |||
| (void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape); | |||
| if (all_shape) { | |||
| if (all_shape && CheckNoAicore(root_graph)) { | |||
| GELOGD("Get aicpu all_shape kernel!"); | |||
| vector<GeTensor> inputs_dynamic; | |||
| vector<GeTensor> outputs_dynamic; | |||
| @@ -812,18 +874,19 @@ Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor | |||
| // 2. Create InputData node. | |||
| int32_t arg_index = 0; | |||
| int32_t data_index = 0; | |||
| if (inputs.empty()) { | |||
| for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) { | |||
| GE_CHECK_NOTNULL_EXEC(input_desc, return INTERNAL_ERROR); | |||
| if (!IsNeedConnectInputOpForSingleOp(*input_desc)) { | |||
| continue; | |||
| } | |||
| GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false)); | |||
| GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false, data_index)); | |||
| arg_index++; | |||
| } | |||
| } else { | |||
| for (const auto &in_desc : inputs) { | |||
| GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, in_desc.GetTensorDesc(), arg_index, true)); | |||
| GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, in_desc.GetTensorDesc(), arg_index, true, data_index)); | |||
| arg_index++; | |||
| } | |||
| } | |||
| @@ -882,13 +945,12 @@ Status GeGenerator::Impl::SaveRootModel(const string &file_name_prefix, GeRootMo | |||
| "ge root model has no sub model") | |||
| GeModelPtr model_root = nullptr; | |||
| if (is_unknown_shape) { | |||
| model_root = make_shared<GeModel>(); | |||
| model_root->SetGraph(GraphUtils::CreateGraphFromComputeGraph(ge_root_model->GetRootGraph())); | |||
| ge_root_model->SetSubgraphInstanceNameToModel(ge_root_model->GetRootGraph()->GetName(), model_root); | |||
| model_root->SetName(ge_root_model->GetRootGraph()->GetName()); | |||
| auto name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); | |||
| model_root = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; | |||
| } else { | |||
| model_root = ge_root_model->GetSubgraphInstanceNameToModel().begin()->second; | |||
| } | |||
| GE_CHECK_NOTNULL(model_root); | |||
| // set atc version | |||
| if (!SetAtcVersionInfo(*(model_root.get()))) { | |||
| GELOGW("SetPackageVersionInfo of atc failed!"); | |||
| @@ -382,58 +382,6 @@ Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPt | |||
| return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id); | |||
| } | |||
| static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor, | |||
| const std::vector<InDataAnchorPtr> &in_anchors, const std::string &name) { | |||
| GE_CHECK_NOTNULL(out_anchor); | |||
| NodePtr in_node = out_anchor->GetOwnerNode(); | |||
| GE_CHECK_NOTNULL(in_node); | |||
| OpDescBuilder op_desc_builder(name, MEMCPYADDRASYNC); | |||
| OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0)) | |||
| .AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0)) | |||
| .Build(); | |||
| (void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false); | |||
| if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) { | |||
| GELOGE(FAILED, "Insert IDENTITY node %s after %s failed.", name.c_str(), in_node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) { | |||
| if (graph->GetGraphUnknownFlag()) { | |||
| GELOGI("Graph %s is unknown graph, ignore gen_task for constant.", graph->GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| for (auto &node : graph->GetDirectNode()) { | |||
| // CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT | |||
| auto op_desc = node->GetOpDesc(); | |||
| if (op_desc == nullptr) { | |||
| continue; | |||
| } | |||
| auto op_type = op_desc->GetType(); | |||
| if (op_type == NETOUTPUT) { | |||
| for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { | |||
| const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | |||
| GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | |||
| NodePtr in_node = peer_out_anchor->GetOwnerNode(); | |||
| GE_CHECK_NOTNULL(in_node); | |||
| std::string in_node_op_type = in_node->GetType(); | |||
| if (in_node_op_type == CONSTANT) { | |||
| GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); | |||
| std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; | |||
| if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) { | |||
| GELOGE(FAILED, "Insert memcpy between %s and %s failed.", | |||
| in_node->GetName().c_str(), node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { | |||
| bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag(); | |||
| com_graph->SetGraphUnknownFlag(false); | |||
| @@ -516,9 +464,6 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||
| !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { | |||
| continue; | |||
| } | |||
| GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed."); | |||
| if (sub_graph->GetGraphUnknownFlag()) { | |||
| // unknown shape build flow | |||
| GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), | |||
| @@ -597,11 +597,13 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { | |||
| int64_t size = 0; | |||
| GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); | |||
| GE_IF_BOOL_EXEC(size < 0, | |||
| GELOGE(FAILED, "[Check][TensorSize]tensor_size:%ld is invalid, maybe it is unknown shape node, Node_name:%s", | |||
| size, node_op_desc->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "tensor_size:%ld is invalid, maybe it is unknown shape node, Node_name:%s", | |||
| size, node_op_desc->GetName().c_str()); | |||
| return;); | |||
| GELOGE(FAILED, "[Check][TensorSize]tensor_size:%ld is invalid, " | |||
| "maybe it is unknown shape node, Node_name:%s", | |||
| size, node_op_desc->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "tensor_size:%ld is invalid, " | |||
| "maybe it is unknown shape node, Node_name:%s", | |||
| size, node_op_desc->GetName().c_str()); | |||
| return;); | |||
| batch_all_memory_size[batch_label].emplace_back(size); | |||
| if (batch_total_size.find(batch_label) == batch_total_size.end()) { | |||
| batch_total_size[batch_label] = size; | |||
@@ -692,23 +694,23 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou
  auto out_anchor = n->GetOutDataAnchor(out_index);
  GE_IF_BOOL_EXEC(out_anchor == nullptr,
                  GELOGE(FAILED, "[Check][Anchor]Node[%s] output[%u] anchor is null.",
                         n->GetName().c_str(), out_index);
                         n->GetName().c_str(), out_index);
                  REPORT_INNER_ERROR("E19999", "output anchor is null, node_name: %s output_index: %u.",
                         n->GetName().c_str(), out_index);
                         n->GetName().c_str(), out_index);
                  return false;);
  for (auto const &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) {
    GE_IF_BOOL_EXEC(peer_in_anchor == nullptr,
                    GELOGE(FAILED, "[Check][Anchor]Node[%s] output[%u] peer_in_anchor 0 is null.",
                           n->GetName().c_str(), out_index);
                           n->GetName().c_str(), out_index);
                    REPORT_INNER_ERROR("E19999", "output anchor peer is null, node_name: %s output_index: %u.",
                           n->GetName().c_str(), out_index);
                           n->GetName().c_str(), out_index);
                    return false;);
    auto peer_node = peer_in_anchor->GetOwnerNode();
    GE_IF_BOOL_EXEC(peer_node == nullptr,
                    GELOGE(FAILED, "[Check][Node]Node[%s] output[%u] peer node is null.",
                           n->GetName().c_str(), out_index);
                           n->GetName().c_str(), out_index);
                    REPORT_INNER_ERROR("E19999", "output anchor peer node is null, node_name: %s output_index: %u.",
                           n->GetName().c_str(), out_index);
                           n->GetName().c_str(), out_index);
                    return false;);
    // Get the continuous input type of the node, default is false
@@ -716,9 +718,9 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou
    auto peer_in_node_desc = peer_node->GetOpDesc();
    GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr,
                    GELOGE(FAILED, "[Check][OpDesc]Node[%s] output[%u] nodedesc is null.",
                           n->GetName().c_str(), out_index);
                           n->GetName().c_str(), out_index);
                    REPORT_INNER_ERROR("E19999", "output anchor peer op_desc is null, node_name:%s output_index:%u.",
                           n->GetName().c_str(), out_index);
                           n->GetName().c_str(), out_index);
                    return false;);
    // If GetBool fail, is_input_continuous is false.
@@ -819,7 +821,7 @@ bool BlockMemAssigner::IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &
        (in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) ||
        (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) {
      GELOGE(FAILED, "[Check][OpDesc]Node[%s] output[%u] peer input node desc is null.",
             n->GetName().c_str(), out_index);
             n->GetName().c_str(), out_index);
      REPORT_INNER_ERROR("E19999", "get output anchor peer op_desc fail, node_name: %s output_index: %u.",
             n->GetName().c_str(), out_index);
      return false;
@@ -1105,9 +1107,10 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
                                           OpMemoryType mem_type, const NodePtr &n, uint32_t out_index,
                                           const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem,
                                           const bool continuous, int64_t memory_type) {
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr,
                                 REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed");
                                 return nullptr, "[Check][Param]Input parameter n(type:node_ptr) is null.");
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
      n == nullptr,
      REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed");
      return nullptr, "[Check][Param]Input parameter n(type:node_ptr) is null.");
  auto node_op_desc = n->GetOpDesc();
  GE_IF_BOOL_EXEC(node_op_desc == nullptr, return nullptr);
  std::string batch_label;
@@ -1159,10 +1162,12 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
  }
  auto block = new (std::nothrow) MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory, memory_type);
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr,
                                 REPORT_INNER_ERROR("E19999", "new a memoryblock object failed. node_name:%s out_index:%u",
                                        n->GetName().c_str(), out_index);
                                 return nullptr, "[New][Object]new MemoryBlock failed, node_name:%s out_index:%u", n->GetName().c_str(), out_index);
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
      block == nullptr,
      REPORT_INNER_ERROR("E19999", "new a memoryblock object failed. node_name:%s out_index:%u",
                         n->GetName().c_str(), out_index);
      return nullptr,
      "[New][Object]new MemoryBlock failed, node_name:%s out_index:%u", n->GetName().c_str(), out_index);
  // Data and netoutput need zero copy block
  block->is_zero_copy_ = IsZeroCopyBlock(n, continuous);
@@ -1221,13 +1226,15 @@ void BlockMemAssigner::ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutpu
Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges,
                                               const bool is_op_reuse_mem) {
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr,
                                 REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null");
                                 return INTERNAL_ERROR, "[check][param]Input parameter n(type:NodePtr) is null.");
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
      n == nullptr,
      REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null");
      return INTERNAL_ERROR, "[check][param]Input parameter n(type:NodePtr) is null.");
  auto node_op_desc = n->GetOpDesc();
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr,
                                 REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null");
                                 return INTERNAL_ERROR, "[Check][Param]Input parameter n(type:OpDescPtr) is null");
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
      node_op_desc == nullptr,
      REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null");
      return INTERNAL_ERROR, "[Check][Param]Input parameter n(type:OpDescPtr) is null");
  // Continuous output supports ref only when all outputs ref inputs
  bool isAllOutputRef = true;
@@ -1242,7 +1249,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in
  if (!isAllOutputRef && isOutputHasRef) {
    REPORT_INNER_ERROR("E19999", "continuous output node ref part input, not support now. node_name:%s",
                       n->GetName().c_str());
                       n->GetName().c_str());
    GELOGE(INTERNAL_ERROR, "[Check][OutRefStatus]continuous output node ref part input, not support, node_name:%s",
           n->GetName().c_str());
    return INTERNAL_ERROR;
@@ -1255,7 +1262,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in
    auto output_op_desc = node_op_desc->GetOutputDescPtr(index);
    if (output_op_desc == nullptr) {
      REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u",
                         n->GetName().c_str(), index);
                         n->GetName().c_str(), index);
      GELOGE(INTERNAL_ERROR, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index);
      return INTERNAL_ERROR;
    }
@@ -1268,7 +1275,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in
    int64_t size = 0;
    if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) {
      REPORT_CALL_ERROR("E19999", "get tensor_size failed, node_name:%s, output_index:%u",
                        n->GetName().c_str(), index);
                        n->GetName().c_str(), index);
      GELOGE(INTERNAL_ERROR, "[Get][TensorSize]node_name:%s, output_index:%u", n->GetName().c_str(), index);
      return INTERNAL_ERROR;
    }
@@ -1310,7 +1317,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in
    ++(block->ref_count_);
  } else {
    REPORT_CALL_ERROR("E19999", "apply continuousMemory failed, node_name:%s, total_size:%ld",
                      n->GetName().c_str(), total_size);
                      n->GetName().c_str(), total_size);
    GELOGE(INTERNAL_ERROR, "[Apply][ContinuousMemory]node_name:%s, total_size:%ld", n->GetName().c_str(), total_size);
    return INTERNAL_ERROR;
  }
@@ -1319,26 +1326,33 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in
MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges,
                                              const bool is_op_reuse_mem, const bool continuous) {
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr,
                                 REPORT_INNER_ERROR("E19999", "Input parameter n(type:NodePtr) is null");
                                 return nullptr, "[Check][Param]Input parameter n(type:NodePtr) is null");
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
      n == nullptr,
      REPORT_INNER_ERROR("E19999", "Input parameter n(type:NodePtr) is null");
      return nullptr, "[Check][Param]Input parameter n(type:NodePtr) is null");
  auto node_op_desc = n->GetOpDesc();
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr,
                                 REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null");
                                 return nullptr, "[Check][Param]Input parameter n(type:OpDescPtr) is null");
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
      node_op_desc == nullptr,
      REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null");
      return nullptr, "[Check][Param]Input parameter n(type:OpDescPtr) is null");
  MemoryBlock *block = nullptr;
  NodeIndexIO node_index_io(n, index, kOut);
  int64_t size = 0;
  auto output_op_desc = node_op_desc->GetOutputDescPtr(index);
  GE_IF_BOOL_EXEC(output_op_desc == nullptr,
                  REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index);
                  GELOGE(FAILED, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index);
                  return nullptr);
  GE_IF_BOOL_EXEC(
      output_op_desc == nullptr,
      REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u",
                         n->GetName().c_str(), index);
      GELOGE(FAILED, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index);
      return nullptr);
  GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed"));
  size_t no_align_size = 0;
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS,
                                 REPORT_CALL_ERROR("E19999", "Get no align size failed, node_name:%s, output_index:%u", n->GetName().c_str(), index);
                                 return nullptr, "[Get][TensorSize]Get no align size, node_name:%s, output_index:%u", n->GetName().c_str(), index);
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
      GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS,
      REPORT_CALL_ERROR("E19999", "Get no align size failed, node_name:%s, output_index:%u",
                        n->GetName().c_str(), index);
      return nullptr,
      "[Get][TensorSize]Get no align size, node_name:%s, output_index:%u", n->GetName().c_str(), index);
  std::string symbol;
  bool reuse_input = false;
@@ -1346,9 +1360,9 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
    block = symbol_blocks_[symbol];
    GE_IF_BOOL_EXEC(block == nullptr,
                    REPORT_INNER_ERROR("E19999", "get ref block failed, node_name:%s, symbol:%s",
                           node_op_desc->GetName().c_str(), node_index_io.ToString().c_str());
                           node_op_desc->GetName().c_str(), node_index_io.ToString().c_str());
                    GELOGE(FAILED, "[Get][RefBlock]node_name:%s, symbol:%s",
                           node_op_desc->GetName().c_str(), node_index_io.ToString().c_str());
                           node_op_desc->GetName().c_str(), node_index_io.ToString().c_str());
                    return nullptr);
    // reduce old size
    size_t align_size = block->Size();
@@ -1392,24 +1406,28 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
    vector<bool> workspace_reuse_flag;
    block = ApplyMemory(block_size, size, no_align_size, kOutput, n, index,
                        workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type);
    GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr,
                                   REPORT_CALL_ERROR("E19999", "apply out Memory failed, node_name:%s, block_size:%ld, out_index:%u",
                                          n->GetName().c_str(), block_size, index);
                                   return nullptr, "[Apply][Memory]node_name:%s, block_size:%ld, out_index:%u",
    GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
        block == nullptr,
        REPORT_CALL_ERROR("E19999", "apply out Memory failed, node_name:%s, block_size:%ld, out_index:%u",
                          n->GetName().c_str(), block_size, index);
        return nullptr,
        "[Apply][Memory]node_name:%s, block_size:%ld, out_index:%u",
        n->GetName().c_str(), block_size, index);
  }
  int out_count = 0;
  GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(),
                  REPORT_INNER_ERROR("E19999", "out index:%u exceed out_size:%lu, node_name:%s",
                         index, n->GetAllOutDataAnchors().size(), n->GetName().c_str());
                  GELOGE(FAILED, "[Check][OutIndex]index:%u exceed out_size:%lu, node_name:%s",
                         index, n->GetAllOutDataAnchors().size(), n->GetName().c_str());
                  return nullptr);
  GE_IF_BOOL_EXEC(
      index >= n->GetAllOutDataAnchors().size(),
      REPORT_INNER_ERROR("E19999", "out index:%u exceed out_size:%lu, node_name:%s",
                         index, n->GetAllOutDataAnchors().size(), n->GetName().c_str());
      GELOGE(FAILED, "[Check][OutIndex]index:%u exceed out_size:%lu, node_name:%s",
             index, n->GetAllOutDataAnchors().size(), n->GetName().c_str());
      return nullptr);
  auto out_data_anchor = n->GetOutDataAnchor(index);
  GE_IF_BOOL_EXEC(out_data_anchor == nullptr,
                  REPORT_INNER_ERROR("E19999", "out anchor is null, index:%u, node_name:%s", index, n->GetName().c_str());
                  GELOGE(FAILED, "[Check][OutAnchor]is null, index:%u, node_name:%s", index, n->GetName().c_str());
                  return nullptr);
  GE_IF_BOOL_EXEC(
      out_data_anchor == nullptr,
      REPORT_INNER_ERROR("E19999", "out anchor is null, index:%u, node_name:%s", index, n->GetName().c_str());
      GELOGE(FAILED, "[Check][OutAnchor]is null, index:%u, node_name:%s", index, n->GetName().c_str());
      return nullptr);
  for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) {
    auto owner_node = in_anchor->GetOwnerNode();
    auto op_desc = owner_node->GetOpDesc();
@@ -1616,12 +1634,13 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
          op_desc->GetOutputsSize(), memorys_type.size());
  if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) {
    REPORT_INNER_ERROR("E19999", "Attr[%s] size:%zu not equal to node output size:%zu, node_name:%s",
                       ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(),
                       op_desc->GetOutputsSize(), op_desc->GetName().c_str());
    GELOGE(INTERNAL_ERROR,
           "[Check][MemTypeAttr]Attr %s size:%zu not equal to node output size:%zu, node_name:%s",
           ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(),
           op_desc->GetOutputsSize(), op_desc->GetName().c_str());
                       ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(),
                       op_desc->GetOutputsSize(), op_desc->GetName().c_str());
    GELOGE(
        INTERNAL_ERROR,
        "[Check][MemTypeAttr]Attr %s size:%zu not equal to node output size:%zu, node_name:%s",
        ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(),
        op_desc->GetOutputsSize(), op_desc->GetName().c_str());
    return INTERNAL_ERROR;
  }
@@ -1748,9 +1767,11 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
    if (has_tvm_workspace_mem_type_attr && (temp.size() != tvm_workspace_memory_type.size())) {
      REPORT_INNER_ERROR("E19999", "Attr[%s]size:%zu is not equal to workspace size:%zu, node_name:%s",
                         TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(), temp.size(), n->GetName().c_str());
                         TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(),
                         temp.size(), n->GetName().c_str());
      GELOGE(INTERNAL_ERROR, "[Check][Attr]Attr %s size:%zu is not equal to workspace size:%zu, node_name:%s",
             TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(), temp.size(), n->GetName().c_str());
             TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(),
             temp.size(), n->GetName().c_str());
      return;
    }
    for (size_t i = 0; i < temp.size(); i++) {
@@ -2160,10 +2181,11 @@ bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index,
  ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, workspace_memory_type);
  if (has_workspace_mem_type_attr && (workspace_memory_type.size() <= index)) {
    REPORT_INNER_ERROR("E19999", "get workspace mem_type failed, "
                       "index %zu invalid, bigger than attr %s size:%zu, node_name:%s",
                       index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), workspace_memory_type.size(), node->GetName().c_str());
                       "index %zu invalid, bigger than attr %s size:%zu, node_name:%s",
                       index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(),
                       workspace_memory_type.size(), node->GetName().c_str());
    GELOGE(INTERNAL_ERROR, "[Get][WorkspaceMemType]index %zu invalid, bigger than attr %s size:%zu, node_name:%s",
           index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), workspace_memory_type.size(), node->GetName().c_str());
           index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), workspace_memory_type.size(), node->GetName().c_str());
    return false;
  }
  memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM;
@@ -496,7 +496,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
    REPORT_INNER_ERROR("E19999", "find memory offset fail for mem_type:%ld, "
                       "when assign continuous input memory for node:%s, ", memory_type, node->GetName().c_str());
    GELOGE(FAILED, "[Find][MemOffset]fail for mem_type:%ld, when AssignContinuousInputMemory for node:%s",
           memory_type, node->GetName().c_str());
           memory_type, node->GetName().c_str());
    return FAILED;
  }
  // The head and tail of hcom continuous input need an extra 512 bytes
@@ -929,8 +929,8 @@ Status GraphMemoryAssigner::AssignReferenceMemory() {
    if (out_op_desc->GetOutputsSize() > output_list.size()) {
      REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, judge invalid in node:%s "
                         "when AssignReferenceMemory",
                         out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
                         "when AssignReferenceMemory",
                         out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
      GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s",
             out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
      return ge::FAILED;
@@ -574,6 +574,50 @@ Status ModelBuilder::MergeWeights() {
  return SUCCESS;
}
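// Saves the TBE kernel of this op's atomic-clean companion node into the model's kernel store,
// so the atomic workspace-clean kernel ships inside the built model.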
Status ModelBuilder::SaveAtomicTBEKernel(const OpDescPtr &op_desc) {
  ge::NodePtr atomic_clean_node = nullptr;
  atomic_clean_node = op_desc->TryGetExtAttr("atomic_clean_node_ptr", atomic_clean_node);
  if (atomic_clean_node == nullptr) {
    return SUCCESS;
  }
  ge::OpDescPtr atomic_op_desc = atomic_clean_node->GetOpDesc();
  GE_CHECK_NOTNULL(atomic_op_desc);
  TBEKernelPtr tbe_kernel = atomic_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
  if (tbe_kernel == nullptr) {
    std::string kernel_name;
    GeAttrValue::BYTES kernel_buffer;
    (void) AttrUtils::GetStr(atomic_op_desc, ATTR_NAME_TBE_KERNEL_NAME, kernel_name);
    (void) AttrUtils::GetBytes(atomic_op_desc, ATTR_NAME_TBE_KERNEL_BUFFER, kernel_buffer);
    if (!kernel_name.empty() && (kernel_buffer.GetSize() > 0)) {
      GE_CHECK_NOTNULL(kernel_buffer.GetData());
      std::vector<char> data(kernel_buffer.GetData(), kernel_buffer.GetData() + kernel_buffer.GetSize());
      tbe_kernel = MakeShared<OpKernelBin>(kernel_name, std::move(data));
      GE_CHECK_NOTNULL(tbe_kernel);
    }
  }
  if (tbe_kernel == nullptr) {
    GELOGD("Atomic_clean_node doesn't have tbe_kernel.");
    return SUCCESS;
  }
  tbe_kernel_store_.AddTBEKernel(tbe_kernel);
  GELOGD("Atomic_clean_node tbe_kernel_name %s!", tbe_kernel->GetName().c_str());
  (void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TBE_KERNEL_NAME, tbe_kernel->GetName());
  std::string kernel_name;
  (void) AttrUtils::GetStr(atomic_op_desc, atomic_op_desc->GetName() + "_kernelname", kernel_name);
  (void) AttrUtils::SetStr(op_desc, op_desc->GetName() + "_atomic_kernelname", kernel_name);
  std::string meta_data;
  (void) AttrUtils::GetStr(atomic_op_desc, TVM_ATTR_NAME_METADATA, meta_data);
  (void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TVM_METADATA, meta_data);
  std::string json_string;
  (void) AttrUtils::GetStr(atomic_op_desc, TVM_ATTR_NAME_MAGIC, json_string);
  (void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TVM_MAGIC, json_string);
  return SUCCESS;
}
Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
  // Add weight
  ge_model.SetWeight(weight_buffer_);
@@ -607,6 +651,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
    }
    tbe_name_set.insert(tbe_kernel->GetName());
    tbe_kernel_store_.AddTBEKernel(tbe_kernel);
    GE_CHK_STATUS_RET(SaveAtomicTBEKernel(node_op_desc), "[Save][TBEKernel] save atomic tbekernel failed!");
  }
  SetModelCheckAicpuAttr(model, aicpu_op_types, aicpu_tf_op_types);
@@ -89,6 +89,8 @@ class ModelBuilder {
  void SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types,
                              std::set<std::string> &aicpu_tf_op_types);
  Status SaveAtomicTBEKernel(const OpDescPtr &op_desc);
  uint64_t session_id_;
  map<int64_t, size_t> mem_type_to_mem_offset_;
@@ -49,6 +49,7 @@ const char *const kIsLastNode = "is_last_node";
const char *const kIsInputVar = "INPUT_IS_VAR";
const char *const kIsOutputVar = "OUTPUT_IS_VAR";
const char *const kProfilingMode = "PROFILING_MODE";
const char *const kIteratorV2 = "IteratorV2";
const uint32_t kProfilingArStep = 2;
const uint64_t kProfilingFpStartLogid = 1;
const uint64_t kProfilingBpEndLogid = 2;
@@ -57,6 +58,7 @@ const uint64_t kProfilingArEndLogid = 4;
const uint64_t kProfilingIterEndLogid = 65535;
const int64_t kHashFactor = 100000;
const int64_t kInvalidGroupId = -1;
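// Node types treated as forward-pass start candidates when auto-locating the FP profiling point.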
const std::set<std::string> kFpNodeTypes = {ge::DATA, ge::GETNEXT, kIteratorV2};
}  // namespace
namespace ge {
TaskGenerator::TaskGenerator(uint8_t *var_mem_base, uint64_t var_mem_size) {
@@ -621,8 +623,10 @@ Status TaskGenerator::AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingP
    if (op_kernel_lib_name.empty()) {
      continue;
    }
    if (op_desc->GetType() == GETNEXT || op_desc->GetType() == DATA) {
    auto type = op_desc->GetType();
    std::string original_type;
    (void)AttrUtils::GetStr(op_desc, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, original_type);
    if (kFpNodeTypes.find(type) != kFpNodeTypes.end() || kFpNodeTypes.find(original_type) != kFpNodeTypes.end()) {
      auto out_anchor = node->GetOutDataAnchor(0);
      for (auto &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) {
        GE_CHECK_NOTNULL(peer_in_anchor);
@@ -20,9 +20,12 @@
#include <string>
#include "graph/load/model_manager/model_manager.h"
#include "graph/load/model_manager/davinci_model.h"
#include "omm/csa_interact.h"
namespace ge {
using Uint32Pair = pair<uint32_t, uint32_t>;
const uint32_t kInvalidModelId = UINT32_MAX;
GraphExecutor::GraphExecutor()
    : init_flag_(false),
      train_graph_flag_(false),
@@ -358,7 +361,8 @@ Status GraphExecutor::ExecuteGraph(GraphId graph_id, const GeRootModelPtr &ge_ro
}
Status GraphExecutor::ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &ge_root_model,
                                        const std::vector<InputTensorInfo> &input_tensor) {
                                        const std::vector<InputTensorInfo> &input_tensor,
                                        const RunAsyncCallback& callback) {
  GELOGI("[GraphExecutor] Start to async execute graph, graph_id=%u", graph_id);
  if (graph_id != last_graph_id_) {
    auto ret = FreeExecuteMemory();
@@ -368,7 +372,7 @@ Status GraphExecutor::ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &
  }
  last_graph_id_ = graph_id;
  GE_CHECK_NOTNULL_EXEC(ge_root_model, return FAILED);
  Status ret = AsyncExecuteModel(ge_root_model->GetModelId(), input_tensor);
  Status ret = AsyncExecuteModel(ge_root_model, input_tensor, callback);
  if (ret != SUCCESS) {
    GELOGE(GE_GRAPH_SYNC_MODEL_FAILED, "[GraphExecutor] AsyncExecuteModel Error!");
    return GE_GRAPH_SYNC_MODEL_FAILED;
@@ -378,11 +382,81 @@ Status GraphExecutor::ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &
  return SUCCESS;
}
Status GraphExecutor::AsyncExecuteModel(uint32_t model_id, const std::vector<InputTensorInfo> &inputs) {
bool CompareByLoad(const Uint32Pair &lhs, const Uint32Pair &rhs) {
  return lhs.second < rhs.second;
}
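// Selects which loaded instance of the root model should serve this request: a single instance
// is returned directly; among multiple instances, the one with the lowest load wins.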
uint32_t GraphExecutor::GetExecuteModelId(const GeRootModelPtr &ge_root_model) {
  std::vector<uint32_t> model_ids = ge_root_model->GetAllModelId();
  if (model_ids.empty()) {
    return kInvalidModelId;
  }
  if (model_ids.size() == 1) {
    return ge_root_model->GetModelId();
  }
  std::vector<Uint32Pair> model_id_to_loads;
  auto model_manager = ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  for (auto model_id : model_ids) {
    auto davinci_model = model_manager->GetModel(model_id);
    auto hybrid_model = model_manager->GetHybridModel(model_id);
    if (hybrid_model == nullptr) {
      GE_CHECK_NOTNULL(davinci_model);
    }
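    // Load metric: queued input count plus one if the instance is currently running; an idle
    // instance (load == 0) is picked immediately.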
    uint32_t input_load = hybrid_model != nullptr ? hybrid_model->GetDataInputerSize() :
                                                    davinci_model->GetDataInputerSize();
    uint32_t running_load = hybrid_model != nullptr ? static_cast<uint32_t>(hybrid_model->GetRunningFlag()) :
                                                      static_cast<uint32_t>(davinci_model->GetRunningFlag());
    uint32_t load = input_load + running_load;
    if (load == 0) {
      return model_id;
    }
    model_id_to_loads.emplace_back(model_id, load);
  }
  sort(model_id_to_loads.begin(), model_id_to_loads.end(), CompareByLoad);
  if (model_id_to_loads.empty()) {
    return kInvalidModelId;
  }
  return model_id_to_loads.begin()->first;
}
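// Registers the RunAsync callback on whichever form the model was loaded as (hybrid or davinci).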
Status GraphExecutor::SetCallback(uint32_t model_id, const GeRootModelPtr &ge_root_model,
                                  const RunAsyncCallback &callback) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  if (model_manager->IsNeedHybridLoad(*ge_root_model)) {
    auto model = model_manager->GetHybridModel(model_id);
    GE_CHECK_NOTNULL(model);
    if (model->SetRunAsyncListenerCallback(callback) != SUCCESS) {
      GELOGE(FAILED, "SetRunAsyncListenerCallback failed.");
      return FAILED;
    }
  } else {
    auto model = model_manager->GetModel(model_id);
    GE_CHECK_NOTNULL(model);
    if (model->SetRunAsyncListenerCallback(callback) != SUCCESS) {
      GELOGE(FAILED, "SetRunAsyncListenerCallback failed.");
      return FAILED;
    }
  }
  return SUCCESS;
}
Status GraphExecutor::AsyncExecuteModel(const GeRootModelPtr &ge_root_model, const std::vector<InputTensorInfo> &inputs,
                                        const RunAsyncCallback &callback) {
  uint32_t model_id = GetExecuteModelId(ge_root_model);
  if (model_id == kInvalidModelId) {
    GELOGE(INTERNAL_ERROR, "No valid model id.");
    return INTERNAL_ERROR;
  }
  try {
    auto model_manager = ge::ModelManager::GetInstance();
    GE_CHECK_NOTNULL(model_manager);
    GELOGI("RunAsync begin.model_id %u", model_id);
    if (SetCallback(model_id, ge_root_model, callback) != SUCCESS) {
      GELOGE(FAILED, "RunAsync: SetCallBack for model fail");
      return FAILED;
    }
    Status ret = model_manager->DataInputTensor(model_id, inputs);
    if (ret != SUCCESS) {
@@ -50,7 +50,7 @@ class GraphExecutor {
                          std::vector<GeTensor> &output_tensor);
  ge::Status ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &ge_root_model,
                               const std::vector<InputTensorInfo> &input_tensor);
                               const std::vector<InputTensorInfo> &input_tensor, const RunAsyncCallback &callback);
  Status SetCondition(std::mutex *mutex, std::condition_variable *cond, std::shared_ptr<GraphModelListener> listener);
@@ -116,6 +116,8 @@ class GraphExecutor {
  static Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info);
  uint32_t GetExecuteModelId(const GeRootModelPtr &ge_root_model);
 private:
  Status PrepareInputData(const std::vector<GeTensor> &input_tensor, InputData &graph_input_data,
                          OutputData &graph_output_data, std::vector<InputOutputDescInfo> &output_desc);
@@ -123,7 +125,8 @@ class GraphExecutor {
  Status SyncExecuteModel(uint32_t model_id, const std::vector<GeTensor> &input_tensor,
                          std::vector<GeTensor> &output_tensor);
  Status AsyncExecuteModel(uint32_t model_id, const std::vector<InputTensorInfo> &input_tensor);
  Status AsyncExecuteModel(const GeRootModelPtr &ge_root_model, const std::vector<InputTensorInfo> &input_tensor,
                           const RunAsyncCallback &callback);
  void InitModelIdInfo(std::vector<uint32_t> &out_model_id_info, std::vector<SubGraphInfoPtr> &sub_graph_vec,
                       uint32_t output_size);
@@ -132,6 +135,9 @@ class GraphExecutor {
  Status MallocInOutBuffer(const std::vector<uint64_t> &buffer_size, std::vector<void *> &data_addr);
  static Status SetCallback(uint32_t model_id, const GeRootModelPtr &ge_root_model,
                            const RunAsyncCallback &callback);
  bool init_flag_;
  bool train_graph_flag_;
@@ -60,7 +60,6 @@ Status GraphLoader::LoadModelOnline(uint32_t &model_id, const std::shared_ptr<ge
    GELOGE(GE_GRAPH_PARAM_NULLPTR, "[LoadGraph] GE load graph model_ptr is nullptr.");
    return GE_GRAPH_PARAM_NULLPTR;
  }
  model_id = ge_root_model_ptr->GetModelId();
  auto model_manager = ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
@@ -134,6 +134,8 @@ class DataInputer {
  ///
  void Stop() { queue_.Stop(); }
  uint32_t Size() { return queue_.Size(); }
 private:
  ///
  /// @ingroup domi_ome
@@ -31,6 +31,7 @@
#include "common/scope_guard.h"
#include "common/thread_pool.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/compute_graph.h"
#include "graph/debug/ge_attr_define.h"
@@ -297,6 +298,11 @@ void DavinciModel::ReleaseTask() {
      GE_CHK_STATUS(task->Release(), "Release task failed.");
    }
  }
  for (auto &item : label_goto_args_) {
    GE_FREE_RT_LOG(item.second.first);
  }
  label_goto_args_.clear();
}
Status DavinciModel::Assign(const GeModelPtr &ge_model) {
@@ -1334,6 +1340,39 @@ void DavinciModel::ParseDynamicOutShape(const std::vector<std::string> &str_info
  }
}
Status DavinciModel::GetLabelGotoAddr(uint32_t label_index, rtMemType_t mem_type, void *&arg_addr, uint32_t &arg_size) {
  std::lock_guard<std::mutex> lock(label_args_mutex_);
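  // Return the cached device args if this label's goto args were already built.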
  auto it = label_goto_args_.find(label_index);
  if (it != label_goto_args_.end()) {
    arg_addr = it->second.first;
    arg_size = it->second.second;
    return SUCCESS;
  }
  if (label_index >= label_list_.size()) {
    GELOGE(INTERNAL_ERROR, "Invalid label id:%u, label size:%zu", label_index, label_list_.size());
    return INTERNAL_ERROR;
  }
  GE_CHECK_NOTNULL(label_list_[label_index]);
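  // Allocate device memory for the label table and copy the label device info into it; the
  // buffer is cached in label_goto_args_ and freed in ReleaseTask().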
  vector<rtLabel_t> label_used = { label_list_[label_index] };
  arg_size = label_used.size() * sizeof(rtLabelDevInfo);
  rtError_t rt_ret = rtMalloc(&arg_addr, arg_size, mem_type);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  label_goto_args_[label_index] = { arg_addr, arg_size };
  rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), arg_addr, arg_size);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rtLabelListCpy failed, error: %#x", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  return SUCCESS;
}
/// @ingroup ge
/// @brief LabelSet Op Initialize.
/// @param [in] op_desc: LabelSet Op descriptor.
@@ -2547,6 +2586,8 @@ void *DavinciModel::Run(DavinciModel *model) {
  ErrorManager::GetInstance().SetStage(ErrorMessage::kModelExecute, ErrorMessage::kModelExecute);
  while (model->RunFlag()) {
    // The model hasn't truly started running before data is received.
    model->SetRunningFlag(false);
    bool rslt_flg = true;
    if (model->GetDataInputer() == nullptr) {
      GELOGW("Data inputer is nullptr.");
@@ -2556,6 +2597,8 @@ void *DavinciModel::Run(DavinciModel *model) {
    std::shared_ptr<InputDataWrapper> data_wrapper;
    Status ret = model->GetDataInputer()->Pop(data_wrapper);
    // The model truly starts running only after data has been received.
    model->SetRunningFlag(true);
    if (data_wrapper == nullptr || ret != SUCCESS) {
      GELOGI("data_wrapper is null!");
      continue;
@@ -2642,7 +2685,9 @@ void *DavinciModel::Run(DavinciModel *model) {
    model->iterator_count_++;
    model->is_first_execute_ = false;
    GELOGI("run iterator count is %lu", model->iterator_count_);
    // model run finished
    model->SetRunningFlag(false);
    GELOGI("run iterator count is %lu, model_id:%u", model->iterator_count_, model->model_id_);
  }
  CsaInteract::GetInstance().WriteInternalErrorCode();
@@ -2700,7 +2745,7 @@ Status DavinciModel::ModelRunStart() {
  error_context_ = ErrorManager::GetInstance().GetErrorContext();
  CREATE_STD_THREAD(thread_id_, DavinciModel::Run, this);
  GELOGI("model tread create success, model id:%u.", model_id_);
  GELOGI("model thread create success, model id:%u.", model_id_);
  return SUCCESS;
}
@@ -2836,23 +2881,16 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec
  GELOGI("DavinciModel::UpdateKnownNodeArgs in");
  GE_CHK_STATUS_RET(CreateKnownZeroCopyMap(inputs, outputs),
                    "DavinciModel::UpdateKnownNodeArgs create map for input/output zero copy.");
  if (!base_addr_not_changed_) {
    total_io_addrs_.clear();
    orig_total_io_addrs_.clear();
    for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) {
      auto &task = task_list_[task_index];
      if (task != nullptr) {
        Status ret = task->UpdateArgs();
        if (ret != SUCCESS) {
          GELOGE(FAILED, "task %zu created by davinci model is nullptr.", task_index);
          return FAILED;
        }
  total_io_addrs_.clear();
  for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) {
    auto &task = task_list_[task_index];
    if (task != nullptr) {
      Status ret = task->UpdateArgs();
      if (ret != SUCCESS) {
        GELOGE(FAILED, "task %zu created by davinci model is nullptr.", task_index);
        return FAILED;
      }
    }
    // cache latest iterator io addr
    orig_total_io_addrs_ = total_io_addrs_;
  } else {
    total_io_addrs_ = orig_total_io_addrs_;
  }
  GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_, false), "DavinciModel::UpdateKnownZeroCopyAddr failed.");
@@ -2892,6 +2930,14 @@ Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) {
  return SUCCESS;
}
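// Queries the runtime for a feature capability; is_support becomes true when the runtime
// reports RT_CAPABILITY_SUPPORT for the given feature.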
Status DavinciModel::CheckCapability(rtFeatureType_t featureType, int32_t featureInfo, bool &is_support) const {
  int64_t value = RT_CAPABILITY_SUPPORT;
  auto rt_ret = rtGetRtCapability(featureType, featureInfo, &value);
  GE_CHK_BOOL_RET_STATUS(rt_ret == RT_ERROR_NONE, FAILED, "call rtGetRtCapability failed!");
  is_support = (value == RT_CAPABILITY_SUPPORT) ? true : false;
  return SUCCESS;
}
Status DavinciModel::MallocKnownArgs() {
  GELOGI("DavinciModel::MallocKnownArgs in");
  const auto &model_task_def = ge_model_->GetModelTaskDefPtr();
@@ -2910,20 +2956,22 @@ Status DavinciModel::MallocKnownArgs() {
      return ret;
    }
  }
  rtError_t rt_ret;
  // malloc args memory
  if (total_args_size_ == 0) {
    GELOGW("DavinciModel::MallocKnownArgs total_args_size_ equals to zero.");
    return SUCCESS;
  }
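  // Prefer TS-accessible 4G-limited memory for the args buffers when the runtime supports it;
  // otherwise fall back to HBM.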
  bool is_support = false;
  GE_CHK_STATUS_RET_NOLOG(CheckCapability(FEATURE_TYPE_MEMORY, MEMORY_INFO_TS_4G_LIMITED, is_support));
  auto mem_type = is_support ? RT_MEMORY_TS_4G : RT_MEMORY_HBM;
  rtError_t rt_ret = rtMalloc(&args_, total_args_size_, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  if (total_args_size_ != 0) {
    rt_ret = rtMalloc(&args_, total_args_size_, mem_type);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
  }
  // malloc dynamic and static hybrid memory
  if (total_hybrid_args_size_ != 0) {
    rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM);
    rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, mem_type);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -2932,7 +2980,7 @@ Status DavinciModel::MallocKnownArgs() {
  // malloc fixed addr memory, eg: rts op
  if (total_fixed_addr_size_ != 0) {
    GELOGI("Begin to allocate fixed addr.");
    rt_ret = rtMalloc(&fixed_addrs_, total_fixed_addr_size_, RT_MEMORY_HBM);
    rt_ret = rtMalloc(&fixed_addrs_, total_fixed_addr_size_, mem_type);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -3025,9 +3073,8 @@ Status DavinciModel::DistributeTask() {
                  task_def.kernel_ex().op_index());
    OpDescPtr op = GetOpByIndex(op_index);
    GE_CHECK_NOTNULL(op);
    if (reinterpret_cast<void *>(task->GetDumpArgs()) != nullptr) {
      bool call_dump = GetDumpProperties().IsLayerNeedDump(name_, om_name_, op->GetName()) && task->CallSaveDumpInfo();
      bool call_dump = OpNeedDump(op->GetName()) && task->CallSaveDumpInfo();
      if (call_dump || is_op_debug_reg_) {
        SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs());
      }
@@ -3047,11 +3094,16 @@ Status DavinciModel::DistributeTask() {
  return SUCCESS;
}
void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) {
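// Dump is required when the dump properties name DUMP_ALL_MODEL, this model's dump name, or
// its om file name.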
bool DavinciModel::ModelNeedDump() {
  auto all_dump_model = GetDumpProperties().GetAllDumpModel();
  bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end();
  bool findByModelName = all_dump_model.find(name_) != all_dump_model.end();
  if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || findByOmName || findByModelName) {
  bool ret = all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
             all_dump_model.find(dump_model_name_) != all_dump_model.end() ||
             all_dump_model.find(om_name_) != all_dump_model.end();
  return ret;
}
void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) {
  if (ModelNeedDump()) {
    GELOGI("start save end_graph_info to dumper, task_id is %u, stream_id is %u", task_id, stream_id);
    data_dumper_.SaveEndGraphId(task_id, stream_id);
  }
@@ -3851,7 +3903,10 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id)
}
void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name) {
  data_dumper_.SetModelName(name_);
  if (dump_model_name_.empty()) {
    dump_model_name_ = name_;
  }
  data_dumper_.SetModelName(dump_model_name_);
  data_dumper_.SetModelId(model_id_);
  data_dumper_.SetOmName(om_name_);
  data_dumper_.SetComputeGraph(graph);
@@ -4040,7 +4095,7 @@ int64_t DavinciModel::GetFixedAddrsSize(string tensor_name) {
Status DavinciModel::InitL1DataDumperArgs() {
  auto all_dump_model = GetDumpProperties().GetAllDumpModel();
  bool find_by_om_name = all_dump_model.find(om_name_) != all_dump_model.end();
  bool find_by_model_name = all_dump_model.find(name_) != all_dump_model.end();
  bool find_by_model_name = all_dump_model.find(dump_model_name_) != all_dump_model.end();
  bool dump_l1fusion_op =
      (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end()) || find_by_om_name || find_by_model_name;
  if (dump_l1fusion_op) {
@@ -4061,4 +4116,10 @@ Status DavinciModel::InitL1DataDumperArgs() {
  return SUCCESS;
}
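// Forwards the RunAsync callback to the model's listener; fails if the listener is not a
// RunAsyncListener.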
Status DavinciModel::SetRunAsyncListenerCallback(const RunAsyncCallback &callback) {
  auto listener = dynamic_cast<RunAsyncListener *>(listener_.get());
  GE_CHECK_NOTNULL(listener);
  listener->SetCallback(callback);
  return SUCCESS;
}
}  // namespace ge
@@ -221,6 +221,11 @@ class DavinciModel {
  ///
  DataInputer *const GetDataInputer() const { return data_inputer_; }
  uint32_t GetDataInputerSize() {
    GE_CHECK_NOTNULL(data_inputer_);
    return data_inputer_->Size();
  }
  // get Stream number
  uint32_t StreamNum() const { return runtime_param_.stream_num; }
@@ -248,7 +253,10 @@ class DavinciModel {
  string Name() const { return name_; }
  // om_name
  string OmName() const { return om_name_; }
  const string &OmName() const { return om_name_; }
  // dump_model_name
  const string &DumpModelName() const { return dump_model_name_; }
  // version
  uint32_t Version() const { return version_; }
@@ -273,6 +281,8 @@ class DavinciModel {
  const vector<rtLabel_t> &GetLabelList() const { return label_list_; }
  Status GetLabelGotoAddr(uint32_t label_index, rtMemType_t memory_type, void *&addr, uint32_t &size);
  Status DestroyThread();
  // get Op
@@ -481,6 +491,12 @@ class DavinciModel {
    data_dumper_.DumpShrink();
  }
  bool OpNeedDump(const string &op_name) {
    return GetDumpProperties().IsLayerNeedDump(dump_model_name_, om_name_, op_name);
  }
  bool ModelNeedDump();
  void SetEndGraphId(uint32_t task_id, uint32_t stream_id);
  DavinciModel &operator=(const DavinciModel &model) = delete;
@@ -528,11 +544,11 @@ class DavinciModel {
  }
  void SetKnownNode(bool known_node) { known_node_ = known_node; }
  bool IsKnownNode() { return known_node_; }
  Status CheckCapability(rtFeatureType_t featureType, int32_t featureInfo, bool &is_support) const;
  Status MallocKnownArgs();
  Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs);
  Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
  Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs, bool update_args = true);
  void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }
  Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const;
  Status GetAllAippInputOutputDims(uint32_t index, vector<InputOutputDims> &input_dims,
@@ -540,6 +556,7 @@ class DavinciModel {
  // om file name
  void SetOmName(const string &om_name) { om_name_ = om_name; }
  void SetDumpModelName(const string &dump_model_name) { dump_model_name_ = dump_model_name; }
  void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); }
  const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); }
@@ -548,6 +565,10 @@ class DavinciModel {
    return data_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info);
  }
  bool GetRunningFlag() const { return running_flg_; }
  void SetRunningFlag(bool flag) { running_flg_ = flag; }
  Status SetRunAsyncListenerCallback(const RunAsyncCallback &callback);
 private:
  // memory address of weights
  uint8_t *weights_mem_base_;
@@ -886,6 +907,7 @@ class DavinciModel {
  // used for inference data dump
  string om_name_;
  string dump_model_name_;
  uint32_t version_;
  GeModelPtr ge_model_;  // release after DavinciModel::Init
@@ -911,6 +933,8 @@ class DavinciModel {
  shared_ptr<ModelListener> listener_;
  bool run_flg_;
  // indicates whether the model is currently running with data
  bool running_flg_ = false;
  mutex mux_run_flg_;
@@ -930,6 +954,9 @@ class DavinciModel {
  vector<rtLabel_t> label_list_;
  set<uint32_t> label_id_indication_;
  mutex label_args_mutex_;
  map<uint32_t, pair<void *, uint32_t>> label_goto_args_;
  mutex outside_addrs_mutex_;
  vector<ZeroCopyTask> zero_copy_tasks_;  // Task used Data or NetOutput addr.
  set<const void *> copy_only_addrs_;     // Address need copy to original place.
@@ -1002,8 +1029,6 @@ class DavinciModel {
  map<const void *, void *> known_input_data_info_;
  map<const void *, void *> known_output_data_info_;
  vector<void *> total_io_addrs_;
  vector<void *> orig_total_io_addrs_;
  bool base_addr_not_changed_ = false;
  vector<vector<int64_t>> batch_info_;
  vector<vector<int64_t>> combined_batch_info_;
@@ -271,7 +271,7 @@ ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector<uin
  return SUCCESS;
}
ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string &model_name,
ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string &om_name,
                                                 const shared_ptr<ge::GeRootModel> &ge_root_model,
                                                 const shared_ptr<ModelListener> &listener) {
  auto hybrid_model = hybrid::HybridDavinciModel::Create(ge_root_model);
@@ -279,13 +279,24 @@ ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string
  hybrid_model->SetListener(listener);
  hybrid_model->SetModelId(model_id);
  hybrid_model->SetDeviceId(GetContext().DeviceId());
  hybrid_model->SetModelName(model_name);
  hybrid_model->SetOmName(om_name);
  GE_CHK_STATUS_RET(hybrid_model->Init(), "Failed to init hybrid model. model_id = %u", model_id);
  auto shared_model = std::shared_ptr<hybrid::HybridDavinciModel>(hybrid_model.release());
  InsertModel(model_id, shared_model);
  return SUCCESS;
}
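// Hybrid (dynamic-shape) loading is required for unknown-shape graphs, dynamic-shape
// partitioned graphs, and host execution.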
bool ModelManager::IsNeedHybridLoad(ge::GeRootModel &ge_root_model) {
  auto root_graph = ge_root_model.GetRootGraph();
  if (root_graph == nullptr) {
    GELOGE(FAILED, "no model on root model");
    return false;
  }
  bool is_shape_unknown = root_graph->GetGraphUnknownFlag();
  bool is_dsp_partitioned_graph = false;
  (void)AttrUtils::GetBool(root_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dsp_partitioned_graph);
  return is_shape_unknown || is_dsp_partitioned_graph || GetContext().GetHostExecFlag();
}
///
/// @ingroup domi_ome
/// @brief load model online
@@ -296,13 +307,12 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
  GE_CHK_BOOL_RET_STATUS(listener.get() != nullptr, PARAM_INVALID, "Param incorrect, listener is null");
  if (model_id == INVALID_MODEL_ID) {
    GenModelId(&model_id);
    GELOGD("Generate new model_id:%u", model_id);
  }
  auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel();
  string model_name = "";
  bool is_shape_unknown = ge_root_model->GetRootGraph()->GetGraphUnknownFlag();
  // if multi subgraph is known, do hybrid load process
  if (is_shape_unknown || GetContext().GetHostExecFlag() || (name_to_model.size() > 1)) {
    return DoLoadHybridModelOnline(model_id, model_name, ge_root_model, listener);
  string om_name;
  if (IsNeedHybridLoad(*ge_root_model)) {
    return DoLoadHybridModelOnline(model_id, om_name, ge_root_model, listener);
  }
  mmTimespec timespec = mmGetTickCount();
@@ -330,7 +340,18 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
  GE_IF_BOOL_EXEC(SUCCESS != (ret = davinci_model->Assign(ge_model)), GELOGW("assign model to modeldef failed.");
                  break;);
  GE_TIMESTAMP_END(Assign, "GraphLoader::ModelAssign");
  /// In multi-threaded inference, threads that share a session_id can fail; models loaded from
  /// the same root model would otherwise all carry the same session_id, so generate a fresh
  /// session_id for inference here during model load.
  if (!ge_root_model->GetTrainFlag()) {
    uint64_t new_session_id;
    ret = GenSessionId(new_session_id);
    GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed.");
    ret = davinci_model->UpdateSessionId(new_session_id);
    GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed.");
    ge_model->InsertSessionMap(model_id, new_session_id);
    GELOGD("Update new session id: %lu.", new_session_id);
  }
  GE_TIMESTAMP_START(Init);
  GE_IF_BOOL_EXEC(SUCCESS != (ret = davinci_model->Init()), GELOGW("DavinciInit failed."); break;);
  GE_TIMESTAMP_END(Init, "GraphLoader::ModelInit");
@@ -343,16 +364,16 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
  return ret;
}
void ModelManager::InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model) {
  GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", id);
void ModelManager::InsertModel(uint32_t model_id, std::shared_ptr<DavinciModel> &davinci_model) {
  GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", model_id);
  std::lock_guard<std::recursive_mutex> lock(map_mutex_);
  model_map_[id] = davinci_model;
  model_map_[model_id] = davinci_model;
}
void ModelManager::InsertModel(uint32_t id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) {
  GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", id);
void ModelManager::InsertModel(uint32_t model_id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) {
  GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", model_id);
  std::lock_guard<std::recursive_mutex> lock(map_mutex_);
  hybrid_model_map_[id] = hybrid_model;
  hybrid_model_map_[model_id] = hybrid_model;
}
Status ModelManager::DeleteModel(uint32_t id) {
@@ -294,6 +294,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
                                std::vector<InputOutputDims> &output_dims);
  bool IsDynamicShape(uint32_t model_id);
  bool IsNeedHybridLoad(ge::GeRootModel &ge_root_model);
  ge::Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info);
  ge::Status EnableExceptionDump(const std::map<string, string> &options);
@@ -329,8 +330,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
  /// @ingroup domi_ome
  /// @brief insert new model into model manager set
  ///
  void InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model);
  void InsertModel(uint32_t id, std::shared_ptr<hybrid::HybridDavinciModel> &hybrid_model);
  void InsertModel(uint32_t model_id, std::shared_ptr<DavinciModel> &davinci_model);
  void InsertModel(uint32_t model_id, std::shared_ptr<hybrid::HybridDavinciModel> &hybrid_model);
  ///
  /// @ingroup domi_ome
@@ -384,7 +384,8 @@ Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDesc
  switch (mem_type) {
    case RT_MEMORY_RDMA_HBM:
      if (offset < 0) {
        GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast<uint8_t *>(offset));
        GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p",
               reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(offset)));
        return PARAM_INVALID;
      }
      var_addr = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(offset));
@@ -45,10 +45,7 @@ Status EndGraphTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
Status EndGraphTaskInfo::Distribute() {
  GELOGI("EndGraphTaskInfo Distribute Start.");
  GE_CHECK_NOTNULL(davinci_model_);
  auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel();
  if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
      all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() ||
      all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) {
  if (davinci_model_->ModelNeedDump()) {
    GELOGI("Start to call rtEndGraphEx");
    rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag);
    if (rt_ret != RT_ERROR_NONE) {
@@ -238,8 +238,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
}
void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) {
  if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
                                                          op_desc->GetName())) {
  if (davinci_model_->OpNeedDump(op_desc->GetName())) {
    dump_flag_ = RT_KERNEL_DUMPFLAG;
    dump_args_ = addr;
  }
@@ -124,7 +124,8 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
      return FAILED;
    }
    ret = InitTVMTask(args_offset_tmp[0], kernel_def);
    io_addr_offset_ = args_offset_tmp[0];
    ret = InitTVMTask(io_addr_offset_, kernel_def);
  } else if (kernel_type_ == ccKernelType::CUSTOMIZED) {
    ret = InitAICPUCustomTask(context.op_index(), kernel_def);
  } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
@@ -380,7 +381,8 @@ Status KernelTaskInfo::Distribute() {
  GELOGD("KernelTaskInfo Distribute Start.");
  if (davinci_model_->IsKnownNode()) {
    if (kernel_type_ == ccKernelType::TE) {
      args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
      args_ = l2_buffer_on_ ? davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_)
                            : davinci_model_->GetCurrentArgsAddr(args_offset_);
    } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
      args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_);
    }
@@ -407,10 +409,7 @@ Status KernelTaskInfo::Distribute() {
          call_skt, task_id_, skt_id_, skt_info.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_);
  // l1 fusion enable and env flag open (kCloseSkt for skt debug)
  bool open_dump = false;
  auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel();
  if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
      all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() ||
      all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) {
  if (davinci_model_->ModelNeedDump()) {
    open_dump = true;
  }
  if (call_skt && (env_flag != kCloseSkt) && !open_dump) {
@@ -449,29 +448,41 @@ void KernelTaskInfo::SetIoAddrs(const OpDescPtr &op_desc) {
  }
}
| Status KernelTaskInfo::CopyNoncontinuousArgs(uint16_t offset) { | |||
| GE_CHECK_NOTNULL(davinci_model_); | |||
| // copy new io addrs | |||
| vector<void *> io_addrs = io_addrs_; | |||
| davinci_model_->UpdateKnownZeroCopyAddr(io_addrs); | |||
| auto addr_size = kAddrLen * io_addrs.size(); | |||
| // copy io addr | |||
| errno_t sec_ret = memcpy_s(args_addr.get() + offset, addr_size, io_addrs.data(), addr_size); | |||
| if (sec_ret != EOK) { | |||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
| return FAILED; | |||
| } | |||
| // copy args to device | |||
| rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGD("Copy noncontinuous args success, kernel type %d.", kernel_type_); | |||
| return SUCCESS; | |||
| } | |||
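| The routine above stages the refreshed I/O pointers in a host-side mirror of the args buffer, then pushes the whole mirror to the device in one copy. A minimal standalone sketch of that staging pattern, with plain memcpy standing in for memcpy_s/rtMemcpy and all names assumed for illustration: | |||
| #include <cstring> | |||
| #include <vector> | |||
| // Patch new device pointers into a host mirror at `offset`, then push the | |||
| // whole mirror to the device buffer (the real code bounds-checks and uses | |||
| // rtMemcpy with RT_MEMCPY_HOST_TO_DEVICE for the second copy). | |||
| void UpdateArgsAt(std::vector<unsigned char> &host_mirror, size_t offset, | |||
|                   const std::vector<void *> &io_addrs, unsigned char *device_args) { | |||
|   const size_t bytes = io_addrs.size() * sizeof(void *); | |||
|   std::memcpy(host_mirror.data() + offset, io_addrs.data(), bytes); | |||
|   std::memcpy(device_args, host_mirror.data(), host_mirror.size()); | |||
| } | |||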
| Status KernelTaskInfo::UpdateArgs() { | |||
| GELOGI("KernelTaskInfo::UpdateArgs in."); | |||
| GE_CHECK_NOTNULL(davinci_model_); | |||
| if (kernel_type_ == ccKernelType::TE) { | |||
| if (l2_buffer_on_) { | |||
| return CopyNoncontinuousArgs(io_addr_offset_); | |||
| } | |||
| davinci_model_->SetTotalIOAddrs(io_addrs_); | |||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||
| vector<void *> io_addrs = io_addrs_; | |||
| davinci_model_->UpdateKnownZeroCopyAddr(io_addrs); | |||
| uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead); | |||
| auto addrs_size = sizeof(uint64_t) * io_addrs.size(); | |||
| errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size); | |||
| if (sec_ret != EOK) { | |||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
| return FAILED; | |||
| } | |||
| // copy args to device | |||
| rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| return CopyNoncontinuousArgs(sizeof(aicpu::AicpuParamHead)); | |||
| } | |||
| GELOGI("KernelTaskInfo::UpdateArgs success."); | |||
| return SUCCESS; | |||
| } | |||
| @@ -516,8 +527,8 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { | |||
| return SUCCESS; | |||
| } | |||
| char *sm_contrl = const_cast<char *>(sm_desc.data()); | |||
| rtL2Ctrl_t *l2_ctrl_info = reinterpret_cast<rtL2Ctrl_t *>(sm_contrl); | |||
| char *sm_control = const_cast<char *>(sm_desc.data()); | |||
| rtL2Ctrl_t *l2_ctrl_info = reinterpret_cast<rtL2Ctrl_t *>(sm_control); | |||
| uint64_t gen_base_addr = davinci_model_->GetRtBaseAddr(); | |||
| // There is no weight for te op now. Update L2_mirror_addr by data memory base. | |||
| @@ -545,19 +556,31 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { | |||
| return SUCCESS; | |||
| } | |||
| void KernelTaskInfo::SetContinuousArgs(uint32_t args_size, DavinciModel *davinci_model) { | |||
| args_offset_ = davinci_model->GetTotalArgsSize(); | |||
| davinci_model->SetTotalArgsSize(args_size); | |||
| } | |||
| void KernelTaskInfo::SetNoncontinuousArgs(uint32_t args_size, DavinciModel *davinci_model) { | |||
| hybrid_args_offset_ = davinci_model->GetHybridArgsSize(); | |||
| davinci_model->SetHybridArgsSize(args_size); | |||
| } | |||
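| Both setters share one bump-allocation scheme: a task records the current total as its own offset and then grows the total, so the offsets partition a single contiguous args buffer among all tasks. A self-contained sketch of the idea (type and member names are assumptions): | |||
| #include <cstdint> | |||
| struct ArgsPool { | |||
|   uint32_t total = 0; | |||
|   // Reserve `size` bytes; the returned value is where this task's slice starts. | |||
|   uint32_t Reserve(uint32_t size) { | |||
|     const uint32_t offset = total; | |||
|     total += size; | |||
|     return offset; | |||
|   } | |||
| }; | |||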
| Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| GE_CHECK_NOTNULL(davinci_model); | |||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||
| const domi::KernelContext &context = kernel_def.context(); | |||
| kernel_type_ = static_cast<ccKernelType>(context.kernel_type()); | |||
| uint32_t args_size = kernel_def.args_size(); | |||
| if (kernel_type_ == ccKernelType::TE) { | |||
| uint32_t args_size = kernel_def.args_size(); | |||
| args_offset_ = davinci_model->GetTotalArgsSize(); | |||
| davinci_model->SetTotalArgsSize(args_size); | |||
| GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); | |||
| if (kernel_def.sm_desc().empty()) { | |||
| SetContinuousArgs(args_size, davinci_model); | |||
| return SUCCESS; | |||
| } | |||
| l2_buffer_on_ = true; | |||
| SetNoncontinuousArgs(args_size, davinci_model); | |||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||
| hybrid_args_offset_ = davinci_model->GetHybridArgsSize(); | |||
| davinci_model->SetHybridArgsSize(kernel_def.args_size()); | |||
| GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_); | |||
| SetNoncontinuousArgs(args_size, davinci_model); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -568,8 +591,23 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
| // get tvm op desc | |||
| OpDescPtr op_desc = davinci_model_->GetOpByIndex(ctx_.opIndex); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]); | |||
| errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | |||
| if (sec_ret != EOK) { | |||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
| return FAILED; | |||
| } | |||
| Status ge_ret = UpdateL2Data(kernel_def); | |||
| // update origin l2 data | |||
| if (ge_ret != SUCCESS) { | |||
| return ge_ret; | |||
| } | |||
| if (davinci_model_->IsKnownNode()) { | |||
| args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); | |||
| args_ = l2_buffer_on_ ? davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_) | |||
| : davinci_model_->GetCurrentArgsAddr(args_offset_); | |||
| InitDumpTask(offset); | |||
| return SUCCESS; | |||
| } | |||
| @@ -609,12 +647,6 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| vector<uint8_t> args_info(args_size_); | |||
| errno_t sec_ret = memcpy_s(args_info.data(), args_size_, kernel_def.args().data(), args_size_); | |||
| if (sec_ret != EOK) { | |||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
| return FAILED; | |||
| } | |||
| if ((args_size_ <= offset) || (args_size_ - offset < kAddrLen * tensor_device_addrs.size())) { | |||
| GELOGE(FAILED, "offset >= kernelInfo.argsSize or copy content beyond applied memory."); | |||
| @@ -628,7 +660,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| sec_ret = memcpy_s(args_info.data() + offset, args_size_ - offset, tensor_device_addrs.data(), | |||
| sec_ret = memcpy_s(args_addr.get() + offset, args_size_ - offset, tensor_device_addrs.data(), | |||
| kAddrLen * tensor_device_addrs.size()); | |||
| if (sec_ret != EOK) { | |||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
| @@ -640,19 +672,13 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
| GE_CHK_BOOL_TRUE_EXEC_INFO(davinci_model_->GetOpDugReg(), dump_args_ = static_cast<char *>(args_) + offset, | |||
| "Op debug is open in TVM task info"); | |||
| Status ge_ret = UpdateL2Data(kernel_def); | |||
| // update origin l2 data | |||
| if (ge_ret != SUCCESS) { | |||
| return ge_ret; | |||
| } | |||
| vector<void *> virtual_io_addrs; // use virtual address for zero copy key. | |||
| virtual_io_addrs.insert(virtual_io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||
| virtual_io_addrs.insert(virtual_io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); | |||
| if (op_desc->GetType() == ATOMICADDRCLEAN) { | |||
| virtual_io_addrs.insert(virtual_io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | |||
| } | |||
| davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_info.data(), args_, args_size_, offset); | |||
| davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_addr.get(), args_, args_size_, offset); | |||
| GELOGD("Do InitTVMTask end"); | |||
| return SUCCESS; | |||
| @@ -951,8 +977,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||
| } | |||
| void KernelTaskInfo::InitDumpTask(uint32_t offset) { | |||
| if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
| op_desc_->GetName())) { | |||
| if (davinci_model_->OpNeedDump(op_desc_->GetName())) { | |||
| if (IsL1FusionOp(op_desc_)) { | |||
| dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; | |||
| } else { | |||
| @@ -129,6 +129,9 @@ class KernelTaskInfo : public TaskInfo { | |||
| bool IsL1FusionOp(const OpDescPtr &op_desc); | |||
| void SetIoAddrs(const OpDescPtr &op_desc); | |||
| void InitDumpTask(uint32_t offset); | |||
| void SetContinuousArgs(uint32_t args_size, DavinciModel *davinci_model); | |||
| void SetNoncontinuousArgs(uint32_t args_size, DavinciModel *davinci_model); | |||
| Status CopyNoncontinuousArgs(uint16_t offset); | |||
| // For super kernel | |||
| Status SaveSKTDumpInfo(); | |||
| @@ -163,6 +166,8 @@ class KernelTaskInfo : public TaskInfo { | |||
| uint32_t hybrid_args_offset_ = 0; | |||
| int64_t fixed_addr_offset_ = 0; | |||
| std::unique_ptr<uint8_t[]> args_addr = nullptr; | |||
| uint16_t io_addr_offset_ = 0; | |||
| bool l2_buffer_on_ = false; | |||
| bool call_save_dump_ = false; | |||
| // aicpu ext_info device mem | |||
| @@ -22,7 +22,7 @@ namespace ge { | |||
| constexpr uint8_t kGotoBranchMax = 1; | |||
| LabelGotoExTaskInfo::~LabelGotoExTaskInfo() { | |||
| GE_FREE_RT_LOG(args_); | |||
| args_ = nullptr; | |||
| GE_FREE_RT_LOG(index_value_); | |||
| } | |||
| @@ -49,30 +49,12 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da | |||
| return INTERNAL_ERROR; | |||
| } | |||
| const vector<rtLabel_t> &label_list = davinci_model->GetLabelList(); | |||
| if (label_index >= label_list.size()) { | |||
| GELOGE(PARAM_INVALID, "LabelGotoExTaskInfo: Invalid label id:%u, label size:%zu", label_index, label_list.size()); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| GE_CHECK_NOTNULL(label_list[label_index]); | |||
| vector<rtLabel_t> label_used = { label_list[label_index] }; | |||
| rtMemType_t memory_type = op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE) ? RT_MEMORY_TS_4G : RT_MEMORY_HBM; | |||
| GELOGI("memory_type: %u", memory_type); | |||
| args_size_ = kGotoBranchMax * sizeof(rtLabelDevInfo); | |||
| rtError_t rt_ret = rtMalloc(&args_, args_size_, memory_type); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), args_, args_size_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtLabelListCpy failed, error: %#x", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GE_CHK_STATUS_RET_NOLOG(davinci_model->GetLabelGotoAddr(label_index, memory_type, args_, args_size_)); | |||
| rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), memory_type); | |||
| rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), memory_type); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| @@ -85,7 +67,7 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGI("LabelGotoExTaskInfo Init Success, label id:%u, label:%p.", label_index, label_list[label_index]); | |||
| GELOGI("LabelGotoExTaskInfo Init Success, label id:%u", label_index); | |||
| return SUCCESS; | |||
| } | |||
| @@ -356,6 +356,14 @@ void CachingAllocator::FreeBlocks() { | |||
| (void) FreeCachedBlocks(); | |||
| } | |||
| void CachingAllocator::TryFreeBlocks() { | |||
| GELOGI("Try free blocks."); | |||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
| if (allocated_blocks_.empty()) { | |||
| (void) FreeCachedBlocks(); | |||
| } | |||
| } | |||
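| TryFreeBlocks returns cached device memory only when no allocation is still handed out, under the same recursive mutex that guards the rest of the allocator state. A minimal standalone sketch of that guard, with illustrative names rather than the real CachingAllocator API: | |||
| #include <mutex> | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| class TinyCachingAllocator { | |||
|  public: | |||
|   void TryFreeBlocks() { | |||
|     std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
|     if (allocated_.empty()) { | |||
|       cached_.clear();  // stands in for FreeCachedBlocks() | |||
|     } | |||
|   } | |||
|  private: | |||
|   std::recursive_mutex mutex_; | |||
|   std::unordered_set<void *> allocated_;  // blocks currently handed out | |||
|   std::vector<void *> cached_;            // blocks kept for reuse | |||
| }; | |||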
| void CachingAllocator::FreeBlockBins() { | |||
| GELOGI("Free block bins."); | |||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
| @@ -94,6 +94,13 @@ class CachingAllocator { | |||
| /// | |||
| Status Free(uint8_t *memory_addr, uint32_t device_id = 0); | |||
| /// | |||
| /// @ingroup ge_graph | |||
| /// @brief try to free memory when no memory is referenced | |||
| /// @return void | |||
| /// | |||
| void TryFreeBlocks(); | |||
| private: | |||
| /// | |||
| @@ -117,6 +117,10 @@ const char *const kAIcoreEngine = "AIcoreEngine"; | |||
| const int32_t kDynamicDimsTypeIsGetNext = 0; | |||
| const int32_t kDynamicDimsTypeIsData = 1; | |||
| const char *const kGetNextName = "IteratorV2"; | |||
| const uint32_t kInitGraphCount = 1; | |||
| const uint32_t kNotAdded = 0; | |||
| const uint32_t kStartAdd = 1; | |||
| const uint32_t kDoneAdded = 2; | |||
| bool IsTailingOptimization() { | |||
| string is_tailing_optimization_option; | |||
| @@ -195,6 +199,8 @@ Status GraphManager::Initialize(const std::map<string, string> &options) { | |||
| graph_map_.clear(); | |||
| cache_helper_map_.clear(); | |||
| graph_id_to_add_graph_cond_.clear(); | |||
| graph_count_.clear(); | |||
| init_flag_ = true; | |||
| thread_run_flag_ = true; | |||
| @@ -204,6 +210,20 @@ Status GraphManager::Initialize(const std::map<string, string> &options) { | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::UnloadModel(GeRootModelPtr ge_root_model, uint32_t graph_id) { | |||
| Status ret = SUCCESS; | |||
| for (size_t i = 0; i < ge_root_model->GetAllModelId().size(); ++i) { | |||
| uint32_t model_id = ge_root_model->GetAllModelId()[i]; | |||
| GELOGI("Unload model %u.", model_id); | |||
| ret = GraphLoader::UnloadModel(model_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGW("[GraphManager] unload model failed, modelId=%u, graphId=%u.", model_id, graph_id); | |||
| return ret; | |||
| } | |||
| } | |||
| return ret; | |||
| } | |||
| Status GraphManager::Finalize() { | |||
| if (!init_flag_) { | |||
| GELOGW("GraphManager has not been initialized."); | |||
| @@ -234,7 +254,6 @@ Status GraphManager::Finalize() { | |||
| unload_model_ret = GE_GRAPH_GRAPH_IS_RUNNING; | |||
| continue; | |||
| } | |||
| // unload model | |||
| auto ge_root_model = graph_node->GetGeRootModel(); | |||
| if (ge_root_model != nullptr && ge_root_model->GetModelId() != INVALID_MODEL_ID && graph_node->GetLoadFlag()) { | |||
| @@ -244,15 +263,14 @@ Status GraphManager::Finalize() { | |||
| unload_model_ret = FAILED; | |||
| continue; | |||
| } | |||
| ret = GraphLoader::UnloadModel(ge_root_model->GetModelId()); | |||
| ret = UnloadModel(ge_root_model, iter->first); | |||
| if (ret != SUCCESS) { | |||
| GELOGW("[GraphManager] unload model failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(), iter->first); | |||
| GELOGW("[GraphManager] unload model failed, graph_id=%u.", iter->first); | |||
| unload_model_ret = ret; | |||
| } | |||
| rt_ret = rtDeviceReset(GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGW("[GraphManager] rtDeviceReset failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(), | |||
| iter->first); | |||
| GELOGW("[GraphManager] rtDeviceReset failed, graphId=%u.", iter->first); | |||
| unload_model_ret = FAILED; | |||
| continue; | |||
| } | |||
| @@ -267,6 +285,7 @@ Status GraphManager::Finalize() { | |||
| } | |||
| graph_map_.clear(); | |||
| cache_helper_map_.clear(); | |||
| graph_count_.clear(); | |||
| // graph context | |||
| if (graph_context_ != nullptr) { | |||
| @@ -317,30 +336,59 @@ Status GraphManager::InitDynamicParams(ComputeGraphPtr &compute_graph) { | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||
| const std::map<std::string, std::string> &options, | |||
| const OmgContext &omg_context) { | |||
| if (HasGraphNode(graph_id)) { | |||
| GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[GraphManager] graph exists, graph_id = %u.", graph_id); | |||
| return GE_GRAPH_GRAPH_ALREADY_EXIST; | |||
| void GraphManager::SetAddGraphCondition(GraphId graph_id, uint32_t cond) { | |||
| std::lock_guard<std::mutex> lock(add_graph_cond_mutex_); | |||
| graph_id_to_add_graph_cond_[graph_id] = cond; | |||
| GELOGD("Graph [id:%u] has been added.", graph_id); | |||
| } | |||
| uint32_t GraphManager::GetAddGraphCondition(GraphId graph_id) { | |||
| std::lock_guard<std::mutex> lock(add_graph_cond_mutex_); | |||
| auto it = graph_id_to_add_graph_cond_.find(graph_id); | |||
| if (it != graph_id_to_add_graph_cond_.end()) { | |||
| return it->second; | |||
| } else { | |||
| GELOGD("Graph [id:%u] has not been added.", graph_id); | |||
| return kNotAdded; | |||
| } | |||
| } | |||
| auto compute_graph = GraphUtils::GetComputeGraph(graph); | |||
| if (compute_graph != nullptr) { | |||
| compute_graph->SetGraphID(graph_id); | |||
| bool graph_has_been_added = false; | |||
| if (AttrUtils::GetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, graph_has_been_added) | |||
| && graph_has_been_added) { | |||
| GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, | |||
| "[GraphManager] same graph object can not be added again, graph_id = %u.", graph_id); | |||
| return GE_GRAPH_GRAPH_ALREADY_EXIST; | |||
| } | |||
| (void)AttrUtils::SetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, true); | |||
| compute_graph_ = compute_graph; | |||
| void GraphManager::RemoveAddGraphCondition(GraphId graph_id) { | |||
| std::lock_guard<std::mutex> lock(add_graph_cond_mutex_); | |||
| auto it = graph_id_to_add_graph_cond_.find(graph_id); | |||
| if (it != graph_id_to_add_graph_cond_.end()) { | |||
| graph_id_to_add_graph_cond_.erase(it); | |||
| GELOGD("Successfully removed add_graph_cond of graph [id:%u].", graph_id); | |||
| } else { | |||
| GELOGD("Graph [id:%u] has not been added, no need to remove.", graph_id); | |||
| } | |||
| Status GraphManager::CheckRepeatAdd(uint32_t graph_id, bool &is_added) { | |||
| uint32_t count = 0; | |||
| if (GetGraphCount(graph_id, count) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Get graph [id:%u] count failed, graph might have not been added.", graph_id); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| // a previous thread owning the same graph_id is still in the middle of the AddGraph process | |||
| if (count > 1 && GetAddGraphCondition(graph_id) == kStartAdd) { | |||
| std::unique_lock<std::mutex> lock(add_graph_mutex_); | |||
| GELOGD("Waitting for build end of previous thread."); | |||
| while (GetAddGraphCondition(graph_id) != kDoneAdded) { | |||
| add_graph_cv_.wait(lock); | |||
| } | |||
| GraphNodePtr graph_node; | |||
| Status ret = GetGraphNode(graph_id, graph_node); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "[AddGraph] GetGraphNode failed, graph_id = %u.", graph_id); | |||
| return ret; | |||
| } | |||
| is_added = true; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
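| The kNotAdded/kStartAdd/kDoneAdded states plus the condition variable form a publish-and-wait handshake: the first thread marks kStartAdd, builds, then publishes kDoneAdded and wakes the waiters. A self-contained sketch of the handshake using only standard-library types (names are illustrative, not GE's): | |||
| #include <condition_variable> | |||
| #include <cstdint> | |||
| #include <map> | |||
| #include <mutex> | |||
| enum AddCond : uint32_t { kCondNotAdded = 0, kCondStartAdd = 1, kCondDoneAdded = 2 }; | |||
| std::mutex g_cond_mutex; | |||
| std::condition_variable g_cond_cv; | |||
| std::map<uint32_t, uint32_t> g_add_cond;  // graph_id -> AddCond | |||
| void WaitUntilAdded(uint32_t graph_id) { | |||
|   std::unique_lock<std::mutex> lock(g_cond_mutex); | |||
|   g_cond_cv.wait(lock, [&] { return g_add_cond[graph_id] == kCondDoneAdded; }); | |||
| } | |||
| void PublishAdded(uint32_t graph_id) { | |||
|   { | |||
|     std::lock_guard<std::mutex> lock(g_cond_mutex); | |||
|     g_add_cond[graph_id] = kCondDoneAdded; | |||
|   } | |||
|   g_cond_cv.notify_all();  // wake every thread waiting on this graph_id | |||
| } | |||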
| void GraphManager::SetSessionGraphId(ComputeGraphPtr compute_graph, uint32_t graph_id) { | |||
| std::string session_graph_id; | |||
| if (!AttrUtils::GetStr(*compute_graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id) || session_graph_id.empty()) { | |||
| session_graph_id = "-1_" + to_string(graph_id); | |||
| @@ -352,7 +400,24 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||
| } | |||
| GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0]"); | |||
| } | |||
| } | |||
| Status GraphManager::NotifyWaittingGraph(uint32_t graph_id) { | |||
| uint32_t count = 0; | |||
| if (GetGraphCount(graph_id, count) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Get graph [id:%u] count failed, graph might have not been added.", graph_id); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| GELOGD("Add graph finished, graph_id:%u", graph_id); | |||
| if (count > 1) { | |||
| GELOGD("Finish addgraph, graph_id:%u, graph_count:%u, start to notify.", graph_id, count); | |||
| add_graph_cv_.notify_all(); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::CreateGraphNode(uint32_t graph_id, const Graph &graph, | |||
| const std::map<std::string, std::string> &options) { | |||
| GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id); | |||
| GE_IF_BOOL_EXEC(graph_node == nullptr, GELOGE(FAILED, "GraphNode make shared failed"); | |||
| return FAILED); | |||
| @@ -365,7 +430,62 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||
| ParseOption(options, TUNING_PATH, options_.tuning_path); | |||
| graph_node->SetGraph(graph_ptr); | |||
| graph_node->SetOptions(options); | |||
| graph_node->IncreaseLoadCount(); | |||
| AddGraphNode(graph_id, graph_node); | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::SetStagesOptions(uint32_t graph_id, const GraphManagerOptions &options) { | |||
| CompilerStages &stages = GetCompilerStages(graph_id); | |||
| stages.preparer.SetOptions(options_); | |||
| Status status = stages.optimizer.SetOptions(options_); | |||
| if (status != SUCCESS) { | |||
| GELOGE(status, "Graph optimizer set options failed."); | |||
| return status; | |||
| } | |||
| stages.builder.SetOptions(options_); | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||
| const std::map<std::string, std::string> &options, | |||
| const OmgContext &omg_context) { | |||
| IncreaseGraphCount(graph_id); | |||
| // validation for adding graphs with the same graph_id in a multi-thread scenario | |||
| // 1. a previous thread owning the same graph_id has finished the AddGraph process | |||
| if (GetAddGraphCondition(graph_id) == kDoneAdded) { | |||
| GraphNodePtr graph_node; | |||
| if (GetGraphNode(graph_id, graph_node) != SUCCESS) { | |||
| GELOGE(GE_GRAPH_GRAPH_NOT_EXIST, "Graph not exist while done adding previously, graph_id = %u.", graph_id); | |||
| return GE_GRAPH_GRAPH_NOT_EXIST; | |||
| } | |||
| graph_node->IncreaseLoadCount(); | |||
| return SUCCESS; | |||
| } | |||
| // In a multi-thread scenario, a former thread owning the same graph_id may be | |||
| // in the middle of the AddGraph process; following threads have to wait until | |||
| // it finishes adding the graph, which avoids repeatedly adding the same graph. | |||
| bool is_added = false; | |||
| if (CheckRepeatAdd(graph_id, is_added) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "CheckRepeatAdd for graph[id:%u] failed.", graph_id); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| // The former graph (from a different thread) owning the same graph id has been successfully added. | |||
| if (is_added) { | |||
| return SUCCESS; | |||
| } | |||
| // Do add graph | |||
| SetAddGraphCondition(graph_id, kStartAdd); | |||
| auto compute_graph = GraphUtils::GetComputeGraph(graph); | |||
| GE_CHECK_NOTNULL(compute_graph); | |||
| compute_graph->SetGraphID(graph_id); | |||
| SetSessionGraphId(compute_graph, graph_id); | |||
| if (CreateGraphNode(graph_id, graph, options) != SUCCESS) { | |||
| GELOGE(FAILED, "Failed to create graph_node."); | |||
| return FAILED; | |||
| } | |||
| AddLocalOmgContext(graph_id, omg_context); | |||
| if (!options_.output_datatype.empty()) { | |||
| @@ -376,16 +496,18 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||
| return GRAPH_PARAM_INVALID; | |||
| } | |||
| CompilerStages &stages = GetCompilerStages(graph_id); | |||
| stages.preparer.SetOptions(options_); | |||
| Status status = stages.optimizer.SetOptions(options_); | |||
| if (status != SUCCESS) { | |||
| GELOGE(status, "Graph optimizer set options failed."); | |||
| return status; | |||
| if (SetStagesOptions(graph_id, options_) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Set stage options failed."); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| stages.builder.SetOptions(options_); | |||
| var_acc_ctrl_.AddGraph(graph_id, compute_graph); | |||
| SetAddGraphCondition(graph_id, kDoneAdded); | |||
| // Notify any threads waiting on adding the same graph | |||
| if (NotifyWaittingGraph(graph_id) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "NotifyWaittingGraph failed."); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -895,6 +1017,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: | |||
| if (!graph_node->IsAsync()) { | |||
| ret = LoadGraph(ge_root_model, graph_node); | |||
| } else { | |||
| GE_CHECK_NOTNULL(ge_root_model); | |||
| ret = LoadGraphAsync(ge_root_model, graph_node); | |||
| } | |||
| if (ret != SUCCESS) { | |||
| @@ -909,6 +1032,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: | |||
| if (!graph_node->IsAsync()) { | |||
| ret = LoadGraph(ge_root_model_ptr, graph_node); | |||
| } else { | |||
| GE_CHECK_NOTNULL(ge_root_model); | |||
| ret = LoadGraphAsync(ge_root_model_ptr, graph_node); | |||
| } | |||
| if (ret != SUCCESS) { | |||
| @@ -921,6 +1045,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: | |||
| Status GraphManager::LoadGraph(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) { | |||
| GELOGI("[LoadGraph] run_graph_flag[%d], graph_id[%u]", options_.run_graph_flag, graph_node->GetGraphId()); | |||
| if (options_.run_graph_flag && ge_root_model != nullptr) { | |||
| ge_root_model->SetTrainFlag(GetTrainFlag()); | |||
| // synchronization run graph with model | |||
| std::shared_ptr<GraphModelListener> model_listener = GetModelListener(); | |||
| ModelIdInfo model_id_info; | |||
| @@ -1315,54 +1440,29 @@ bool GraphManager::CheckModelLoad(const GeRootModelPtr &ge_root_model, bool load | |||
| } | |||
| Status GraphManager::RemoveGraph(const GraphId &graph_id) { | |||
| auto it = to_be_deleted_graphs_.find(graph_id); | |||
| if (it != to_be_deleted_graphs_.end()) { | |||
| to_be_deleted_graphs_.erase(it); | |||
| } | |||
| GraphNodePtr graph_node = nullptr; | |||
| Status ret = GetGraphNode(graph_id, graph_node); | |||
| if (ret != SUCCESS) { | |||
| if (ret != SUCCESS || graph_node == nullptr) { | |||
| REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid when GraphManager %s", | |||
| graph_id, __FUNCTION__); | |||
| GELOGE(GE_GRAPH_GRAPH_NOT_EXIST, "[GraphManager] Id %u does not exists.", graph_id); | |||
| return GE_GRAPH_GRAPH_NOT_EXIST; | |||
| } | |||
| if ((graph_node == nullptr) || (graph_node->GetRunFlag())) { | |||
| GELOGE(GE_GRAPH_GRAPH_IS_RUNNING, "[GraphManager] Id %u is running, can't be deleted.", graph_id); | |||
| return GE_GRAPH_GRAPH_IS_RUNNING; | |||
| if (graph_node->GetRunFlag()) { | |||
| // only put the graph into the to-be-deleted list in exceptional scenarios | |||
| to_be_deleted_graphs_.insert(graph_id); | |||
| GELOGI("[GraphManager] Trying to remove running graph[Id:%u], added into to_be_deleted_graphs_.", graph_id); | |||
| return SUCCESS; | |||
| } | |||
| std::lock_guard<std::mutex> lock(unload_model_mutex_); | |||
| Status middle_ret; | |||
| rtError_t rt_ret; | |||
| const std::vector<SubGraphInfoPtr> &all_sub_graph = graph_node->GetAllSubGraph(); | |||
| for (size_t i = 0; i < all_sub_graph.size(); ++i) { | |||
| // must free buffer firstly | |||
| middle_ret = all_sub_graph[i]->FreeInOutBuffer(); | |||
| if (middle_ret != SUCCESS) { | |||
| GELOGE(middle_ret, "[GraphManager] RemoveGraph free mem failed, graph_id=%u.", graph_id); | |||
| ret = middle_ret; | |||
| } | |||
| if (all_sub_graph[i]->GeModelIsValid() && all_sub_graph[i]->GetModelIdInfo().model_id != INVALID_MODEL_ID) { | |||
| // unload model | |||
| GELOGI("UnloadModel via new ome."); | |||
| rt_ret = rtSetDevice(GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.", | |||
| all_sub_graph[i]->GetModelIdInfo().model_id, graph_id); | |||
| ret = FAILED; | |||
| continue; | |||
| } | |||
| middle_ret = GraphLoader::UnloadModel(all_sub_graph[i]->GetModelIdInfo().model_id); | |||
| if (middle_ret != SUCCESS) { | |||
| GELOGE(middle_ret, "[GraphManager:] unload model failed, modelId=%u, graph_id=%u.", | |||
| all_sub_graph[i]->GetModelIdInfo().model_id, graph_id); | |||
| ret = middle_ret; | |||
| } | |||
| rt_ret = rtDeviceReset(GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "[GraphManager:] unload model failed, modelId=%u, graphId=%u.", | |||
| all_sub_graph[i]->GetModelIdInfo().model_id, graph_id); | |||
| ret = FAILED; | |||
| } | |||
| } | |||
| } | |||
| var_acc_ctrl_.RemoveGraph(graph_id); | |||
| RemoveGraphNode(graph_id); | |||
| @@ -1370,28 +1470,33 @@ Status GraphManager::RemoveGraph(const GraphId &graph_id) { | |||
| auto ge_root_model = graph_node->GetGeRootModel(); | |||
| if (CheckModelLoad(ge_root_model, graph_node->GetLoadFlag())) { | |||
| GELOGI("Unload model %u.", ge_root_model->GetModelId()); | |||
| rt_ret = rtSetDevice(GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(), | |||
| graph_id); | |||
| return FAILED; | |||
| } | |||
| middle_ret = GraphLoader::UnloadModel(ge_root_model->GetModelId()); | |||
| // the same graph may be added several times and different models were created separately; | |||
| // unload them respectively. | |||
| middle_ret = UnloadModel(ge_root_model, graph_id); | |||
| if (middle_ret != SUCCESS) { | |||
| GELOGE(middle_ret, "[GraphManager:] unload model failed, modelId=%u, graph_id=%u.", ge_root_model->GetModelId(), | |||
| graph_id); | |||
| REPORT_INNER_ERROR("E19999", "UnloadModel for graph:%u failed, check unload detail in GraphLoader %s", | |||
| graph_id, __FUNCTION__); | |||
| GELOGE(middle_ret, "[GraphManager:] unload model failed, graph_id=%u.", graph_id); | |||
| ret = middle_ret; | |||
| } | |||
| rt_ret = rtDeviceReset(GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(), | |||
| graph_id); | |||
| REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, graph_id:%u, when GraphManager %s", | |||
| GetContext().DeviceId(), graph_id, __FUNCTION__); | |||
| GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, graphId=%u.", graph_id); | |||
| ret = FAILED; | |||
| } | |||
| } | |||
| RemoveCompilerStages(graph_id); | |||
| RemoveGraphCount(graph_id); | |||
| RemoveAddGraphCondition(graph_id); | |||
| GE_CHK_STATUS_RET(ret, "[GraphManager:] Remove graph failed, graph_id=%u.", graph_id); | |||
| GELOGI("[GraphManager] remove graph success, graph_id=%u.", graph_id); | |||
| @@ -2409,6 +2514,7 @@ void GraphManager::ChangeConstTypeWhenTraining(const ComputeGraphPtr &compute_gr | |||
| Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) { | |||
| GELOGI("[LoadGraphAsync] run_graph_flag[%d], graph_id[%u]", options_.run_graph_flag, graph_node->GetGraphId()); | |||
| if (options_.run_graph_flag && ge_root_model != nullptr) { | |||
| ge_root_model->SetTrainFlag(GetTrainFlag()); | |||
| // synchronization run graph with model | |||
| ModelIdInfo model_id_info; | |||
| bool is_unknown_shape = false; | |||
| @@ -2425,9 +2531,9 @@ Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const G | |||
| } | |||
| } | |||
| GE_TIMESTAMP_START(LoadGraph); | |||
| GE_CHECK_NOTNULL(graph_node->graph_run_async_listener_); | |||
| Status ret = | |||
| GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, graph_node->graph_run_async_listener_); | |||
| auto listener = MakeShared<RunAsyncListener>(); | |||
| GE_CHECK_NOTNULL(listener); | |||
| Status ret = GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, listener); | |||
| GE_TIMESTAMP_EVENT_END(LoadGraph, "GraphManager::LoadGraphAsync"); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "[LoadGraphAsync] LoadGraphAsync Failed"); | |||
| @@ -2441,6 +2547,52 @@ Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const G | |||
| return SUCCESS; | |||
| } | |||
| void GraphManager::ReleaseMemory(const GeModelPtr &ge_model, GraphNodePtr &graph_node, | |||
| const std::vector<uint32_t> &model_ids, uint32_t graph_id, uint64_t session_id) { | |||
| rtError_t rt_ret = rtSetDevice(GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, when GraphManager %s", | |||
| GetContext().DeviceId(), __FUNCTION__); | |||
| GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, graphId=%u.", graph_id); | |||
| return; | |||
| } | |||
| for (auto model_id : model_ids) { | |||
| uint64_t max_memory_size = 0; | |||
| Status result = GraphLoader::GetMaxUsedMemory(model_id, max_memory_size); | |||
| if (result != SUCCESS) { | |||
| continue; | |||
| } | |||
| GELOGI("CheckAndReleaseMemory try to UnloadGraph[%u], model[%u] which MaxUsedMemory[%lu].", graph_id, model_id, | |||
| max_memory_size); | |||
| if (model_ids.size() > 1) { | |||
| result = ge_model->GetSessionId(model_id, session_id); | |||
| if (result != SUCCESS) { | |||
| GELOGW("[GraphManager:] get session failed when dynamic memory, modelId=%u, graphId=%u.", model_id, | |||
| graph_id); | |||
| continue; | |||
| } | |||
| } | |||
| result = GraphLoader::DestroyAicpuKernel(session_id, model_id, 0); | |||
| if (result != SUCCESS) { | |||
| GELOGW("[GraphManager:] destroy aicpu kernel failed when dynamic memory, modelId=%u, graphId=%u.", model_id, | |||
| graph_id); | |||
| } | |||
| result = GraphLoader::UnloadModel(model_id); | |||
| if (result != SUCCESS) { | |||
| GELOGW("[GraphManager:] unload model failed, modelId=%u, graphId=%u.", model_id, graph_id); | |||
| } | |||
| GELOGI("CheckAndReleaseMemory UnloadGraph[%u], model[%u] success.", graph_id, model_id); | |||
| } | |||
| graph_node->SetLoadFlag(false); | |||
| rt_ret = rtDeviceReset(GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, when GraphManager %s", | |||
| GetContext().DeviceId(), __FUNCTION__); | |||
| GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, graphId=%u.", graph_id); | |||
| return; | |||
| } | |||
| } | |||
| Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const GraphNodePtr &graph_node) { | |||
| GELOGI("CheckAndReleaseMemory graph_id[%u]", graph_node->GetGraphId()); | |||
| int64_t value = 0; | |||
| @@ -2484,6 +2636,7 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra | |||
| continue; | |||
| } | |||
| auto model_id = model->GetModelId(); | |||
| auto model_ids = model->GetAllModelId(); | |||
| // unload model not release | |||
| bool is_unknown_shape = false; | |||
| GE_CHK_STATUS_RET(model->CheckIsUnknownShape(is_unknown_shape)); | |||
| @@ -2496,34 +2649,7 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra | |||
| GELOGI("CheckAndReleaseMemory graph[%u] has not been loaded.", graph_id); | |||
| continue; | |||
| } | |||
| uint64_t max_memory_size = 0; | |||
| result = GraphLoader::GetMaxUsedMemory(model_id, max_memory_size); | |||
| if (result != SUCCESS) { | |||
| continue; | |||
| } | |||
| GELOGI("CheckAndReleaseMemory try to UnloadGraph[%u], model[%u] which MaxUsedMemory[%lu].", graph_id, model_id, | |||
| max_memory_size); | |||
| rtError_t rt_ret = rtSetDevice(GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.", model_id, graph_id); | |||
| continue; | |||
| } | |||
| result = GraphLoader::DestroyAicpuKernel(session_id, model_id, 0); | |||
| if (result != SUCCESS) { | |||
| GELOGW("[GraphManager:] destroy aicpu kernel failed when dynamic memory, modelId=%u, graphId=%u.", model_id, | |||
| graph_id); | |||
| } | |||
| result = GraphLoader::UnloadModel(model_id); | |||
| if (result != SUCCESS) { | |||
| GELOGW("[GraphManager:] unload model failed, modelId=%u, graphId=%u.", model_id, graph_id); | |||
| } | |||
| rt_ret = rtDeviceReset(GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, modelId=%u, graphId=%u.", model_id, graph_id); | |||
| continue; | |||
| } | |||
| it.second->SetLoadFlag(false); | |||
| GELOGI("CheckAndReleaseMemory UnloadGraph[%u], model[%u] success and set LoadFlag to false.", graph_id, model_id); | |||
| ReleaseMemory(ge_model, it.second, model_ids, graph_id, session_id); | |||
| } | |||
| return SUCCESS; | |||
| @@ -2659,6 +2785,38 @@ void GraphManager::ConstructGeInput(const vector<InputTensorInfo> &inputs, vecto | |||
| } | |||
| } | |||
| Status GraphManager::CheckIncreBuildAndPreRun(GraphManager *graph_manager, const PreRunArgs &args, | |||
| GraphNodePtr &graph_node, GeRootModelPtr &ge_root_model) { | |||
| if (!graph_manager->IsGraphNeedBuild(graph_node)) { | |||
| ge_root_model = graph_node->GetGeRootModel(); | |||
| return SUCCESS; | |||
| } | |||
| if (graph_node->GetBuildFlag()) { | |||
| ReturnError(graph_manager, args.callback, PARAM_INVALID, | |||
| "The graph " + std::to_string(graph_node->GetGraphId()) + | |||
| " need to re-build, you should remove it" | |||
| " from GE first, then AddGraph again and rebuild it."); | |||
| graph_node->Unlock(); | |||
| return PARAM_INVALID; | |||
| } | |||
| // check need incre build. | |||
| GeModelPtr ge_model = nullptr; | |||
| if (graph_manager->IncreBuild(graph_node, ge_model) != SUCCESS) { | |||
| std::vector<GeTensor> ge_inputs; | |||
| ConstructGeInput(args.input_tensor, ge_inputs); | |||
| Status ret = graph_manager->PreRun(graph_node, ge_inputs, ge_root_model, args.session_id); | |||
| // release rts generate context | |||
| RtContextUtil::GetInstance().DestroyRtContexts(args.session_id, graph_node->GetGraphId()); | |||
| if (ret != SUCCESS) { | |||
| ReturnError(graph_manager, args.callback, ret, "PreRun Failed."); | |||
| return ret; | |||
| } | |||
| } | |||
| graph_node->SetBuildFlag(true); | |||
| graph_manager->var_acc_ctrl_.SetGraphBuildEnd(graph_node->GetGraphId()); | |||
| return SUCCESS; | |||
| } | |||
| void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||
| if (prctl(PR_SET_NAME, ("GE_PreRun")) != 0) { | |||
| GELOGW("Set thread name failed."); | |||
| @@ -2671,7 +2829,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||
| continue; | |||
| } | |||
| GELOGI("A new loop start."); | |||
| GELOGI("[PreRunThread] A new loop start, graph_id:%u.", args.graph_id); | |||
| ErrorManager::GetInstance().SetErrorContext(args.error_context); | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); | |||
| @@ -2687,7 +2845,24 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||
| "[RunGraph] graph not exist, graph_id=" + std::to_string(args.graph_id)); | |||
| return; | |||
| } | |||
| // more than one graph owns same graph_id | |||
| uint32_t count = 0; | |||
| if (graph_manager->GetGraphCount(args.graph_id, count) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Get graph [id:%u] count failed.", args.graph_id); | |||
| return; | |||
| } | |||
| // Avoid repeated prerun for graphs owning the same graph_id under online-inference concurrency | |||
| if (count > 1 && graph_node->GetBuildFlag()) { | |||
| graph_node->Lock(); | |||
| GELOGD("Avoid repeatively prerun, graph_id:%u.", args.graph_id); | |||
| // In the online-inference concurrency scenario, graph_node is allowed to be locked 'count' times | |||
| graph_node->SetSemSize(count); | |||
| graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.error_context, | |||
| args.input_tensor, graph_node->GetGeRootModel(), GetThreadLocalContext(), args.callback })); | |||
| GELOGI("[PreRunThread] Loop end. Start to run with cached build model."); | |||
| continue; | |||
| } | |||
| // Cannot be moved ahead of the repeated-prerun judgement | |||
| graph_node->Lock(); | |||
| if (graph_node->GetRunFlag()) { | |||
| @@ -2719,46 +2894,24 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||
| // it will not execute graph preprocess, optimize, partition or build if the graph has been built successfully. | |||
| GELOGI("Start for run graph async."); | |||
| GeRootModelPtr ge_root_model = nullptr; | |||
| if (graph_manager->IsGraphNeedBuild(graph_node)) { | |||
| if (graph_node->GetBuildFlag()) { | |||
| ReturnError(graph_manager, args.callback, PARAM_INVALID, | |||
| "The graph " + std::to_string(graph_node->GetGraphId()) + | |||
| " need to re-build, you should remove it" | |||
| " from GE first, then AddGraph again and rebuild it."); | |||
| ret = CheckIncreBuildAndPreRun(graph_manager, args, graph_node, ge_root_model); | |||
| if (ret != SUCCESS) { | |||
| graph_node->SetRunFlag(false); | |||
| if (!ge::Analyzer::GetInstance()->IsEnableNetAnalyzeDebug()) { | |||
| ReturnError(graph_manager, args.callback, ret, "CheckIncreBuildAndPreRun Failed, thread exit.."); | |||
| graph_node->Unlock(); | |||
| return; | |||
| } else { | |||
| ReturnError(graph_manager, graph_node, args.callback, ret, | |||
| "CheckIncreBuildAndPreRun Failed, keep geop continue!"); | |||
| graph_node->Unlock(); | |||
| continue; | |||
| } | |||
| // check need incre build. | |||
| GeModelPtr ge_model = nullptr; | |||
| if (graph_manager->IncreBuild(graph_node, ge_model) != SUCCESS) { | |||
| std::vector<GeTensor> ge_inputs; | |||
| ConstructGeInput(args.input_tensor, ge_inputs); | |||
| ret = graph_manager->PreRun(graph_node, ge_inputs, ge_root_model, args.session_id); | |||
| // release rts generate context | |||
| RtContextUtil::GetInstance().DestroyRtContexts(args.session_id, graph_node->GetGraphId()); | |||
| if (ret != SUCCESS) { | |||
| graph_node->SetRunFlag(false); | |||
| if (!ge::Analyzer::GetInstance()->IsEnableNetAnalyzeDebug()) { | |||
| ReturnError(graph_manager, args.callback, ret, "PreRun Failed, thread exit.."); | |||
| graph_node->Unlock(); | |||
| return; | |||
| } else { | |||
| ReturnError(graph_manager, graph_node, args.callback, ret, "PreRun Failed, keep geop continue!"); | |||
| graph_node->Unlock(); | |||
| continue; | |||
| } | |||
| } | |||
| } | |||
| graph_node->SetBuildFlag(true); | |||
| graph_manager->var_acc_ctrl_.SetGraphBuildEnd(graph_node->GetGraphId()); | |||
| } else { | |||
| ge_root_model = graph_node->GetGeRootModel(); | |||
| } | |||
| graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.error_context, | |||
| args.input_tensor, ge_root_model, GetThreadLocalContext(), args.callback })); | |||
| GELOGI("Loop end."); | |||
| GELOGI("[PreRunThread] Loop end."); | |||
| } | |||
| } | |||
| @@ -2855,16 +3008,13 @@ void GraphManager::RunThread(GraphManager *graph_manager) { | |||
| continue; | |||
| } | |||
| GELOGI("A new loop start."); | |||
| GELOGI("[RunThread] A new loop start, graph_id:%u.", args.graph_id); | |||
| ErrorManager::GetInstance().SetErrorContext(args.error_context); | |||
| GetContext().SetSessionId(args.session_id); | |||
| GetThreadLocalContext() = args.context; | |||
| graph_manager->UpdateLocalOmgContext(args.graph_id); | |||
| if (args.graph_node->graph_run_async_listener_ != nullptr) { | |||
| args.graph_node->graph_run_async_listener_->SetCallback(args.callback); | |||
| } | |||
| Status ret; | |||
| // parse inputs.dims to vector<vector<uint64_t>> dynamic_dims | |||
| ret = graph_manager->ParseInputsDims(args.input_tensor); | |||
| @@ -2874,8 +3024,10 @@ void GraphManager::RunThread(GraphManager *graph_manager) { | |||
| return; | |||
| } | |||
| args.graph_node->UpdateLoadFlag(); | |||
| if (!args.graph_node->GetLoadFlag()) { | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelLoad, ErrorMessage::kModelLoad); | |||
| args.ge_root_model->SetTrainFlag(graph_manager->GetTrainFlag()); | |||
| ret = graph_manager->LoadGraphAsync(args.ge_root_model, args.graph_node); | |||
| if (ret != SUCCESS || args.ge_root_model == nullptr) { | |||
| StopQueue(graph_manager); | |||
| @@ -2883,6 +3035,10 @@ void GraphManager::RunThread(GraphManager *graph_manager) { | |||
| args.graph_node->Unlock(); | |||
| return; | |||
| } | |||
| // control the number of graph loads in the multi-thread scenario | |||
| args.graph_node->DecreaseLoadCount(); | |||
| args.graph_node->IncreaseLoadRecord(); | |||
| args.graph_node->SetLoadFlag(true); | |||
| GELOGI("LoadGraph[%u], model[%u] success and set LoadFlag to true.", args.graph_node->GetGraphId(), | |||
| args.ge_root_model->GetModelId()); | |||
| @@ -2898,7 +3054,7 @@ void GraphManager::RunThread(GraphManager *graph_manager) { | |||
| } | |||
| ret = graph_manager->graph_executor_.ExecuteGraphAsync(args.graph_id, args.graph_node->GetGeRootModel(), | |||
| args.input_tensor); | |||
| args.input_tensor, args.callback); | |||
| args.graph_node->SetRunFlag(false); | |||
| if (ret != SUCCESS) { | |||
| ReturnError(graph_manager, args.callback, ret, "ExecuteGraphAsync failed, thread exit."); | |||
| @@ -3314,4 +3470,49 @@ void GraphManager::RemoveCompilerStages(GraphId graph_id) { | |||
| std::lock_guard<std::mutex> lock(member_mutex_); | |||
| compiler_stages_.erase(graph_id); | |||
| } | |||
| void GraphManager::IncreaseGraphCount(GraphId graph_id) { | |||
| std::lock_guard<std::mutex> lock(graph_count_mutex_); | |||
| auto it = graph_count_.find(graph_id); | |||
| if (it == graph_count_.end()) { | |||
| graph_count_.insert({graph_id, kInitGraphCount}); | |||
| } else { | |||
| ++graph_count_[graph_id]; | |||
| } | |||
| GELOGD("After IncreaseGraphCount, graph count of id[%u] is %u.", graph_id, graph_count_[graph_id]); | |||
| } | |||
| void GraphManager::RemoveGraphCount(GraphId graph_id) { | |||
| std::lock_guard<std::mutex> lock(graph_count_mutex_); | |||
| auto it = graph_count_.find(graph_id); | |||
| if (it == graph_count_.end()) { | |||
| GELOGW("Graph of id: %u has not been added, count cannot be decreased.", graph_id); | |||
| } else { | |||
| GELOGD("RemoveGraphCount success, graph count of id[%u] is %u.", graph_id, graph_count_[graph_id]); | |||
| graph_count_.erase(it); | |||
| } | |||
| } | |||
| void GraphManager::DecreaseGraphCount(GraphId graph_id) { | |||
| std::lock_guard<std::mutex> lock(graph_count_mutex_); | |||
| auto it = graph_count_.find(graph_id); | |||
| if (it == graph_count_.end()) { | |||
| GELOGW("Graph of id: %u has not been added, count cannot be decreased.", graph_id); | |||
| } else { | |||
| --it->second; | |||
| GELOGD("After DecreaseGraphCount, graph count of id[%u] is %u.", graph_id, graph_count_[graph_id]); | |||
| } | |||
| } | |||
| Status GraphManager::GetGraphCount(GraphId graph_id, uint32_t &count) { | |||
| std::lock_guard<std::mutex> lock(graph_count_mutex_); | |||
| auto it = graph_count_.find(graph_id); | |||
| if (it == graph_count_.end()) { | |||
| GELOGW("Graph [id:%u] has not been added.", graph_id); | |||
| return FAILED; | |||
| } | |||
| count = it->second; | |||
| return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -184,6 +184,20 @@ class GraphManager { | |||
| Status SaveCheckPointResult(const Graph &graph, const std::vector<Tensor> &outputs, map<string, Tensor> &var_results); | |||
| void RemoveGraphCount(GraphId graph_id); | |||
| void IncreaseGraphCount(GraphId graph_id); | |||
| void DecreaseGraphCount(GraphId graph_id); | |||
| Status GetGraphCount(GraphId graph_id, uint32_t &count); | |||
| void SetAddGraphCondition(GraphId graph_id, uint32_t cond); | |||
| uint32_t GetAddGraphCondition(GraphId graph_id); | |||
| void RemoveAddGraphCondition(GraphId graph_id); | |||
| private: | |||
| struct CompilerStages { | |||
| GraphPrepare preparer; | |||
| @@ -380,6 +394,24 @@ class GraphManager { | |||
| CompilerStages &GetCompilerStages(GraphId graph_id); | |||
| void RemoveCompilerStages(GraphId graph_id); | |||
| static Status CheckIncreBuildAndPreRun(GraphManager *graph_manager, const PreRunArgs &args, GraphNodePtr &graph_node, | |||
| GeRootModelPtr &ge_root_model); | |||
| void ReleaseMemory(const GeModelPtr &ge_model, GraphNodePtr &graph_node, const std::vector<uint32_t> &model_ids, | |||
| uint32_t graph_id, uint64_t session_id); | |||
| Status CheckRepeatAdd(uint32_t graph_id, bool &is_added); | |||
| Status NotifyWaittingGraph(uint32_t graph_id); | |||
| Status CreateGraphNode(uint32_t graph_id, const Graph &graph, const std::map<std::string, std::string> &options); | |||
| Status SetStagesOptions(uint32_t graph_id, const GraphManagerOptions &options); | |||
| Status UnloadModel(GeRootModelPtr ge_root_model, uint32_t graph_id); | |||
| void SetSessionGraphId(ComputeGraphPtr compute_graph, uint32_t graph_id); | |||
| std::atomic_bool thread_run_flag_; | |||
| BlockingQueue<PreRunArgs> prerun_args_q_{}; | |||
| BlockingQueue<RunArgs> run_args_q_{}; | |||
| @@ -415,6 +447,16 @@ class GraphManager { | |||
| std::mutex member_mutex_; | |||
| std::mutex unload_model_mutex_; | |||
| // avoid repeatedly adding the same graph (owning the same graph id) | |||
| std::mutex add_graph_mutex_; | |||
| std::mutex add_graph_cond_mutex_; | |||
| std::condition_variable add_graph_cv_; | |||
| std::map<GraphId, uint32_t> graph_id_to_add_graph_cond_; | |||
| // used in the multi-thread online-infer scenario | |||
| std::set<GraphId> to_be_deleted_graphs_; | |||
| std::map<GraphId, uint32_t> graph_count_; | |||
| std::mutex graph_count_mutex_; | |||
| }; | |||
| } // namespace ge | |||
| @@ -60,6 +60,15 @@ void GraphNode::Unlock() { | |||
| sem_.Pop(unused); | |||
| } | |||
| void GraphNode::IncreaseLoadCount() { | |||
| std::unique_lock<std::mutex> lock(load_count_mu_); | |||
| if (load_record_ == kMaxLoadNum) { | |||
| GELOGW("Reach the maximum of load_count:%u", kMaxLoadNum); | |||
| return; | |||
| } | |||
| ++load_count_; | |||
| } | |||
| SubGraphInfo::SubGraphInfo() : subgraph_ptr_(nullptr), ge_model_ptr_(nullptr), malloc_flag_(false) {} | |||
| SubGraphInfo::~SubGraphInfo() { | |||
| @@ -55,6 +55,7 @@ using ConstGraphPtr = std::shared_ptr<const ge::Graph>; | |||
| using GraphPtr = std::shared_ptr<ge::Graph>; | |||
| const uint64_t INVALID_SESSION_ID = 0xffffffffffffffffULL; | |||
| const uint32_t kMaxLoadNum = 8; | |||
| struct ModelIdInfo { | |||
| uint32_t model_id{INVALID_MODEL_ID}; | |||
| @@ -162,6 +163,8 @@ class GraphNode { | |||
| bool GetBuildFlag() const { return build_flag_; } | |||
| void SetBuildFlag(bool buildFlag) { build_flag_ = buildFlag; } | |||
| bool GetLoadFlag() const { return load_flag_; } | |||
| // allow repeatedly loading a graph that owns the same graph id | |||
| void UpdateLoadFlag() { load_flag_ = load_count_ == 0 || load_record_ >= kMaxLoadNum; } | |||
| void SetLoadFlag(bool load_flag) { load_flag_ = load_flag; } | |||
| void SetGeModel(const GeModelPtr &ge_model) { ge_model_ = ge_model; } | |||
| GeModelPtr GetGeModel() const { return ge_model_; } | |||
| @@ -172,6 +175,13 @@ class GraphNode { | |||
| void Lock(); | |||
| void Unlock(); | |||
| void SetSemSize(uint32_t size) { sem_.SetMaxSize(size); } | |||
| uint32_t GetLoadCount() const { return load_count_; } | |||
| void IncreaseLoadCount(); | |||
| void DecreaseLoadCount() { --load_count_; } | |||
| void IncreaseLoadRecord() { ++load_record_; } | |||
| // run graph asynchronous listener | |||
| std::shared_ptr<RunAsyncListener> graph_run_async_listener_; | |||
| @@ -184,11 +194,17 @@ class GraphNode { | |||
| GraphPtr graph_; | |||
| ComputeGraphPtr compute_graph_; | |||
| bool build_flag_; | |||
| // load_flag_ is true if more than one model was loaded | |||
| bool load_flag_; | |||
| bool async_; | |||
| GeModelPtr ge_model_; | |||
| GeRootModelPtr ge_root_model_; | |||
| BlockingQueue<uint8_t> sem_; | |||
| // consistent with the graph_count of the same graph_id in graph_manager | |||
| uint32_t load_count_ = 0; | |||
| // total number of times a graph with the same graph_id has been loaded. | |||
| uint32_t load_record_ = 0; | |||
| std::mutex load_count_mu_; | |||
| }; | |||
| using GraphNodePtr = std::shared_ptr<GraphNode>; | |||
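| load_count_ and load_record_ drive UpdateLoadFlag() above: loading is skipped once no load is outstanding or the record cap is reached. A standalone sketch of that flag logic under the same assumptions: | |||
| #include <cstdint> | |||
| struct LoadState { | |||
|   static constexpr uint32_t kMaxLoadNum = 8; | |||
|   uint32_t load_count = 0;   // outstanding loads requested via AddGraph | |||
|   uint32_t load_record = 0;  // total loads performed so far | |||
|   // Mirrors GraphNode::UpdateLoadFlag(): true means no further load is needed. | |||
|   bool LoadFlag() const { return load_count == 0 || load_record >= kMaxLoadNum; } | |||
| }; | |||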
| @@ -126,11 +126,11 @@ bool AtomicAddrCleanPass::IsOutputIndexPeerInputAtomic(const NodePtr &node, int6 | |||
| bool AtomicAddrCleanPass::CheckSkipInsertInLoopGraph(const NodePtr &node) { | |||
| OpDescPtr op_desc = node->GetOpDesc(); | |||
| std::map<string, std::map<int, int>> node_workspace_offset; | |||
| std::map<string, std::map<int64_t, int64_t>> atomic_workspace_index_size; | |||
| bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX); | |||
| bool has_atomic_output = op_desc->HasAttr(ATOMIC_ATTR_OUTPUT_INDEX); | |||
| node_workspace_offset = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, node_workspace_offset); | |||
| if (!has_atomic_input && has_atomic_output && node_workspace_offset.empty()) { | |||
| atomic_workspace_index_size = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_index_size); | |||
| if (!has_atomic_input && has_atomic_output && atomic_workspace_index_size.empty()) { | |||
| std::vector<int64_t> atomic_output_index; | |||
| (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index); | |||
| bool is_all_output_peer_also_atomic = true; | |||
| @@ -222,6 +222,39 @@ Status AtomicAddrCleanPass::HandleNormalGraph(ComputeGraphPtr &graph, const vect | |||
| } | |||
| } | |||
| } | |||
| return LinkToPotentialPrecedenceNode(graph, clean_addr_node); | |||
| } | |||
| // Add control edges from the atomic clean node to all potential precedence nodes which may execute before the | |||
| // atomic clean node. We hope the atomic clean node executes with the highest priority in the entire graph. Because | |||
| // of the stream concurrency mechanism, merely placing it at the head cannot guarantee that priority. Therefore, we | |||
| // add control edges from the atomic clean node to the nodes that may be the first node on each stream. Generally, | |||
| // the first nodes on each stream are successors of Data/Variable, and Data/Variable won't generate tasks or | |||
| // execute, so we link to the successors of Data/Variable. | |||
| Status AtomicAddrCleanPass::LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node) { | |||
| GELOGD("Start to add control edges from %s to all second-nodes behind first-nodes which have no input.", | |||
| atomic_clean_node->GetName().c_str()); | |||
| auto out_ctrl_anchor = atomic_clean_node->GetOutControlAnchor(); | |||
| GE_CHECK_NOTNULL(out_ctrl_anchor); | |||
| for (const auto &node : graph->GetDirectNode()) { | |||
| GE_CHECK_NOTNULL(node); | |||
| bool need_handle = (node->GetType() == DATA || node->GetType() == VARIABLE) && node->GetInAllNodes().empty(); | |||
| if (!need_handle) { | |||
| continue; | |||
| } | |||
| auto second_nodes = node->GetOutAllNodes(); | |||
| for (const auto &second_node : second_nodes) { | |||
| GE_CHECK_NOTNULL(second_node); | |||
| auto in_ctrl_anchor = second_node->GetInControlAnchor(); | |||
| GE_CHECK_NOTNULL(in_ctrl_anchor); | |||
| if (!out_ctrl_anchor->IsLinkedWith(in_ctrl_anchor)) { | |||
| GE_CHK_STATUS_RET(out_ctrl_anchor->LinkTo(in_ctrl_anchor)); | |||
| GELOGD("Add control edge from %s to %s.", atomic_clean_node->GetName().c_str(), second_node->GetName().c_str()); | |||
| } | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
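| The traversal above is a two-hop walk: find every input-less source node, then control-link the clean node to each direct successor. The same walk over a toy adjacency-list graph (standalone and illustrative; the Data/Variable type check is elided): | |||
| #include <string> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| using Adjacency = std::unordered_map<std::string, std::vector<std::string>>; | |||
| // Collect the "second nodes": direct successors of nodes that have no inputs. | |||
| std::vector<std::string> PotentialFirstTasks(const Adjacency &out_edges, | |||
|                                              const Adjacency &in_edges) { | |||
|   std::vector<std::string> result; | |||
|   for (const auto &entry : out_edges) { | |||
|     if (in_edges.count(entry.first) != 0) { | |||
|       continue;  // not a source; only Data/Variable-like sources qualify | |||
|     } | |||
|     for (const auto &succ : entry.second) { | |||
|       result.push_back(succ);  // the atomic clean node would control-link here | |||
|     } | |||
|   } | |||
|   return result; | |||
| } | |||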
@@ -332,11 +365,11 @@ bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node) {
  }
  // 2.Check atomic attr in node
- std::map<string, std::map<int, int>> node_workspace_offset;
+ std::map<string, std::map<int64_t, int64_t>> atomic_workspace_index_size;
  bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX);
  bool has_atomic_output = op_desc->HasAttr(ATOMIC_ATTR_OUTPUT_INDEX);
- node_workspace_offset = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, node_workspace_offset);
- if (!has_atomic_input && !has_atomic_output && node_workspace_offset.empty()) {
+ atomic_workspace_index_size = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_index_size);
+ if (!has_atomic_input && !has_atomic_output && atomic_workspace_index_size.empty()) {
    return false;
  }
@@ -67,6 +67,14 @@ class AtomicAddrCleanPass : public GraphPass {
   */
  Status LinkToAtomicNode(const NodePtr &atomic_node, NodePtr &atomic_clean_node);
+ /**
+  * Link the atomic clean node to all potential precedence nodes that may execute before it.
+  * @param graph
+  * @param atomic_clean_node
+  * @return
+  */
+ Status LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node);
  /**
   * Check if this node is atomic op.
   * @param node
@@ -137,7 +137,6 @@ Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &strea
    return INTERNAL_ERROR;
  }
  stream_label = node->GetInDataNodes().at(0)->GetName();
- GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed.");
  bool value = false;
  OpDescPtr op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);
@@ -30,8 +30,15 @@ constexpr int kMaxRePassTimes = 10000;
constexpr size_t kMaxOneInNodes = 1000;
// Each iteration takes about 0.3 KB of stack memory; we should change the recursion to a loop later.
constexpr int kMaxRecursiveDepth = 20;
+ struct DuringPassNodeSets {
+   std::unordered_set<Node *> nodes_seen;
+   std::unordered_set<NodePtr> nodes_deleted;
+   std::unordered_set<NodePtr> nodes_re_pass;
+   std::unordered_set<NodePtr> nodes_re_pass_immediately;
+   std::unordered_set<NodePtr> nodes_last;
+ };
- void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::queue<NodePtr> &input_edge_nodes,
+ void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::deque<NodePtr> &input_edge_nodes,
                              std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_last) {
  nodes_last.clear();
  for (auto &node : graph->GetDirectNode()) {
@@ -40,7 +47,7 @@ void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::queue<NodePtr> &i
    }
    size_t in_nums = node->GetInNodes().size();
    if (in_nums == 0) {
-     input_edge_nodes.push(node);
+     input_edge_nodes.push_back(node);
      nodes_seen.insert(node.get());
    } else if (in_nums > kMaxOneInNodes) {
      nodes_last.insert(node);
@@ -48,7 +55,7 @@ void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::queue<NodePtr> &i
    }
  }
- void AddNextIterNodes(const Node::Vistor<NodePtr> &nodes, std::queue<NodePtr> &nodes_to_pass,
+ void AddNextIterNodes(const Node::Vistor<NodePtr> &nodes, std::deque<NodePtr> &nodes_to_pass,
                        std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_last) {
  for (auto &node : nodes) {
    if (node == nullptr) {
@@ -60,13 +67,30 @@ void AddNextIterNodes(const Node::Vistor<NodePtr> &nodes, std::queue<NodePtr> &n
    bool all_in_nodes_seen = node->IsAllInNodesSeen(nodes_seen);
    if (all_in_nodes_seen && nodes_seen.insert(node.get()).second) {
-     nodes_to_pass.push(node);
+     nodes_to_pass.push_back(node);
    }
  }
}
- Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unordered_set<NodePtr> &nodes_re_pass,
-                  std::unordered_set<NodePtr> &nodes_deleted, std::unordered_set<Node *> &nodes_seen) {
+ void PushToRePassIfSeen(NodePtr &node, const std::pair<std::string, BaseNodePass *> &name_to_pass,
+                         std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_to_re_pass,
+                         std::unordered_set<NodePtr> &nodes_re_pass) {
+   for (const auto &node_to_re_pass : nodes_to_re_pass) {
+     if (node_to_re_pass == nullptr) {
+       GELOGW("Found null re-pass node when executing %s on node %s type %s", name_to_pass.first.c_str(),
+              node->GetName().c_str(), node->GetType().c_str());
+       continue;
+     }
+     if (nodes_seen.count(node_to_re_pass.get()) > 0 || node_to_re_pass->IsAllInNodesSeen(nodes_seen)) {
+       GELOGD("The node %s will be re-pass.", node_to_re_pass->GetName().c_str());
+       nodes_re_pass.insert(node_to_re_pass);
+     } else {
+       GELOGD("The node %s are not all seen, don't set repass this time", node_to_re_pass->GetName().c_str());
+     }
+   }
+ }
+ Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, DuringPassNodeSets &during_pass_node_set) {
  if (node == nullptr) {
    GELOGE(FAILED, "parameter is null.");
    return FAILED;
@@ -90,22 +114,15 @@ Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unorder
    }
    auto nodes_to_re_pass = name_to_pass.second->GetNodesNeedRePass();
-   for (const auto &node_to_re_pass : nodes_to_re_pass) {
-     if (node_to_re_pass == nullptr) {
-       GELOGW("Found null re-pass node when executing %s on node %s type %s", name_to_pass.first.c_str(),
-              node->GetName().c_str(), node->GetType().c_str());
-       continue;
-     }
-     if (nodes_seen.count(node_to_re_pass.get()) > 0 || node_to_re_pass->IsAllInNodesSeen(nodes_seen)) {
-       GELOGD("The node %s will be re-pass later", node_to_re_pass->GetName().c_str());
-       nodes_re_pass.insert(node_to_re_pass);
-     } else {
-       GELOGD("The node %s are not all seen, don't set repass this time", node_to_re_pass->GetName().c_str());
-     }
-   }
+   PushToRePassIfSeen(node, name_to_pass, during_pass_node_set.nodes_seen, nodes_to_re_pass,
+                      during_pass_node_set.nodes_re_pass);
+   auto nodes_to_re_pass_immediately = name_to_pass.second->GetNodesNeedRePassImmediately();
+   PushToRePassIfSeen(node, name_to_pass, during_pass_node_set.nodes_seen, nodes_to_re_pass_immediately,
+                      during_pass_node_set.nodes_re_pass_immediately);
    auto nodes_deleted_by_pass = name_to_pass.second->GetNodesDeleted();
-   nodes_deleted.insert(nodes_deleted_by_pass.begin(), nodes_deleted_by_pass.end());
+   during_pass_node_set.nodes_deleted.insert(nodes_deleted_by_pass.begin(), nodes_deleted_by_pass.end());
    if (nodes_deleted_by_pass.count(node) > 0) {
      GELOGD("The node %s was deleted by pass %s, stop the remain passes", node->GetName().c_str(),
             name_to_pass.first.c_str());
@@ -181,36 +198,33 @@ Status GEPass::Run(const NamesToPass &names_to_passes) {
Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) {
  GELOGD("Begin to run pass on graph, passes count %zu", names_to_passes.size());
- std::queue<NodePtr> nodes;
- std::unordered_set<Node *> nodes_seen;
- std::unordered_set<NodePtr> nodes_deleted;
- std::unordered_set<NodePtr> nodes_re_pass;
- std::unordered_set<NodePtr> nodes_last;
- GetAllNodesNoInputEdge(graph_, nodes, nodes_seen, nodes_last);
+ std::deque<NodePtr> nodes;
+ DuringPassNodeSets during_pass_node_set;
+ GetAllNodesNoInputEdge(graph_, nodes, during_pass_node_set.nodes_seen, during_pass_node_set.nodes_last);
  GELOGD("Start points count %zu", nodes.size());
  int re_pass_times = 0;
  do {
-   for (auto &node : nodes_re_pass) {
-     nodes.push(node);
-     nodes_seen.insert(node.get());
+   for (auto &node : during_pass_node_set.nodes_re_pass) {
+     nodes.push_back(node);
+     during_pass_node_set.nodes_seen.insert(node.get());
    }
-   nodes_re_pass.clear();
+   during_pass_node_set.nodes_re_pass.clear();
    while (!nodes.empty()) {
      NodePtr node = nodes.front();
-     nodes.pop();
+     nodes.pop_front();
-     (void)nodes_re_pass.erase(node);
+     (void)during_pass_node_set.nodes_re_pass.erase(node);
      GE_IF_BOOL_EXEC(node == nullptr, GELOGW("node is null"); continue);
-     if (nodes_deleted.count(node) > 0) {
+     if (during_pass_node_set.nodes_deleted.count(node) > 0) {
        GELOGD("The node %s was deleted before, skip it.", node->GetName().c_str());
        continue;
      }
-     AddNextIterNodes(node->GetOutNodes(), nodes, nodes_seen, nodes_last);
+     AddNextIterNodes(node->GetOutNodes(), nodes, during_pass_node_set.nodes_seen, during_pass_node_set.nodes_last);
-     auto ret = RunPasses(node, names_to_passes, nodes_re_pass, nodes_deleted, nodes_seen);
+     auto ret = RunPasses(node, names_to_passes, during_pass_node_set);
      if (ret != SUCCESS) {
        GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u",
               node->GetName().c_str(), node->GetType().c_str(), ret);
@@ -227,7 +241,7 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) {
      if (has_sub_graph) {
        GELOGD("There are subgraphs on node %s, run passes for for the second time", node->GetName().c_str());
        SetFlagOption(kOptimizeAfterSubGraph, names_to_passes);
-       ret = RunPasses(node, names_to_passes, nodes_re_pass, nodes_deleted, nodes_seen);
+       ret = RunPasses(node, names_to_passes, during_pass_node_set);
        if (ret != SUCCESS) {
          GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u",
                 node->GetName().c_str(), node->GetType().c_str(), ret);
@@ -239,16 +253,21 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) {
      // should be called each time at the begin of the iteration
      ClearOption(names_to_passes);
    }
+   for (const auto &node : during_pass_node_set.nodes_re_pass_immediately) {
+     GELOGD("The node %s will be re-pass immediately.", node->GetName().c_str());
+     nodes.push_front(node);
+   }
+   during_pass_node_set.nodes_re_pass_immediately.clear();
  }
- for (auto &node : nodes_last) {
-   bool all_in_nodes_seen = node->IsAllInNodesSeen(nodes_seen);
-   if (all_in_nodes_seen && nodes_seen.insert(node.get()).second) {
-     nodes.push(node);
+ for (auto &node : during_pass_node_set.nodes_last) {
+   bool all_in_nodes_seen = node->IsAllInNodesSeen(during_pass_node_set.nodes_seen);
+   if (all_in_nodes_seen && during_pass_node_set.nodes_seen.insert(node.get()).second) {
+     nodes.push_back(node);
    }
  }
- nodes_last.clear();
- } while ((!nodes_re_pass.empty() || !nodes.empty()) && ++re_pass_times < kMaxRePassTimes);
+ during_pass_node_set.nodes_last.clear();
+ } while ((!during_pass_node_set.nodes_re_pass.empty() || !nodes.empty()) && ++re_pass_times < kMaxRePassTimes);
  if (re_pass_times == kMaxRePassTimes) {
    GELOGW("re_pass_times should not come to %d", kMaxRePassTimes);
@@ -53,6 +53,8 @@ class BaseNodePass {
  std::unordered_set<NodePtr> GetNodesNeedRePass() { return nodes_need_re_pass_; }
+ std::unordered_set<NodePtr> GetNodesNeedRePassImmediately() { return nodes_need_re_pass_immediately_; }
  std::unordered_set<NodePtr> GetNodesDeleted() { return nodes_deleted_; }
  void SetOption(NodePassOption option, const std::string &value) { options_[option] = value; }
@@ -62,6 +64,7 @@ class BaseNodePass {
  void init() {
    nodes_need_re_pass_.clear();
    nodes_deleted_.clear();
+   nodes_need_re_pass_immediately_.clear();
  }
 protected:
@@ -79,6 +82,14 @@ class BaseNodePass {
  ///
  void AddRePassNode(NodePtr &node) { nodes_need_re_pass_.insert(node); }
+ ///
+ /// Add a node to be optimized again immediately. If you add a new node to the graph, or
+ /// change a node's connections, and you want to make sure the node will be
+ /// optimized by other passes, call this function.
+ /// @param node
+ ///
+ void AddImmediateRePassNode(NodePtr &node) { nodes_need_re_pass_immediately_.insert(node); }
  ///
  /// Add a node and its input/output data nodes to be optimized again.
  /// @param node
@@ -109,6 +120,7 @@ class BaseNodePass {
 private:
  std::unordered_set<NodePtr> nodes_need_re_pass_;
+ std::unordered_set<NodePtr> nodes_need_re_pass_immediately_;
  std::unordered_set<NodePtr> nodes_deleted_;
  std::map<NodePassOption, std::string> options_;
};
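A sketch of how a node pass might use the new hook (the pass and its internals are hypothetical; only BaseNodePass, Run, and AddImmediateRePassNode come from the header above, and the include path is an assumption):

```cpp
#include "graph/passes/base_pass.h"  // assumed header path

namespace ge {
class ExampleRewirePass : public BaseNodePass {
 public:
  Status Run(NodePtr &node) override {
    bool rewired = false;
    // ... a real pass would rewrite this node's edges here and set `rewired` ...
    if (rewired) {
      // Ask the framework to revisit this node in the current round,
      // ahead of the remaining ready nodes, rather than in the next round.
      AddImmediateRePassNode(node);
    }
    return SUCCESS;
  }
};
}  // namespace ge
```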
@@ -25,6 +25,7 @@
namespace ge {
Status InferShapePass::Run(NodePtr &node) {
  // kOptimizeAfterSubGraph exist means after subgraph
  auto ret = ShapeRefiner::InferShapeAndType(node, !OptionExists(kOptimizeAfterSubGraph));
  if (ret != GRAPH_SUCCESS) {
    // select INFERSHAPE failed info
@@ -41,6 +42,20 @@ Status InferShapePass::Run(NodePtr &node) {
    GELOGE(GE_GRAPH_INFERSHAPE_FAILED, "infershape failed. node: %s", node->GetName().c_str());
    return GE_GRAPH_INFERSHAPE_FAILED;
  }
+ bool need_repass = false;
+ auto has_attr = AttrUtils::GetBool(node->GetOpDesc(), "need_infer_again_", need_repass);
+ if (has_attr) {
+   if (!OptionExists(kOptimizeAfterSubGraph)) {
+     return SUCCESS;
+   }
+   if (need_repass) {
+     AddImmediateRePassNode(node);
+     GELOGD("Node %s need repass immediately.", node->GetName().c_str());
+   } else {
+     // clear attr on while
+     node->GetOpDesc()->DelAttr("need_infer_again_");
+   }
+ }
  return SUCCESS;
}
} // namespace ge
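For context, "need_infer_again_" is an ordinary bool attribute on the OpDesc; the producer side (in GE this comes from while-loop shape inference, though take the snippet as an illustrative sketch rather than the exact call site) marks a node roughly like:

```cpp
// Mark the node so InferShapePass schedules an immediate re-pass for it.
(void) ge::AttrUtils::SetBool(node->GetOpDesc(), "need_infer_again_", true);
```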
@@ -23,7 +23,9 @@ namespace ge {
Status MergeInputMemcpyPass::Run(ComputeGraphPtr graph) {
  GELOGD("MergeInputMemcpyPass Enter");
  for (const auto &node : graph->GetDirectNode()) {
-   if ((node->GetType() != MERGE) && (node->GetType() != REFMERGE)) {
+   std::string type;
+   GE_CHK_STATUS_RET(GetOriginalType(node, type), "Get node type failed.");
+   if ((type != MERGE) && (type != REFMERGE)) {
      continue;
    }
    GE_CHECK_NOTNULL(node->GetOpDesc());
@@ -95,4 +97,3 @@ NodePtr MergeInputMemcpyPass::CreateMemcpyAsyncNode(const ComputeGraphPtr &graph
  return graph->AddNode(op_desc);
}
} // namespace ge
@@ -25,7 +25,9 @@ Status MergeToStreamMergePass::Run(ComputeGraphPtr graph) {
  bypass_nodes_.clear();
  for (const auto &node : graph->GetDirectNode()) {
-   if ((node->GetType() != MERGE) && (node->GetType() != REFMERGE)) {
+   std::string type;
+   GE_CHK_STATUS_RET(GetOriginalType(node, type), "Get node type failed.");
+   if ((type != MERGE) && (type != REFMERGE)) {
      continue;
    }
@@ -101,7 +101,8 @@ Status NextIterationPass::FindWhileGroups() {
  const std::string &frame_name = loop_group_iter.first;
  for (const auto &enter_node : loop_group_iter.second->enter_nodes) {
    for (const auto &out_node : enter_node->GetOutAllNodes()) {
-     const string &type = out_node->GetType();
+     std::string type;
+     GE_CHK_STATUS_RET(GetOriginalType(out_node, type), "Get node type failed.");
      if ((type != MERGE) && (type != REFMERGE)) {
        continue;
      }
@@ -310,7 +311,8 @@ Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string
  }
  for (const auto &tmp_node : nodes) {
-   const std::string type = tmp_node->GetType();
+   std::string type;
+   GE_CHK_STATUS_RET(GetOriginalType(tmp_node, type), "Get node type failed.");
    if ((target_type == LOOPCOND) && (type == target_type)) {
      target_node = tmp_node;
      break;
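The recurring GetType() → GetOriginalType() change in these passes matters for wrapped framework ops: such a node reports a generic type (FRAMEWORKOP) and carries its real type in an attribute, so a direct comparison against MERGE/REFMERGE would miss it. A sketch of the behavior being relied on (not the exact implementation):

```cpp
// What GetOriginalType provides, roughly: recover the wrapped type for FrameworkOp nodes.
std::string type = node->GetType();
if (type == FRAMEWORKOP) {
  (void) ge::AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type);
}
```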
@@ -35,9 +35,9 @@
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
+ #include "utils/node_utils.h"
namespace ge {
Status PassUtils::ConstructTensorDescWithData(const GeTensorDesc &out_desc, std::vector<int64_t> &data,
                                              std::vector<GeTensorPtr> &v_output, const bool scalar_output) {
  Status ret = SUCCESS;
@@ -246,6 +246,12 @@ NodePtr PassUtils::GetInDataNode(const ConstNodePtr &node, int index) {
  return src_node;
}
+ NodePtr PassUtils::GetInNodeCrossSubgraphByIndex(const ConstNodePtr &node, int index) {
+   auto src_node = GetInDataNode(node, index);
+   return NodeUtils::GetInNodeCrossSubgraph(src_node);
+ }
bool PassUtils::IsNeedTrainIteFlowCtrl(const ComputeGraphPtr &compute_graph) {
  if (compute_graph == nullptr) {
    return false;
@@ -30,6 +30,8 @@ class PassUtils {
  static NodePtr GetInDataNode(const ConstNodePtr &node, int index);
+ static NodePtr GetInNodeCrossSubgraphByIndex(const ConstNodePtr &node, int index);
  static bool IsConstant(const ConstNodePtr &node);
  static Status SetOutNodeWeight(const OutDataAnchorPtr &out_data_anchor, const NodePtr &src_node);
@@ -279,7 +279,7 @@ Status SubexpressionMigrationPass::GraphNodeMigration(const ComputeGraphPtr &gra
  const auto &in_anchor = in_anchors.at(i);
  const auto &base_node = in_anchor->GetOwnerNode();
  GELOGD("Get Data direct node: %s", base_node->GetName().c_str());
- if (!base_node->GetHostNode()) {
+ if (!base_node->GetHostNode() || base_node->GetType() == SWITCH) {
    continue;
  }
@@ -94,6 +94,12 @@ Status SwitchDeadBranchElimination::DeleteSwitchNode(NodePtr &node, NodePtr &pre
    GELOGE(FAILED, "parameter is null.");
    return FAILED;
  }
+ // If the two nodes are not in the same graph, use the node's direct in-node instead of pred_node.
+ if (node->GetOwnerComputeGraph() != pred_node->GetOwnerComputeGraph()) {
+   pred_node = PassUtils::GetInDataNode(node, kPredInputIndex);
+ }
  // link pred's in control nodes to switch
  if (GraphUtils::CopyInCtrlEdges(pred_node, node) != GRAPH_SUCCESS) {
    return FAILED;
@@ -131,7 +137,7 @@ Status SwitchDeadBranchElimination::Run(NodePtr &node) {
    return SUCCESS;
  }
- auto pred_node = PassUtils::GetInDataNode(node, kPredInputIndex);
+ auto pred_node = PassUtils::GetInNodeCrossSubgraphByIndex(node, kPredInputIndex);
  if (pred_node == nullptr) {
    GELOGD("[%s] Pred input is null.", node->GetName().c_str());
    return SUCCESS;
@@ -143,7 +149,7 @@ Status SwitchDeadBranchElimination::Run(NodePtr &node) {
    return SUCCESS;
  }
- auto input_node = PassUtils::GetInDataNode(node, kDataInputIndex);
+ auto input_node = PassUtils::GetInNodeCrossSubgraphByIndex(node, kDataInputIndex);
  if (input_node == nullptr) {
    GELOGD("[%s] Data input is null.", node->GetName().c_str());
    return SUCCESS;
@@ -448,6 +448,8 @@ Status SwitchToStreamSwitchPass::CombineSwitchNode(const ComputeGraphPtr &graph)
  // select first stream_switch
  NodePtr stream_switch = switch_list.front();
+ // set stream_label
+ GE_CHK_STATUS_RET(SetStreamLabel(stream_switch, cast_node->GetName()), "Set stream label failed.");
  OpDescPtr switch_desc = stream_switch->GetOpDesc();
  GE_CHECK_NOTNULL(switch_desc);
  switch_desc->SetName(CheckDuplicateName(cond_group + "/" + STREAMSWITCH + (true_branch_flag ? "_t" : "_f")));
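Note that the SetStreamLabel call added here appears to pair with the one removed from AttachStreamLabelPass::AttachFlag above: the combined StreamSwitch now gets its stream label at the point where the switch nodes are merged, rather than later when flags are attached.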
@@ -1772,8 +1772,8 @@ Status GraphPrepare::CheckUserInput(const std::vector<GeTensor> &user_input) {
  if (dim < UNKNOWN_DIM_NUM) {
    std::string situation = "data dim[" + std::to_string(i) + "][" + std::to_string(dim) + "]" ;
    std::string reason = "it need >= -2";
-   REPORT_INPUT_ERROR(
-     "E19025", std::vector<std::string>({"situation", "reason"}),std::vector<std::string>({situation, reason}));
+   REPORT_INPUT_ERROR("E19025", std::vector<std::string>({"situation", "reason"}),
+                      std::vector<std::string>({situation, reason}));
    GELOGE(GE_GRAPH_INIT_FAILED, "[Check][InputDim]data dim %zu is not supported, need >= -2, real:%ld.", i, dim);
    return GE_GRAPH_INIT_FAILED;
  }
@@ -428,7 +428,8 @@ Status AippOp::ConvertRelatedInputNameToRank() {
  if (!convert_flag) {
    string error_msg = "Top name " + related_input_name + "convert rank failed, Please"
                       " ensure top name in aipp config is the top name of data node.";
-   GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
+   GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
+   REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
    return PARAM_INVALID;
  }
@@ -124,13 +124,15 @@ Status InsertNewOpUtil::CheckInputNamePositionNotRepeat() {
  if (another_item->related_input_name().empty()) {
    string error_msg = "Can not both set related_input_name and related_input_rank!"
                       " Please ensure param is the same with the first aipp config(related_input_name).";
-   GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
+   GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
+   REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
    return PARAM_INVALID;
  }
  if (item->related_input_name() == another_item->related_input_name()) {
    string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_name"
                       " param is different in different aipp config.";
-   GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
+   GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
+   REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
    return PARAM_INVALID;
  }
}
@@ -150,13 +152,15 @@ Status InsertNewOpUtil::CheckInputRankPositionNoRepeat() {
  if (!another_item->related_input_name().empty()) {
    string error_msg = "Can not both set related_input_rank and related_input_name!"
                       " Please ensure param is the same with the first aipp config(related_input_rank).";
-   GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
+   GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
+   REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
    return PARAM_INVALID;
  }
  if (item->related_input_rank() == another_item->related_input_rank()) {
    string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_rank"
                       " param is different in different aipp config.";
-   GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
+   GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
+   REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
    return PARAM_INVALID;
  }
}
@@ -212,7 +216,7 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) {
      }
    }
  }
- GE_CHK_LOG_AND_ERRORMSG((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt),
+ GE_CHK_LOG_AND_ERRORMSG((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt),
                          PARAM_INVALID,
                          "Can not config part of outputs of Data node to support AIPP, config all "
                          "of the outputs of Data to support AIPP, or config none of them");
@@ -3,6 +3,7 @@ set(PROTO_LIST
)
protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
+ protobuf_generate(ge_atcstub PROTO_ATCSTUB_SRCS PROTO_ATCSTUB_HDRS ${PROTO_LIST})
set(SRC_LIST
    "engine/host_cpu_engine.cc"
@@ -61,7 +62,7 @@ target_link_libraries(host_cpu_engine PRIVATE
)
############ atcstub/libhost_cpu_engine.so ############
- add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
+ add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_ATCSTUB_HDRS})
target_compile_options(atc_host_cpu_engine PRIVATE
    -Werror
@@ -84,7 +85,7 @@ target_include_directories(atc_host_cpu_engine PRIVATE
    ${METADEF_DIR}/inc/external
    ${METADEF_DIR}/inc/external/graph
    ${CMAKE_BINARY_DIR}
-   ${CMAKE_BINARY_DIR}/proto/ge
+   ${CMAKE_BINARY_DIR}/proto/ge_atcstub
    #### yellow zone ####
    ${GE_CODE_DIR}/../inc
    #### blue zone ####
@@ -407,7 +407,8 @@ Status GatherV2Kernel::Compute(const OpDescPtr op_desc_ptr, const vector<ConstGe
  // check input data type
  auto x_data_type = tensor0->GetTensorDesc().GetDataType();
  if (supported_type.find(x_data_type) == supported_type.end()) {
-   GELOGI("GatherV2Kernel does not support this Data type:%s.", TypeUtils::DataTypeToSerialString(x_data_type).c_str());
+   GELOGI("GatherV2Kernel does not support this Data type:%s.",
+          TypeUtils::DataTypeToSerialString(x_data_type).c_str());
    return NOT_CHANGED;
  }
  // calc output shape
@@ -68,9 +68,10 @@ struct GraphExecutionContext {
  DumpProperties dump_properties;
  bool trace_enabled = false;
  bool dump_enabled = false;
- std::atomic_bool is_eos_;
+ std::atomic_bool is_eos_{false};
  long profiling_level = 0;
  long iteration = 0;
+ void *global_step = nullptr;
 private:
  Status status = SUCCESS;
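The `is_eos_{false}` change is more than style: a default-constructed std::atomic_bool holds an indeterminate value (value-initialization of atomics only became the default in C++20), so the brace initializer is what guarantees the flag starts out false. In miniature:

```cpp
#include <atomic>

std::atomic_bool flag_a;         // default-constructed: indeterminate value before C++20
std::atomic_bool flag_b{false};  // value-initialized: guaranteed to start as false
```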
@@ -46,10 +46,6 @@ void HybridModelAsyncExecutor::SetModelId(uint32_t model_id) {
  model_id_ = model_id;
}
- void HybridModelAsyncExecutor::SetModelName(const string &model_name) {
-   om_name_ = model_name;
- }
Status HybridModelAsyncExecutor::EnqueueData(const shared_ptr<InputDataWrapper> &data) {
  GE_CHK_STATUS_EXEC(data_inputer_->Push(data), return domi::DATA_QUEUE_ISFULL,
                     "Data queue is full, please call again later, model_id %u ", model_id_);
@@ -67,6 +63,7 @@ Status HybridModelAsyncExecutor::Start(const std::shared_ptr<ModelListener> &lis
  future_ = std::async(std::launch::async, [&]() -> Status {
    GetThreadLocalContext() = *executor_->GetContext()->ge_context;
    GetContext().SetSessionId(executor_->GetContext()->session_id);
+   GetContext().SetContextId(executor_->GetContext()->context_id);
    return RunInternal();
  });
@@ -105,7 +102,7 @@ Status HybridModelAsyncExecutor::Init() {
  executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_));
  GE_CHECK_NOTNULL(executor_);
  GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine");
- GE_CHK_STATUS_RET(DumpOpDebug(),"Dump op debug failed in hybrid engine");
+ GE_CHK_STATUS_RET(DumpOpDebug(), "Dump op debug failed in hybrid engine");
  GELOGI("HybridModel stage nums:%zu", model_->GetRootGraphItem()->NumGroups());
  if (model_->GetRootGraphItem()->NumGroups() >= kMinimumPiplineStages) {
@@ -136,8 +133,12 @@ Status HybridModelAsyncExecutor::RunInternal() {
  GE_MAKE_GUARD(not_used_var, [&] { GE_CHK_RT(rtDeviceReset(device_id)); });
  while (run_flag_) {
+   // The model has not actually started running before data is received.
+   SetRunningFlag(false);
    std::shared_ptr<InputDataWrapper> data_wrapper;
    Status ret = data_inputer_->Pop(data_wrapper);
+   // The model actually starts running now.
+   SetRunningFlag(true);
    if (data_wrapper == nullptr || ret != SUCCESS) {
      GELOGI("data_wrapper is null!, ret = %u", ret);
      continue;
@@ -166,6 +167,7 @@ Status HybridModelAsyncExecutor::RunInternal() {
    } else {
      GELOGI("HybridModel will execute in singleline mode");
      ge::GetContext().SetSessionId(executor_->GetContext()->session_id);
+     ge::GetContext().SetContextId(executor_->GetContext()->context_id);
      ret = executor_->Execute(args);
    }
    ret = HandleResult(ret, current_data.index, args, data_wrapper->GetOutput());
@@ -176,7 +178,8 @@ Status HybridModelAsyncExecutor::RunInternal() {
    RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[RunInternal] [iteration = %d] End", iterator_count_);
    iterator_count_++;
-   GELOGI("run iterator count is %lu", iterator_count_);
+   SetRunningFlag(false);
+   GELOGI("run iterator count is %lu, model_id:%u", iterator_count_, model_id_);
  }
  CsaInteract::GetInstance().WriteInternalErrorCode();
@@ -51,12 +51,16 @@ class HybridModelAsyncExecutor {
  void SetModelId(uint32_t model_id);
- void SetModelName(const string &model_name);
  Status Stop();
  Status EnqueueData(const std::shared_ptr<InputDataWrapper> &data);
+ uint32_t GetDataInputerSize() { return data_inputer_->Size(); }
+ bool GetRunningFlag() const { return running_flag_; }
+ void SetRunningFlag(bool flag) { running_flag_ = flag; }
 private:
  Status InitInputDesc();
@@ -86,6 +90,8 @@ class HybridModelAsyncExecutor {
  uint32_t device_id_ = 0U;
  uint32_t model_id_ = 0U;
  std::atomic_bool run_flag_;
+ // check whether model is running with data
+ bool running_flag_ = false;
  std::unique_ptr<DataInputer> data_inputer_;
  std::unique_ptr<HybridModelExecutor> executor_;
  std::unique_ptr<HybridModelPipelineExecutor> pipe_executor_;
@@ -97,7 +103,6 @@ class HybridModelAsyncExecutor {
  std::map<uint32_t, GeTensorDescPtr> input_tensor_desc_;
  std::vector<bool> is_input_dynamic_;
  std::shared_ptr<ModelListener> listener_;
- string om_name_;
  DataDumper data_dumper_;
  bool is_op_debug_reg_ = false;
  OpdebugRegister op_debug_register_;
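One caveat worth noting: unlike run_flag_, the new running_flag_ is a plain bool written by the executor thread and read from other threads via GetRunningFlag(), so callers get a best-effort snapshot rather than a synchronized value.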
@@ -46,7 +46,10 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
  GELOGD("Start to execute model.");
  auto root_graph_item = model_->GetRootGraphItem();
  GE_CHECK_NOTNULL(root_graph_item);
+ if (context_.global_step != nullptr) {
+   GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration,
+                               sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream));
+ }
  SubgraphExecutor executor(model_->GetRootGraphItem(), &context_);
  auto ret = ExecuteGraphInternal(executor, args);
  Cleanup();
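With this change, every execution begins by asynchronously copying the host-side iteration counter into the device buffer held in context_.global_step (populated from model_->GetGlobalStep() in the InitExecutionContext hunk below), which is the same pointer the dynamic-node dump path now reads from the execution context.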
@@ -98,6 +101,7 @@ Status HybridModelExecutor::InitExecutionContext() {
  GE_CHK_RT_RET(rtCtxCreate(&context_.rt_gen_context, RT_CTX_GEN_MODE, 0));
  GE_CHK_RT_RET(rtCtxSetCurrent(context_.rt_context));
+ context_.global_step = model_->GetGlobalStep();
  context_.stream = stream_;
  context_.model = model_;
  context_.is_eos_ = false;
@@ -130,6 +134,16 @@ Status HybridModelExecutor::ResetExecutionContext(GraphExecutionContext &context
  string ctx_id = std::to_string(context.context_id);
  RuntimeInferenceContext::DestroyContext(ctx_id);
  GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext");
+ RuntimeInferenceContext *ctx = nullptr;
+ GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(ctx_id, &ctx), "Failed to get context");
+ for (auto &host_tensor : context.model->GetHostTensors()) {
+   auto node_id = host_tensor.first;
+   for (const auto &output_idx_and_tensor : host_tensor.second) {
+     auto output_idx = output_idx_and_tensor.first;
+     GELOGD("Preload const host tensor, node_id = %ld, output id = %d", node_id, output_idx);
+     ctx->SetTensor(node_id, output_idx, output_idx_and_tensor.second.Clone());
+   }
+ }
  return SUCCESS;
}
} // namespace hybrid
@@ -38,6 +38,16 @@ Status StageExecutor::ResetExecutionContext(GraphExecutionContext &context) {
  string ctx_id = std::to_string(context.context_id);
  RuntimeInferenceContext::DestroyContext(ctx_id);
  GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext");
+ RuntimeInferenceContext *ctx = nullptr;
+ GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(ctx_id, &ctx), "Failed to get context");
+ for (auto &host_tensor : context.model->GetHostTensors()) {
+   auto node_id = host_tensor.first;
+   for (const auto &output_idx_and_tensor : host_tensor.second) {
+     auto output_idx = output_idx_and_tensor.first;
+     GELOGD("Preload const host tensor, node_id = %ld, output id = %d", node_id, output_idx);
+     ctx->SetTensor(node_id, output_idx, output_idx_and_tensor.second.Clone());
+   }
+ }
  return SUCCESS;
}
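Both executors now seed the freshly created RuntimeInferenceContext with clones of the model's constant host tensors, presumably so that anything that later queries the context for those node outputs can read the values directly; cloning keeps each context's copy independent of the model's.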
@@ -35,12 +35,14 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item(
         node_item.NodeName().c_str(),
         this->num_pending_shapes_);
- for (int i = 0; i < node_item.num_inputs; ++i){
-   input_tensor_desc.emplace_back(*node_item.MutableInputDesc(i));
+ input_tensor_desc.resize(node_item.num_inputs);
+ for (int i = 0; i < node_item.num_inputs; ++i) {
+   node_item.GetInputDesc(i, input_tensor_desc[i]);
  }
- for (int i = 0; i < node_item.num_outputs; ++i){
-   output_tensor_desc.emplace_back(*node_item.MutableOutputDesc(i));
+ output_tensor_desc.resize(node_item.num_outputs);
+ for (int i = 0; i < node_item.num_outputs; ++i) {
+   node_item.GetOutputDesc(i, output_tensor_desc[i]);
  }
}
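The switch from emplace_back(*MutableInputDesc(i)) to resize() plus GetInputDesc(i, ...) avoids dereferencing the raw pointer returned by MutableInputDesc, presumably letting the accessor report a missing descriptor instead of risking a null dereference.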
@@ -227,6 +227,7 @@ Status SubgraphExecutor::PrepareNodes(int group) {
  if (node_item.is_dynamic) {
    auto prepare_future = pre_run_pool_.commit([this, p_node_state]() -> Status {
      GetContext().SetSessionId(context_->session_id);
+     GetContext().SetContextId(context_->context_id);
      GE_CHK_STATUS_RET_NOLOG(InferShape(shape_inference_engine_.get(), *p_node_state));
      return PrepareForExecution(context_, *p_node_state);
    });
@@ -273,10 +274,8 @@ Status SubgraphExecutor::PrepareNodes(int group) {
}
Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const {
- GetContext().SetSessionId(context_->context_id);
  HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state),
                        "[%s] Failed to InferShape.", node_state.GetName().c_str());
- GetContext().SetSessionId(context_->session_id);
  HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state),
                        "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str());
  return SUCCESS;
@@ -345,6 +344,7 @@ Status SubgraphExecutor::ScheduleTasks(int group) {
  GELOGD("[%s] Start to schedule prepare workers.", graph_item_->GetName().c_str());
  auto prepare_future = std::async(std::launch::async, [&]() -> Status {
    GetContext().SetSessionId(context_->session_id);
+   GetContext().SetContextId(context_->context_id);
    auto ret = PrepareNodes(group);
    ready_queue_.Push(nullptr);
    return ret;
@@ -206,37 +206,35 @@ Status NodeDoneCallback::DumpDynamicNode() {
    return PARAM_INVALID;
  }
  auto op_desc = node->GetOpDesc();
- GE_CHECK_NOTNULL(graph_context_);
- const HybridModel *model = graph_context_->model;
- GE_CHECK_NOTNULL(model);
- std::string dynamic_model_name = model->GetModelName();
- std::string dynamic_om_name = model->GetOmName();
- uint32_t model_id = model->GetModelId();
- if (!context_->GetDumpProperties().IsLayerNeedDump(dynamic_model_name, dynamic_om_name, op_desc->GetName())) {
-   GELOGI("[%s] is not in dump list, no need dump", op_desc->GetName().c_str());
-   return SUCCESS;
- }
- dump_op_.SetDynamicModelInfo(dynamic_model_name, dynamic_om_name, model_id);
  auto stream = context_->GetStream();
  vector<uintptr_t> input_addrs;
  vector<uintptr_t> output_addrs;
  for (int i = 0; i < context_->NumInputs(); i++) {
    auto tensor_value = context_->GetInput(i);
    GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "Tensor value is nullptr");
-   uint64_t input_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData());
+   uintptr_t input_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData());
    input_addrs.emplace_back(input_addr);
  }
  for (int j = 0; j < context_->NumOutputs(); j++) {
    auto tensor_value = context_->GetOutput(j);
    GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "Tensor value is nullptr");
-   uint64_t output_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData());
+   uintptr_t output_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData());
    output_addrs.emplace_back(output_addr);
  }
  dump_op_.SetDumpInfo(context_->GetDumpProperties(), op_desc, input_addrs, output_addrs, stream);
+ GE_CHECK_NOTNULL(graph_context_);
+ const HybridModel *model = graph_context_->model;
+ GE_CHECK_NOTNULL(model);
+ std::string dynamic_model_name = model->GetModelName();
+ uint32_t model_id = model->GetModelId();
+ dump_op_.SetDynamicModelInfo(dynamic_model_name, model_id);
- void *global_step = nullptr;
- TensorValue *varible_global_step = context_->GetVariable(NODE_NAME_GLOBAL_STEP);
- if (varible_global_step != nullptr) {
-   global_step = const_cast<void *>(varible_global_step->GetData());
- }
  void *loop_per_iter = nullptr;
  TensorValue *varible_loop_per_iter = context_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER);
  if (varible_loop_per_iter != nullptr) {
@@ -248,6 +246,7 @@ Status NodeDoneCallback::DumpDynamicNode() {
  if (varible_loop_cond != nullptr) {
    loop_cond = const_cast<void *>(varible_loop_cond->GetData());
  }
+ void *global_step = context_->GetExecutionContext()->global_step;
  dump_op_.SetLoopAddr(global_step, loop_per_iter, loop_cond);
  GE_CHK_STATUS_RET(dump_op_.LaunchDumpOp(), "Failed to launch dump op in hybird model");
@@ -19,6 +19,7 @@
#include "hybrid/model/hybrid_model.h"
#include "hybrid/executor/hybrid_model_async_executor.h"
#include "hybrid/node_executor/node_executor.h"
+ #include "graph/manager/graph_manager_utils.h"
namespace ge {
namespace hybrid {
@@ -76,9 +77,8 @@ class HybridDavinciModel::Impl {
    executor_.SetDeviceId(device_id);
  }
- void SetModelName(const string &model_name) {
-   model_.SetModelName(model_name);
-   executor_.SetModelName(model_name);
+ void SetOmName(const string &model_name) {
+   model_.SetOmName(model_name);
  }
  uint64_t GetSessionId() {
@@ -108,6 +108,17 @@ class HybridDavinciModel::Impl {
    model_.SetModelDescVersion(is_new_model_desc);
  }
+ uint32_t GetDataInputerSize() { return executor_.GetDataInputerSize(); }
+ bool GetRunningFlag() const { return executor_.GetRunningFlag(); }
+ Status SetRunAsyncListenerCallback(const RunAsyncCallback &callback) {
+   auto listener = dynamic_cast<RunAsyncListener *>(listener_.get());
+   GE_CHECK_NOTNULL(listener);
+   listener->SetCallback(callback);
+   return SUCCESS;
+ }
 private:
  std::shared_ptr<ModelListener> listener_;
  HybridModel model_;
@@ -181,9 +192,9 @@ void HybridDavinciModel::SetDeviceId(uint32_t device_id) {
  }
}
- void HybridDavinciModel::SetModelName(const string &model_name) {
+ void HybridDavinciModel::SetOmName(const string &om_name) {
  if (impl_ != nullptr) {
-   impl_->SetModelName(model_name);
+   impl_->SetOmName(om_name);
  }
}
@@ -222,5 +233,16 @@ uint64_t HybridDavinciModel::GetSessionId() {
  GE_CHECK_NOTNULL(impl_);
  return impl_->GetSessionId();
}
+ uint32_t HybridDavinciModel::GetDataInputerSize() {
+   GE_CHECK_NOTNULL(impl_);
+   return impl_->GetDataInputerSize();
+ }
+ bool HybridDavinciModel::GetRunningFlag() const { return impl_->GetRunningFlag(); }
+ Status HybridDavinciModel::SetRunAsyncListenerCallback(const RunAsyncCallback &callback) {
+   return impl_->SetRunAsyncListenerCallback(callback);
+ }
} // namespace hybrid
} // namespace ge
@@ -57,7 +57,7 @@ class HybridDavinciModel {
  void SetDeviceId(uint32_t device_id);
- void SetModelName(const string &model_name);
+ void SetOmName(const string &om_name);
  uint64_t GetSessionId();
@@ -74,6 +74,12 @@ class HybridDavinciModel {
  void SetModelDescVersion(bool is_new_model_desc);
+ uint32_t GetDataInputerSize();
+ bool GetRunningFlag() const;
+ Status SetRunAsyncListenerCallback(const RunAsyncCallback &callback);
 private:
  HybridDavinciModel() = default;
  class Impl;
@@ -61,13 +61,17 @@ void HybridDavinciModel::SetModelId(uint32_t model_id) {
void HybridDavinciModel::SetDeviceId(uint32_t device_id) {
}
- void HybridDavinciModel::SetModelName(const string &model_name) {
+ void HybridDavinciModel::SetOmName(const string &om_name) {
}
uint64_t HybridDavinciModel::GetSessionId() {
  return 0;
}
+ uint32_t HybridDavinciModel::GetDataInputerSize() {
+   return 0;
+ }
Status HybridDavinciModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) {
  return UNSUPPORTED;
}
@@ -87,5 +91,13 @@ Status HybridDavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &i
void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) {
}
+ bool HybridDavinciModel::GetRunningFlag() const {
+   return false;
+ }
+ Status HybridDavinciModel::SetRunAsyncListenerCallback(const RunAsyncCallback &callback) {
+   return UNSUPPORTED;
+ }
} // namespace hybrid
} // namespace ge