!1096 update

From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmq
4 years ago · 8dc712ca01
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 /build
 /output
 /prebuilts
 /cov
 *.ir
 *.out

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -88,10 +88,8 @@ if (ENABLE_OPEN_SRC)
        find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR})
        find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR})
        if(PLATFORM STREQUAL "train")
            find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
            find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
            find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
            find_module(resource libresource.so ${ASCEND_RUNTIME_DIR})
            find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
            find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
            find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
@@ -101,12 +99,10 @@ if (ENABLE_OPEN_SRC)
        elseif(PLATFORM STREQUAL "inference")
            find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
            find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
            find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
            find_module(resource libresource.so ${ASCEND_ATC_DIR})
 	    find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
            find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
            find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
            find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
            #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
            if(PRODUCT STREQUAL "flr3")
            elseif(PRODUCT STREQUAL "flr1")
                find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
@@ -116,17 +112,14 @@ if (ENABLE_OPEN_SRC)
                find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR})
            endif()
        elseif(PLATFORM STREQUAL "all")
            find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
            find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
            find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
            find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
            find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
            find_module(resource libresource.so ${ASCEND_ATC_DIR})
            find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
            find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
 	    find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
 	    find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
 	    find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
            find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
            find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
            #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
 	    find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR})
 	    find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
 	    find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
 	    find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
        else()
            # Reached when PLATFORM matches none of the branches above. The accepted
            # values are train, inference and all, so the hint lists all three.
            # Kept at STATUS level so configuration still proceeds as before.
            message(STATUS "PLATFORM param is invalid, should be train, inference or all, you choose nothing!")
        endif()
--- a/build.sh
+++ b/build.sh
@@ -166,14 +166,14 @@ build_graphengine()
    echo "execute command: cmake ${CMAKE_ARGS} .. failed."
    return 1
  fi
  COMMON_TARGET="ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt "
  COMMON_TARGET="ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt "
  TARGET=${COMMON_TARGET}
  if [ "x${PLATFORM}" = "xtrain" ]
  then
    TARGET="ge_runner ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder fwk_atc.bin ${TARGET}"
    TARGET="ge_runner fwk_atc.bin ${TARGET}"
  elif [ "x${PLATFORM}" = "xinference" ]
  then
    TARGET="ge_compiler atc_ge_local_engine atc_ge_local_opskernel_builder atc_host_cpu_engine atc_host_cpu_opskernel_builder atc_atc.bin opensrc_ascendcl ${TARGET}"
    TARGET="ge_compiler atc_atc.bin opensrc_ascendcl ${TARGET}"
  elif [ "X$ENABLE_GE_UT" = "Xon" ]
  then
    TARGET="ut_libgraph ut_libge_multiparts_utest ut_libge_others_utest ut_libge_kernel_utest ut_libge_distinct_load_utest"
@@ -183,7 +183,7 @@ build_graphengine()
  elif [ "x${PLATFORM}" = "xall" ]
  then
    # build all the target
    TARGET=""
    TARGET="ge_runner ge_compiler fwk_atc.bin atc_atc.bin opensrc_ascendcl ${TARGET}"
  fi
  
  make ${VERBOSE} ${TARGET} -j${THREAD_NUM} && make install
@@ -198,8 +198,6 @@ g++ -v
 mk_dir ${OUTPUT_PATH}
 # Stop when the build fails. A bare `return` would hand back the status of the
 # preceding echo (0), and `return` is only valid inside a function or a sourced
 # script, so return an explicit failure and fall back to exit when run directly.
 build_graphengine || { echo "GraphEngine build failed."; return 1 2>/dev/null || exit 1; }
 echo "---------------- GraphEngine build finished ----------------"
 #cp -rf "${BUILD_PATH}/graphengine/"*.so "${OUTPUT_PATH}"
 #rm -rf "${OUTPUT_PATH}/"libproto*
 rm -f ${OUTPUT_PATH}/libgmock*.so
 rm -f ${OUTPUT_PATH}/libgtest*.so
 rm -f ${OUTPUT_PATH}/lib*_stub.so
@@ -209,10 +207,6 @@ find ${OUTPUT_PATH} -name "*.so*" -print0 | xargs -0 chmod 500

 echo "---------------- GraphEngine output generated ----------------"

 # if [[ "X$ENABLE_GE_ST" = "Xon" ]]; then
 #     cp ${BUILD_PATH}/graphengine/tests/st/st_resnet50_train ${OUTPUT_PATH}
 # fi

 if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
    cp ${BUILD_PATH}/tests/ut/common/graph/ut_libgraph ${OUTPUT_PATH}
    cp ${BUILD_PATH}/tests/ut/ge/ut_libge_multiparts_utest ${OUTPUT_PATH}
@@ -220,9 +214,6 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
    cp ${BUILD_PATH}/tests/ut/ge/ut_libge_others_utest ${OUTPUT_PATH}
    cp ${BUILD_PATH}/tests/ut/ge/ut_libge_kernel_utest ${OUTPUT_PATH}

 #     if [[ "X${ENABLE_GE_UT_ONLY_COMPILE}" != "Xon" ]]; then
 #         export LD_LIBRARY_PATH=${D_LINK_PATH}/x86_64/:${BUILD_PATH}../third_party/prebuild/x86_64/:${BUILD_PATH}/graphengine/:/usr/local/HiAI/driver/lib64:/usr/local/HiAI/runtime/lib64:${LD_LIBRARY_PATH}
 #         echo ${LD_LIBRARY_PATH}
    ${OUTPUT_PATH}/ut_libgraph &&
    ${OUTPUT_PATH}/ut_libge_multiparts_utest &&
    ${OUTPUT_PATH}/ut_libge_distinct_load_utest &&
@@ -232,17 +223,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
        echo "!!! UT FAILED, PLEASE CHECK YOUR CHANGES !!!"
        exit 1;
    fi
 #     fi

 #     if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then
        echo "Generating coverage statistics, please wait..."
        cd ${BASEPATH}
        rm -rf ${BASEPATH}/cov
        mkdir ${BASEPATH}/cov
        lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
        lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
        cd ${BASEPATH}/cov
        genhtml coverage.info
    echo "Generating coverage statistics, please wait..."
    cd ${BASEPATH}
    rm -rf ${BASEPATH}/cov
    mkdir ${BASEPATH}/cov
    lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
    lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
    cd ${BASEPATH}/cov
    genhtml coverage.info
 fi

 # generate output package in tar form, including ut/st libraries/executables
@@ -256,6 +244,8 @@ generate_package()
  ATC_PATH="atc/lib64"
  ATC_BIN_PATH="atc/bin"
  FWK_BIN_PATH="fwkacllib/bin"
  FWK_INCLUDE_PATH="fwkacllib/include"
  ATC_INCLUDE_PATH="atc/include"
  NNENGINE_PATH="plugin/nnengine/ge_config"
  OPSKERNEL_PATH="plugin/opskernel"

@@ -277,6 +267,8 @@ generate_package()
  mk_dir "${OUTPUT_PATH}/${ACL_PATH}"
  mk_dir "${OUTPUT_PATH}/${ATC_BIN_PATH}"
  mk_dir "${OUTPUT_PATH}/${FWK_BIN_PATH}"
  mk_dir "${OUTPUT_PATH}/${FWK_INCLUDE_PATH}"
  mk_dir "${OUTPUT_PATH}/${ATC_INCLUDE_PATH}"
 
  cd "${OUTPUT_PATH}"

@@ -289,10 +281,10 @@ generate_package()
  find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name libengine.so -exec cp -f {} ${OUTPUT_PATH}/${ATC_PATH}/${NNENGINE_PATH}/../ \;

  MAX_DEPTH=1
  if [ "x${PLATFORM}" = "xall" ] || [ "x${PLATFORM}" = "xinference" ]
  then
    MAX_DEPTH=2
  fi
 #  if [ "x${PLATFORM}" = "xall" ] || [ "x${PLATFORM}" = "xinference" ]
 #  then
 #    MAX_DEPTH=2
 #  fi
  for lib in "${PLUGIN_OPSKERNEL[@]}";
  do
    find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth ${MAX_DEPTH} -name "$lib" -exec cp -f {} ${OUTPUT_PATH}/${FWK_PATH}/${OPSKERNEL_PATH} \;
@@ -318,7 +310,15 @@ generate_package()
  find ./lib/atclib -name atc.bin -exec cp {} "${OUTPUT_PATH}/${ATC_BIN_PATH}" \;
  find ./lib/fwkacl -name atc.bin -exec cp {} "${OUTPUT_PATH}/${FWK_BIN_PATH}" \;
  find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name "libascendcl.so" -exec cp -f {} ${OUTPUT_PATH}/${ACL_PATH} \;
  

  # Copy the external headers of metadef, parser and this repository into both the
  # ATC and the FWK include directories (same order as before: ATC first, then FWK).
  # Destinations are relative paths, so they are presumably resolved against the
  # directory entered by the earlier `cd "${OUTPUT_PATH}"` -- confirm no later cd.
  # Expansions are quoted so paths containing whitespace are not word-split; the
  # trailing * stays outside the quotes so it is still expanded as a glob.
  for header_dst in "${ATC_INCLUDE_PATH}" "${FWK_INCLUDE_PATH}"
  do
    for header_src in metadef/inc/external parser/inc/external inc/external
    do
      cp -r "${OUTPUT_PATH}/../${header_src}/"* "${header_dst}"
    done
  done

  if [ "x${PLATFORM}" = "xtrain" ]
  then
    tar -cf graphengine_lib.tar fwkacllib
@@ -339,4 +339,4 @@ then
  find ./ -name graphengine_lib.tar -exec rm {} \;
  tar -cf graphengine_lib.tar lib
 fi
 echo "---------------- GraphEngine package archive generated ----------------"
 echo "---------------- GraphEngine package archive generated ----------------"
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -639,15 +639,6 @@ set(INFER_SRC_LIST
    "graph/load/model_manager/task_info/model_exit_task_info.cc"
    "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc"
    "graph/load/model_manager/task_info/super_kernel/super_kernel.cc"
    "single_op/task/op_task.cc"
    "single_op/task/build_task_utils.cc"
    "single_op/task/tbe_task_builder.cc"
    "single_op/task/aicpu_task_builder.cc"
    "single_op/task/aicpu_kernel_task_builder.cc"
    "single_op/single_op.cc"
    "single_op/single_op_model.cc"
    "single_op/stream_resource.cc"
    "single_op/single_op_manager.cc"
    "hybrid/hybrid_davinci_model_stub.cc"
    "ir_build/ge_ir_build.cc"
    "ir_build/atc_ir_common.cc"
@@ -703,11 +694,13 @@ target_compile_definitions(ge_runner PRIVATE
    FMK_SUPPORT_DUMP
    DAVINCI_CLOUD
    google=ascend_private
    FUNC_VISIBILITY
 )

 target_compile_options(ge_runner PRIVATE
    -O2
    -fno-common
    -fvisibility=hidden
    $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable>
    $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format>
 )
@@ -738,6 +731,10 @@ target_include_directories(ge_runner SYSTEM PRIVATE
    ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
 )

 target_link_options(ge_runner PRIVATE
    -Wl,-Bsymbolic
 )

 target_link_libraries(ge_runner PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    adump_server
@@ -772,11 +769,13 @@ target_compile_definitions(ge_compiler PRIVATE
    FMK_HOST_INFER
    COMPILE_OMG_PACKAGE
    google=ascend_private
    FUNC_VISIBILITY
 )

 target_compile_options(ge_compiler PRIVATE
    -O2
    -fno-common
    -fvisibility=hidden
    $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable>
    $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format>
 )
@@ -807,6 +806,10 @@ target_include_directories(ge_compiler SYSTEM PRIVATE
    ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
 )

 target_link_options(ge_compiler PRIVATE
    -Wl,-Bsymbolic
 )

 target_link_libraries(ge_compiler PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    static_mmpa
@@ -868,6 +871,7 @@ target_compile_options(opensrc_ascendcl PRIVATE
    -O2
    -fvisibility=hidden
 )

 target_link_options(opensrc_ascendcl PRIVATE
    -rdynamic
    -Wl,--allow-multiple-definition
@@ -875,6 +879,7 @@ target_link_options(opensrc_ascendcl PRIVATE
    -Wl,-Bsymbolic
    -Wl,--exclude-libs,ALL
 )

 target_link_libraries(opensrc_ascendcl PRIVATE
                     -Wl,--whole-archive
                     ge_executor
--- a/ge/common/CMakeLists.txt
+++ b/ge/common/CMakeLists.txt
@@ -12,7 +12,7 @@ set(PROTO_LIST
    "${METADEF_DIR}/proto/tensorflow/tensor.proto"
    "${METADEF_DIR}/proto/tensorflow/tensor_shape.proto"
    "${METADEF_DIR}/proto/tensorflow/types.proto"
    "${METADEF_DIR}/proto/tensorflow/versions.proto" 
    "${METADEF_DIR}/proto/tensorflow/versions.proto"
 )

 protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
@@ -73,6 +73,7 @@ target_compile_definitions(ge_common PRIVATE
    FMK_SUPPORT_DUMP
    OS_CENTOS
    google=ascend_private
    FUNC_VISIBILITY
 )

 target_compile_options(ge_common PRIVATE
@@ -105,6 +106,10 @@ target_include_directories(ge_common PRIVATE
    ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
 )

 target_link_options(ge_common PRIVATE
    -Wl,-Bsymbolic
 )

 target_link_libraries(ge_common PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    static_mmpa
@@ -132,6 +137,7 @@ target_compile_definitions(ge_common_static PRIVATE
    $<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0>
    $<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX>
    LOG_CPP
    FUNC_VISIBILITY
 )

 target_compile_options(ge_common_static PRIVATE
@@ -181,6 +187,7 @@ target_compile_definitions(ge_common PRIVATE
    OS_CENTOS
    google=ascend_private
    LOG_CPP
    FUNC_VISIBILITY
 )

 target_compile_options(ge_common PRIVATE
@@ -208,6 +215,10 @@ target_include_directories(ge_common PRIVATE
    ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
 )

 target_link_options(ge_common PRIVATE
    -Wl,-Bsymbolic
 )

 target_link_libraries(ge_common PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    ascend_protobuf_static
--- a/ge/common/helper/model_cache_helper.cc
+++ b/ge/common/helper/model_cache_helper.cc
@@ -598,7 +598,7 @@ bool ModelCacheHelper::IsAllocatedGraphIdSameAsCache(Json &json) const {
    return false;
  }
  // Compare allocated graph id info between json and VarManager
  std::unordered_map<std::string, uint32_t> allocated_graph_id;
  std::map<std::string, uint32_t> allocated_graph_id;
  auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id);
  if (ret != SUCCESS) {
    GELOGW("Fail to parse AllocatedGraphId from Json.");
@@ -667,7 +667,7 @@ bool ModelCacheHelper::IsChangedGraphIdSameAsCache(Json &json) const {
    return false;
  }
  // Compare variable changed graph id info between json and VarManager
  std::unordered_map<std::string, uint32_t> changed_graph_id;
  std::map<std::string, uint32_t> changed_graph_id;
  auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id);
  if (ret != SUCCESS) {
    GELOGW("Fail to parse ChangedGraphId from Json.");
@@ -732,7 +732,7 @@ bool ModelCacheHelper::IsVarAddrMgrMapSameAsCache(Json &json) const {
  }
  // Compare variable address info between json and VarManager
  std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector;
  std::unordered_set<uint64_t> var_offset_set;
  std::set<uint64_t> var_offset_set;
  auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set);
  if (ret != SUCCESS) {
    GELOGW("Fail to parse VarAddrMgrMap from Json.");
@@ -942,7 +942,7 @@ Status ModelCacheHelper::RecoverAllocatedGraphId(const Json &json) const {
    GELOGW("Input param json type should be null or array.");
    return PARAM_INVALID;
  }
  std::unordered_map<std::string, uint32_t> allocated_graph_id;
  std::map<std::string, uint32_t> allocated_graph_id;
  auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id);
  if (ret != SUCCESS) {
    GELOGW("Fail to parse AllocatedGraphId from Json.");
@@ -963,7 +963,7 @@ Status ModelCacheHelper::RecoverChangedGraphId(const Json &json) const {
    GELOGW("Input param json type should be null or array.");
    return PARAM_INVALID;
  }
  std::unordered_map<std::string, uint32_t> changed_graph_id;
  std::map<std::string, uint32_t> changed_graph_id;
  auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id);
  if (ret != SUCCESS) {
    GELOGW("Fail to parse AllocatedGraphId from Json.");
@@ -985,7 +985,7 @@ Status ModelCacheHelper::RecoverVarAddrAndTensorDesc(const Json &json) const {
    return PARAM_INVALID;
  }
  std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector;
  std::unordered_set<uint64_t> var_offset_set;
  std::set<uint64_t> var_offset_set;
  auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set);
  if (ret != SUCCESS) {
    GELOGW("Fail to parse VarAddrMgrMap from Json.");
@@ -1508,7 +1508,7 @@ Status ModelCacheHelper::ParseMemResourceFromJson(const Json &json, map<rtMemTyp

 Status ModelCacheHelper::ParseVarAddrMgrMapFromJson(
  const Json &json, std::vector<std::pair<std::string, VarAddrMgr>> &var_addr_mgr_vector,
  std::unordered_set<uint64_t> &var_offset_set) {
  std::set<uint64_t> &var_offset_set) {
  if (!(json.is_array() || json.is_null())) {
    GELOGW("Input param json type should be null or array.");
    return PARAM_INVALID;
@@ -1606,7 +1606,7 @@ Status ModelCacheHelper::ParseTransRoadsFromJson(
 }

 Status ModelCacheHelper::ParseChangedGraphIdFromJson(const Json &json,
                                                     std::unordered_map<std::string, uint32_t> &changed_graph_id) {
                                                     std::map<std::string, uint32_t> &changed_graph_id) {
  if (!(json.is_array() || json.is_null())) {
    GELOGW("Input param json type should be null or array.");
    return PARAM_INVALID;
@@ -1624,7 +1624,7 @@ Status ModelCacheHelper::ParseChangedGraphIdFromJson(const Json &json,
 }

 Status ModelCacheHelper::ParseAllocatedGraphIdFromJson(const Json &json,
                                                       std::unordered_map<std::string, uint32_t> &allocated_graph_id) {
                                                       std::map<std::string, uint32_t> &allocated_graph_id) {
  if (!(json.is_array() || json.is_null())) {
    GELOGW("Input param json type should be null or array.");
    return PARAM_INVALID;
--- a/ge/common/helper/model_cache_helper.h
+++ b/ge/common/helper/model_cache_helper.h
@@ -95,15 +95,15 @@ class ModelCacheHelper {
  static Status ParseMemResourceFromJson(const Json &json, map<rtMemType_t, int64_t> &mem_resource);
  static Status ParseVarAddrMgrMapFromJson(const Json &json,
                                           std::vector<std::pair<std::string, VarAddrMgr>> &var_addr_mgr_vector,
                                           std::unordered_set<uint64_t> &var_offset_set);
                                           std::set<uint64_t> &var_offset_set);
  static Status ParseCurVarTensorDescMapFromJson(
      const Json &json, std::unordered_map<std::string, ge::GeTensorDesc> &cur_var_tensor_desc_map);
  static Status ParseTransRoadsFromJson(const Json &json,
                                        std::unordered_map<std::string, std::vector<TransNodeInfo>> &trans_roads);
  static Status ParseChangedGraphIdFromJson(const Json &json,
                                            std::unordered_map<std::string, uint32_t> &changed_graph_id);
                                            std::map<std::string, uint32_t> &changed_graph_id);
  static Status ParseAllocatedGraphIdFromJson(const Json &json,
                                              std::unordered_map<std::string, uint32_t> &allocated_graph_id);
                                              std::map<std::string, uint32_t> &allocated_graph_id);
  static Status ParseBroadcastInfoFromJson(const Json &json,
                                           std::unordered_map<std::string, VarBroadCastInfo> &var_broadcast_info);
  static Status GetVarNameFromVarKey(const string &var_key, const GeTensorDesc &tensor_desc, string &var_name);
--- a/ge/common/profiling/ge_profiling.cc
+++ b/ge/common/profiling/ge_profiling.cc
@@ -88,7 +88,7 @@ bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) {
    return false;
  }

  std::unordered_set<uint32_t> record;
  std::set<uint32_t> record;
  for (size_t i = 0; i < device_nums; ++i) {
    uint32_t dev_id = deviceid_list[i];
    if (dev_id >= static_cast<uint32_t>(dev_count)) {
--- a/ge/executor/CMakeLists.txt
+++ b/ge/executor/CMakeLists.txt
@@ -167,6 +167,8 @@ target_compile_options(ge_executor PRIVATE
    $<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common>
    $<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Debug>>:/MTd>
    $<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Release>>:/MT>
    $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable>
    $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format>
 )

 target_compile_definitions(ge_executor PRIVATE
@@ -178,7 +180,7 @@ target_compile_definitions(ge_executor PRIVATE
    LOG_CPP
 )

 target_include_directories(ge_executor PRIVATE
 target_include_directories(ge_executor SYSTEM PRIVATE
    ${GE_CODE_DIR}/ge
    ${GE_CODE_DIR}/inc
    ${GE_CODE_DIR}/inc/external
@@ -212,12 +214,14 @@ target_compile_options(ge_executor_shared PRIVATE
    -Werror
    -O2
    -Wno-deprecated-declarations
    -fvisibility=hidden
 )

 target_compile_definitions(ge_executor_shared PRIVATE
    PROTOBUF_INLINE_NOT_IN_HEADERS=0
    DAVINCI_SUPPORT_PROFILING
    google=ascend_private
    FUNC_VISIBILITY
 )

 target_include_directories(ge_executor_shared PRIVATE
@@ -238,6 +242,10 @@ target_include_directories(ge_executor_shared PRIVATE
    ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )

 target_link_options(ge_executor_shared PRIVATE
    -Wl,-Bsymbolic
 )

 target_link_libraries(ge_executor_shared PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    msprofiler
--- a/ge/ge_local_engine/CMakeLists.txt
+++ b/ge/ge_local_engine/CMakeLists.txt
@@ -27,10 +27,12 @@ add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
 target_compile_options(ge_local_engine PRIVATE
    -Werror
    -fno-common
    -fvisibility=hidden
 )

 target_compile_definitions(ge_local_engine PRIVATE
    google=ascend_private
    FUNC_VISIBILITY
 )

 target_include_directories(ge_local_engine PRIVATE
@@ -51,6 +53,10 @@ target_include_directories(ge_local_engine PRIVATE
    ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )

 target_link_options(ge_local_engine PRIVATE
    -Wl,-Bsymbolic
 )

 target_link_libraries(ge_local_engine PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    -Wl,--no-as-needed
@@ -67,11 +73,12 @@ add_library(atc_ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
 target_compile_options(atc_ge_local_engine PRIVATE
    -Werror
    -fno-common
    -fvisibility=hidden
 )

 target_compile_definitions(atc_ge_local_engine PRIVATE
    COMPILE_OMG_PACKAGE
    google=ascend_private
    FUNC_VISIBILITY
 )

 target_include_directories(atc_ge_local_engine PRIVATE
@@ -92,6 +99,10 @@ target_include_directories(atc_ge_local_engine PRIVATE
    ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )

 target_link_options(atc_ge_local_engine PRIVATE
    -Wl,-Bsymbolic
 )

 target_link_libraries(atc_ge_local_engine PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    -Wl,--no-as-needed
@@ -113,10 +124,12 @@ add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDR
 target_compile_options(ge_local_opskernel_builder PRIVATE
    -Werror
    -fno-common
    -fvisibility=hidden
 )

 target_compile_definitions(ge_local_opskernel_builder PRIVATE
    google=ascend_private
    FUNC_VISIBILITY
 )

 target_include_directories(ge_local_opskernel_builder PRIVATE
@@ -137,6 +150,10 @@ target_include_directories(ge_local_opskernel_builder PRIVATE
    ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )

 target_link_options(ge_local_opskernel_builder PRIVATE
    -Wl,-Bsymbolic
 )

 target_link_libraries(ge_local_opskernel_builder PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    -Wl,--no-as-needed
@@ -154,10 +171,12 @@ add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO
 target_compile_options(atc_ge_local_opskernel_builder PRIVATE
    -Werror
    -fno-common
    -fvisibility=hidden
 )

 target_compile_definitions(atc_ge_local_opskernel_builder PRIVATE
    google=ascend_private
    FUNC_VISIBILITY
 )

 target_include_directories(atc_ge_local_opskernel_builder PRIVATE
@@ -178,6 +197,10 @@ target_include_directories(atc_ge_local_opskernel_builder PRIVATE
    ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )

 target_link_options(atc_ge_local_opskernel_builder PRIVATE
    -Wl,-Bsymbolic
 )

 target_link_libraries(atc_ge_local_opskernel_builder PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    -Wl,--no-as-needed
@@ -200,11 +223,13 @@ add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PR
 target_compile_options(ge_local_opskernel_builder_static PRIVATE
    -Werror
    -fno-common
    -fvisibility=hidden
 )

 target_compile_definitions(ge_local_opskernel_builder_static PRIVATE
    google=ascend_private
    LOG_CPP
    FUNC_VISIBILITY
 )

 target_include_directories(ge_local_opskernel_builder_static PRIVATE
--- a/ge/ge_local_engine/engine/ge_local_engine.h
+++ b/ge/ge_local_engine/engine/ge_local_engine.h
@@ -17,6 +17,20 @@
 #ifndef GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_
 #define GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_

 // Symbol-export marker for declarations in this header.
 // When the build defines FUNC_VISIBILITY, GE_FUNC_VISIBILITY expands to the
 // compiler's export annotation: dllexport under MSVC, the visibility("default")
 // attribute on other compilers. Without FUNC_VISIBILITY it expands to nothing,
 // leaving the declaration's visibility to the compiler defaults/flags.
 #if defined(_MSC_VER)
 #ifdef FUNC_VISIBILITY
 #define GE_FUNC_VISIBILITY _declspec(dllexport)
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #else
 #ifdef FUNC_VISIBILITY
 #define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #endif

 #include <map>
 #include <memory>
 #include <string>
@@ -32,7 +46,7 @@ namespace ge_local {
 * ge local engine.
 * Used for the ops not belong to any engine. eg:netoutput
 */
 class GeLocalEngine {
 class GE_FUNC_VISIBILITY GeLocalEngine {
 public:
  /**
   * get GeLocalEngine instance.
@@ -94,25 +108,25 @@ extern "C" {
 * When Ge start, GE will invoke this interface
 * @return The status whether initialize successfully
 */
 ge::Status Initialize(const map<string, string> &options);
 GE_FUNC_VISIBILITY ge::Status Initialize(const map<string, string> &options);

 /**
 * After the initialize, GE will invoke this interface to get the Ops kernel Store
 * @param ops_kernel_map The ge local's ops kernel info
 */
 void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map);
 GE_FUNC_VISIBILITY void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map);

 /**
 * After the initialize, GE will invoke this interface to get the Graph Optimizer
 * @param graph_optimizers The ge local's Graph Optimizer objs
 */
 void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers);
 GE_FUNC_VISIBILITY void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers);

 /**
 * When the graph finished, GE will invoke this interface
 * @return The status whether initialize successfully
 */
 ge::Status Finalize();
 GE_FUNC_VISIBILITY ge::Status Finalize();
 }

 #endif  // GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_
--- a/ge/ge_local_engine/engine/host_cpu_engine.h
+++ b/ge/ge_local_engine/engine/host_cpu_engine.h
@@ -16,6 +16,20 @@
 #ifndef GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_
 #define GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_

 // Symbol-export marker for declarations in this header.
 // When the build defines FUNC_VISIBILITY, GE_FUNC_VISIBILITY expands to the
 // compiler's export annotation: dllexport under MSVC, the visibility("default")
 // attribute on other compilers. Without FUNC_VISIBILITY it expands to nothing,
 // leaving the declaration's visibility to the compiler defaults/flags.
 #if defined(_MSC_VER)
 #ifdef FUNC_VISIBILITY
 #define GE_FUNC_VISIBILITY _declspec(dllexport)
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #else
 #ifdef FUNC_VISIBILITY
 #define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #endif

 #include <mutex>
 #include "framework/common/ge_inner_error_codes.h"
 #include "graph/node.h"
@@ -23,7 +37,7 @@
 #include "external/../register/register.h"

 namespace ge {
 class HostCpuEngine {
 class GE_FUNC_VISIBILITY HostCpuEngine {
 public:
  ~HostCpuEngine() = default;

--- a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h
+++ b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h
@@ -22,7 +22,7 @@

 namespace ge {
 namespace ge_local {
 class GeLocalOpsKernelBuilder : public OpsKernelBuilder {
 class GE_FUNC_VISIBILITY GeLocalOpsKernelBuilder : public OpsKernelBuilder {
 public:
  ~GeLocalOpsKernelBuilder() override;
  Status Initialize(const map<std::string, std::string> &options) override;
--- a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h
+++ b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h
@@ -17,6 +17,20 @@
 #ifndef GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_GE_LOCAL_OPS_KERNEL_INFO_H_
 #define GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_GE_LOCAL_OPS_KERNEL_INFO_H_

 // Symbol-export marker for declarations in this header.
 // When the build defines FUNC_VISIBILITY, GE_FUNC_VISIBILITY expands to the
 // compiler's export annotation: dllexport under MSVC, the visibility("default")
 // attribute on other compilers. Without FUNC_VISIBILITY it expands to nothing,
 // leaving the declaration's visibility to the compiler defaults/flags.
 #if defined(_MSC_VER)
 #ifdef FUNC_VISIBILITY
 #define GE_FUNC_VISIBILITY _declspec(dllexport)
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #else
 #ifdef FUNC_VISIBILITY
 #define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #endif

 #include <map>
 #include <string>
 #include <vector>
@@ -25,7 +39,7 @@

 namespace ge {
 namespace ge_local {
 class GeLocalOpsKernelInfoStore : public OpsKernelInfoStore {
 class GE_FUNC_VISIBILITY GeLocalOpsKernelInfoStore : public OpsKernelInfoStore {
 public:
  GeLocalOpsKernelInfoStore() = default;

--- a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h
+++ b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h
@@ -21,7 +21,7 @@

 namespace ge {
 namespace ge_local {
 class GeDeletedOp : public Op {
 class GE_FUNC_VISIBILITY GeDeletedOp : public Op {
 public:
  GeDeletedOp(const Node &node, RunContext &run_context);

--- a/ge/ge_local_engine/ops_kernel_store/op/no_op.h
+++ b/ge/ge_local_engine/ops_kernel_store/op/no_op.h
@@ -21,7 +21,7 @@

 namespace ge {
 namespace ge_local {
 class NoOp : public Op {
 class GE_FUNC_VISIBILITY NoOp : public Op {
 public:
  NoOp(const Node &node, RunContext &run_context);

--- a/ge/ge_local_engine/ops_kernel_store/op/op.h
+++ b/ge/ge_local_engine/ops_kernel_store/op/op.h
@@ -29,7 +29,7 @@ namespace ge_local {
 /**
 * The base class for all op.
 */
 class Op {
 class GE_FUNC_VISIBILITY Op {
 public:
  Op(const Node &node, RunContext &run_context);

--- a/ge/ge_local_engine/ops_kernel_store/op/op_factory.h
+++ b/ge/ge_local_engine/ops_kernel_store/op/op_factory.h
@@ -32,7 +32,7 @@ using OP_CREATOR_FUNC = std::function<std::shared_ptr<Op>(const Node &, RunConte
 /**
 * manage all the op, support create op.
 */
 class OpFactory {
 class GE_FUNC_VISIBILITY OpFactory {
 public:
  static OpFactory &Instance();

@@ -72,7 +72,7 @@ class OpFactory {
  std::vector<std::string> all_ops_;
 };

 class OpRegistrar {
 class GE_FUNC_VISIBILITY OpRegistrar {
 public:
  OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) {
    OpFactory::Instance().RegisterCreator(type, func);
--- a/ge/ge_runtime/CMakeLists.txt
+++ b/ge/ge_runtime/CMakeLists.txt
@@ -27,7 +27,7 @@ target_compile_options(ge_runtime PRIVATE
    -fno-common
 )

 target_compile_definitions(ge_runtime PRIVATE 
 target_compile_definitions(ge_runtime PRIVATE
    PROTOBUF_INLINE_NOT_IN_HEADERS=0
    LOG_CPP
 )
@@ -53,6 +53,10 @@ target_include_directories(ge_runtime PRIVATE
    ${CMAKE_BINARY_DIR}/proto/ge
 )

 target_link_options(ge_runtime PRIVATE
    -Wl,-Bsymbolic
 )

 target_link_libraries(ge_runtime PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    -Wl,--no-as-needed
--- a/ge/graph/build/memory/block_mem_assigner.cc
+++ b/ge/graph/build/memory/block_mem_assigner.cc
@@ -1121,7 +1121,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
            }
          }
          reusable_block->continuous_block_ = continuous;
          reusable_block->ref_count_++;
          reusable_blocks_[memory_type][stream_id].erase((++it).base());
          return reusable_block;
        }
@@ -1136,7 +1135,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
  block->is_zero_copy_ = IsZeroCopyBlock(n, continuous);
  block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size);
  block->stream_id_ = node_op_desc->GetStreamId();
  block->ref_count_++;
  block->continuous_block_ = continuous;
  block->batch_label_ = batch_label;
  if (mem_type == kOutput) {
@@ -1266,6 +1264,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in
    // hccl task need align header and tail
    block->first_continuous_block_ = true;
    block->last_continuous_block_ = true;
    ++(block->ref_count_);
  } else {
    GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str());
    return INTERNAL_ERROR;
@@ -1289,6 +1288,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
                                 return nullptr, "Get no align size failed");

  std::string symbol;
  bool reuse_input = false;
  if (IsSymbolExist(node_index_io, symbol)) {
    block = symbol_blocks_[symbol];
    GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str());
@@ -1303,6 +1303,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
    block->SetLifeTimeEnd(life_time_);
    block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size);
    block->ref_count_++;
    reuse_input = true;

    // add new size
    align_size = block_size;
@@ -1336,7 +1337,6 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
                        workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type);
  }
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "Block is nullptr.");
  int out_count_reuse_input = block->ref_count_;
  int out_count = 0;
  GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(), GELOGE(FAILED, "index is out of range."); return nullptr);
  auto out_data_anchor = n->GetOutDataAnchor(index);
@@ -1351,28 +1351,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
      out_count++;
    }
  }
  bool reuse_input = false;
  for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) {
    auto owner_node = in_anchor->GetOwnerNode();
    GE_IF_BOOL_EXEC(owner_node == nullptr, continue);
    auto op_desc = owner_node->GetOpDesc();
    GE_IF_BOOL_EXEC(op_desc == nullptr, continue);
    for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) {
      bool dst_reuse_input = false;
      uint32_t dst_reuse_input_index = 0;
      auto owner_node_op_desc = op_desc->GetOutputDescPtr(i);
      GE_IF_BOOL_EXEC(owner_node_op_desc == nullptr, continue);
      GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInput(*owner_node_op_desc, dst_reuse_input) != SUCCESS,
                      GELOGI("Get dst_reuse_input failed"));
      GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS,
                      GELOGI("Get dst_reuse_input_index failed"));
      if (dst_reuse_input && (dst_reuse_input_index == static_cast<uint32_t>(in_anchor->GetIdx()))) {
        out_count_reuse_input += 1;
        reuse_input = true;
      }
    }
  }
  block->ref_count_ = reuse_input ? out_count_reuse_input + out_count - 1 : out_count;
  block->ref_count_ = (reuse_input && out_count != 0) ? (block->ref_count_ + out_count - 1)
                                                      : (block->ref_count_ + out_count);
  return block;
 }

@@ -1484,12 +1464,25 @@ void BlockMemAssigner::ReleaseInputNodeOutMemory(const unordered_map<string, vec
      GELOGD("node_type_indexs: %d, %s", node_type_indexs.back().index,
             node_type_indexs.back().node->GetName().c_str());

      if ((node_type_indexs.back().node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) &&
          (node_type_indexs.back().index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx()))) {
      bool is_block_matched = false;
      for (auto &node_type_index : node_type_indexs) {
        is_block_matched = (node_type_index.node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) &&
                           (node_type_index.index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx()));
        if (is_block_matched) {
          GELOGI("Block of peer out is matched. Peer node:%s, output index:%u, "
                 "current node:%s, input index:%d, block ref_count:%d.",
                 node_type_index.node->GetName().c_str(), node_type_index.index,
                 node->GetName().c_str(), in_anchor->GetIdx(), block->ref_count_);
          break;
        }
      }

      if (is_block_matched) {
        ReleaseMemory(block, reusable_memory, (node->GetOpDesc()->GetStreamId() == block->stream_id_));
        if (block->ref_count_ == 0 && block->same_stream_) {
          SetLastUsedInputMemAttr(node, in_anchor->GetIdx());
        }
        break;
      }
    }
  }
@@ -1530,6 +1523,21 @@ void CheckAndGetOpReuseEnv(const string &env, vector<string> &env_vec, bool &op_
  return;
 }

 void BlockMemAssigner::CheckAndReleaseSuspendedBlock(const NodePtr &node, uint32_t idx, MemoryBlock *block) {
  if (node == nullptr || node->GetOpDesc() == nullptr || block == nullptr) {
    return;
  }
  int64_t stream_id = node->GetOpDesc()->GetStreamId();
  auto out_data_anchor = node->GetOutDataAnchor(static_cast<int>(idx));
  bool is_suspended = (out_data_anchor != nullptr) && (out_data_anchor->GetPeerInDataNodesSize() == 0);
  if (is_suspended) {
    block->ref_count_ = (block->ref_count_ != 0) ? (block->ref_count_) : (1);
    stream_workspace_blocks_[block->memory_type_][stream_id].emplace_back(block);
    GELOGI("The output is suspended, and will be released in allocation of next node. Name:%s, index:%u, "
           "size:%zu, ref_count:%d.", node->GetName().c_str(), idx, block->Size(), block->ref_count_);
  }
 }

 Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector<int64_t> &ranges) {
  auto op_desc = node->GetOpDesc();
  int64_t stream_id = op_desc->GetStreamId();
@@ -1560,7 +1568,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
  // Allocate memory for the current node and release node memory of the same size in the workspace
  GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1",
                  for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end();
                       ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); });
                       ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]);
                                 iter->second[stream_id].clear();});
  if (IsContinuousOutput(node)) {
    return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_);
  }
@@ -1621,6 +1630,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
        continue;
      }
      symbol_blocks_[iter->second] = mem_block;
      // The output is suspended, and will be released in allocation of next node.
      CheckAndReleaseSuspendedBlock(node, i, mem_block);
    }
  }
  return SUCCESS;
@@ -1648,9 +1659,6 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
    if (AssignOutputMemoryWithReuse(n, ranges) != SUCCESS) {
      return;
    }
    for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); ++iter) {
      iter->second[stream_id].clear();
    }
    vector<int64_t> temp;
    int64_t tatal_size = 0;
    GetNodeWorkSpaceSize(n, temp, tatal_size);
@@ -1692,6 +1700,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
                                           kWorkspace, n, static_cast<uint32_t>(i), workspace_reuse_flag,
                                           is_op_reuse_mem_, false, memory_type);
      GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mem_block == nullptr, continue, "failed to apply memory block.");
      ++(mem_block->ref_count_);
      CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block, memory_type);
    }
    for (auto it = reusable_blocks_.begin(); it != reusable_blocks_.end(); ++it) {
--- a/ge/graph/build/memory/block_mem_assigner.h
+++ b/ge/graph/build/memory/block_mem_assigner.h
@@ -454,6 +454,8 @@ class BlockMemAssigner : public MemAssigner {

  void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node);

  void CheckAndReleaseSuspendedBlock(const NodePtr &node, uint32_t idx, MemoryBlock *block);

  std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_;

  std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> stream_workspace_blocks_;
@@ -464,7 +466,7 @@ class BlockMemAssigner : public MemAssigner {

  std::unordered_map<std::string, std::unordered_map<uint32_t, MemoryBlock *>> node_continuous_input_blocks_;

  std::unordered_map<std::string, uint32_t> node_continuous_input_counts_;
  std::map<std::string, uint32_t> node_continuous_input_counts_;

  // reuse memory
  vector<string> op_no_reuse_mem_vec_;
--- a/ge/graph/build/memory/graph_mem_assigner.cc
+++ b/ge/graph/build/memory/graph_mem_assigner.cc
@@ -528,7 +528,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,

    GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] "
        "size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
        node->GetType().c_str(), peer_op_desc->GetName().c_str(),peer_out_data_anchor->GetIdx(),
        peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(),
        output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type,
        is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding);
  }
@@ -618,7 +618,7 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node
    }
    GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]"
           " size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
           node->GetType().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
           out_op_desc->GetName().c_str(), node->GetType().c_str(), out_data_anchor->GetIdx(),
           output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL,
           is_nopadding ? nopadding_size : tensor_desc_size, is_nopadding);
  }
--- a/ge/graph/build/run_context.cc
+++ b/ge/graph/build/run_context.cc
@@ -90,7 +90,7 @@ Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t even
  // Create rt label
  for (uint32_t i = 0; i < label_num; ++i) {
    rtLabel_t label = nullptr;
    rt_ret = rtLabelCreate(&label);
    rt_ret = rtLabelCreateV2(&label, rt_model_);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "rtLabelCreate failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i);
      return RT_FAILED;
--- a/ge/graph/build/stream_allocator.cc
+++ b/ge/graph/build/stream_allocator.cc
@@ -1226,7 +1226,7 @@ Status StreamAllocator::InsertSyncEventNodes() {
    }
  }

  Status status = ReorderEventNodes();
  Status status = whole_graph_->InsertGraphEvents();
  if (status != SUCCESS) {
    GELOGE(status, "Graph ReorderEventNodes failed");
    return status;
@@ -1235,22 +1235,6 @@ Status StreamAllocator::InsertSyncEventNodes() {
  return SUCCESS;
 }

 // Calls InsertEventNodes() on the whole graph first and then on every subgraph returned by
 // GetAllSubgraphs(). The first non-SUCCESS status is logged and returned immediately;
 // SUCCESS is returned only when every call succeeds.
 Status StreamAllocator::ReorderEventNodes() const {
  const Status root_ret = whole_graph_->InsertEventNodes();
  if (root_ret != SUCCESS) {
    GELOGE(root_ret, "Whole graph InsertEventNodes failed");
    return root_ret;
  }

  for (const auto &sub : whole_graph_->GetAllSubgraphs()) {
    const Status sub_ret = sub->InsertEventNodes();
    if (sub_ret == SUCCESS) {
      continue;
    }
    // Stop at the first failing subgraph and report which one it was.
    GELOGE(sub_ret, "Subgraph %s InsertEventNodes failed", sub->GetName().c_str());
    return sub_ret;
  }
  return SUCCESS;
 }

 void StreamAllocator::DumpEvents() {
  map<int64_t, vector<NodePtr>> after_refresh_stream_nodes;
  for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) {
--- a/ge/graph/build/stream_allocator.h
+++ b/ge/graph/build/stream_allocator.h
@@ -74,7 +74,6 @@ class StreamAllocator {
  Status RefreshContinuousEvents();

  Status InsertSyncEventNodes();
  Status ReorderEventNodes() const;

  void DumpEvents();

--- a/ge/graph/build/task_generator.cc
+++ b/ge/graph/build/task_generator.cc
@@ -211,7 +211,7 @@ Status TaskGenerator::SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion
    // and it have no attr or group attr different
    // which means bad case, return error
    bool call_check = true;
    std::unordered_set<int64_t> input_group_ids;
    std::set<int64_t> input_group_ids;
    for (const auto &input_node : node->GetInNodes()) {
      auto iter = nodes_with_group_attr.find(input_node);
      if (iter == nodes_with_group_attr.end()) {
@@ -533,13 +533,6 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) {
    return GE_GRAPH_GRAPH_NODE_NULL;
  }

  int64_t node_index = 0;
  for (auto &node : all_nodes) {
    OpDescPtr op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    op_desc->SetId(node_index++);
  }

  map<int64_t, vector<OpDescPtr>> all_stream_ops;
  for (auto &node : all_nodes) {
    OpDescPtr op_desc = node->GetOpDesc();
@@ -784,7 +777,7 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin
    }

    if (graph->GetNeedIteration()) {
      if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") {
      if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD) {
        profiling_point.end_index.insert(current_idx);
        GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive",
               op_desc->GetName().c_str(), current_idx);
--- a/ge/graph/common/transop_util.h
+++ b/ge/graph/common/transop_util.h
@@ -44,7 +44,7 @@ class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY TransOpUtil {

  static TransOpUtil &Instance();

  typedef std::unordered_map<std::string, int> transop_index_op;
  typedef std::map<std::string, int> transop_index_op;
  transop_index_op transop_index_map_;
 };
 }  // namespace ge
--- a/ge/graph/load/model_manager/cpu_queue_schedule.cc
+++ b/ge/graph/load/model_manager/cpu_queue_schedule.cc
@@ -99,7 +99,7 @@ Status CpuTaskModelDequeue::Distribute() {
 /// @param [in] outside_addrs: model input/output memory addr
 /// @return: 0 for success / others for failed
 ///
 Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs) {
 Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs) {
  if ((args_ != nullptr) || (args_size_ > 0)) {
    GELOGE(FAILED, "Task already initialized, size: %u", args_size_);
    return FAILED;
@@ -110,32 +110,22 @@ Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const v
  GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)

  AddrMapInfo addr_map_info;
  for (auto &addrs : outside_addrs) {
    auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
    GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs");
    std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0];
    for (const auto &virtual_args_addr : virtual_args_addrs) {
      addr_map_info.addr_num += virtual_args_addr.second.size();
    }
  }
  GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num);

  // init src_addrs/dst_addrs
  size_t index = 0;
  vector<uint64_t> src_addrs;
  vector<uint64_t> dst_addrs;
  for (auto &addrs : outside_addrs) {
    auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
  for (const auto &addrs : outside_addrs) {
    const auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
    GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs");
    std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0];
    for (const auto &virtual_args_addr : virtual_args_addrs) {
      addr_map_info.addr_num += virtual_args_addr.second.size();
      for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) {
        src_addrs.push_back(mbuf_list.at(index));
        src_addrs.emplace_back(mbuf_list.at(addrs.first));
        dst_addrs.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i))));
      }
    }
    index++;
  }
  GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num);

  // malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs
  GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM));
--- a/ge/graph/load/model_manager/cpu_queue_schedule.h
+++ b/ge/graph/load/model_manager/cpu_queue_schedule.h
@@ -93,7 +93,7 @@ class CpuTaskZeroCopy : public CpuTaskInfo {
  ~CpuTaskZeroCopy() override;

  Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; }
  Status Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs);
  Status Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs);

  Status Distribute() override;
 private:
--- a/ge/graph/load/model_manager/davinci_model.cc
+++ b/ge/graph/load/model_manager/davinci_model.cc
@@ -842,6 +842,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
  };

  vector<OpDescPtr> output_op_list;
  set<const void *> input_outside_addrs;
  set<const void *> output_outside_addrs;
  map<uint32_t, OpDescPtr> data_by_index;
  map<string, OpDescPtr> variable_by_name;
  auto nodes = compute_graph->GetAllNodes();
@@ -858,7 +860,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
    GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc);

    if (IsDataOp(op_desc->GetType())) {
      if (InitDataOp(compute_graph, node, data_op_index, data_by_index) != SUCCESS) {
      if (InitDataOp(compute_graph, node, data_op_index, data_by_index, input_outside_addrs) != SUCCESS) {
        GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str());
        return PARAM_INVALID;
      }
@@ -867,7 +869,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
    }

    if (op_desc->GetType() == NETOUTPUT) {
      if (InitNetOutput(compute_graph, node, output_op_list) != SUCCESS) {
      if (InitNetOutput(compute_graph, node, output_op_list, output_outside_addrs) != SUCCESS) {
        GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str());
        return PARAM_INVALID;
      }
@@ -961,7 +963,7 @@ void DavinciModel::SetLabelForDynamic(const NodePtr &node) {
 /// @return Status
 ///
 Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index,
                                map<uint32_t, OpDescPtr> &data_by_index) {
                                map<uint32_t, OpDescPtr> &data_by_index, set<const void *> &input_outside_addrs) {
  // op_desc Checked by Init: Data, valid.
  auto op_desc = node->GetOpDesc();
  if (node->GetOwnerComputeGraph() != graph) {
@@ -1000,16 +1002,12 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod
    GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str());
    return PARAM_INVALID;
  }
  new_input_data_info_[data_index] = zero_copy_offset;

  for (size_t index = 0; index < virtual_addr_list.size(); ++index) {
    void *addr = virtual_addr_list.at(index);
    if (new_input_outside_addrs_.find(addr) != new_input_outside_addrs_.end()) {
      continue;
    }
    zero_copy_offset.SetInputOutsideAddrs(output_offset_list, addr, index, fusion_flag, real_virtual_addrs_);
    new_input_outside_addrs_[addr] = zero_copy_offset;
  if (input_outside_addrs.count(virtual_addr) == 0) {
    int64_t output_offset = output_offset_list.at(kDataIndex);
    zero_copy_offset.SetInputOutsideAddrs(output_offset, virtual_addr, fusion_flag, real_virtual_addrs_);
    input_outside_addrs.insert(virtual_addr);
  }
  input_data_info_[data_index] = zero_copy_offset;

  return SUCCESS;
 }
@@ -1085,7 +1083,7 @@ bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) {
 /// @param [in/out] vector<OpDescPtr>: All NetOutput node in model.
 /// @return Status
 Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node,
                                   vector<OpDescPtr> &output_op_list) {
                                   vector<OpDescPtr> &output_op_list, set<const void *> &output_outside_addrs) {
  // node->GetOpDesc Checked by Init: NetOutput, valid.
  auto op_desc = node->GetOpDesc();
  // excludes the function op sub graph, e.g. case,if
@@ -1117,7 +1115,7 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &
    return PARAM_INVALID;
  }

  size_t num = new_output_data_info_.size();
  size_t num = output_data_info_.size();
  bool fusion_flag = false;

  size_t input_count = input_size_list.size();
@@ -1131,22 +1129,22 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &
    Status ret = zero_copy_offset.InitOutputDataInfo(input_size_list, virtual_addr_list, op_desc, idx, fusion_flag);
    GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.",
                                           op_desc->GetName().c_str()); return PARAM_INVALID;);
    new_output_data_info_[num + idx] = zero_copy_offset;
    void *addr = virtual_addr_list.at(idx);
    int64_t input_offset = input_offset_list.at(idx);
    vector<void *> tensor_addrs;
    zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs);
    auto rslt = new_output_outside_addrs_.insert(std::pair<void *, ZeroCopyOffset>(addr, zero_copy_offset));
    if (!rslt.second) {
    if (output_outside_addrs.count(addr) == 0) {
      vector<void *> tensor_addrs;
      zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs);
      output_outside_addrs.insert(addr);
      for (size_t i = 0; i < tensor_addrs.size(); ++i) {
        void *real_addr = tensor_addrs.at(i);
        DisableZeroCopy(real_addr);
        real_virtual_addrs_.insert(real_addr);
      }
    } else {
      GELOGI("same output_tensor_addr %p to different input_tensor of %s", addr, op_desc->GetName().c_str());
      DisableZeroCopy(addr);
    }

    for (size_t i = 0; i < tensor_addrs.size(); ++i) {
      void *real_addr = tensor_addrs.at(i);
      DisableZeroCopy(real_addr);
      real_virtual_addrs_.insert(real_addr);
    }
    output_data_info_[num + idx] = zero_copy_offset;
  }
  return SUCCESS;
 }
@@ -1402,7 +1400,7 @@ Status DavinciModel::InitLabelSet(const OpDescPtr &op_desc) {
  }

  rtLabel_t rt_label = nullptr;
  rtError_t rt_error = rtLabelCreateEx(&rt_label, stream);
  rtError_t rt_error = rtLabelCreateExV2(&rt_label, rt_model_handle_, stream);
  if (rt_error != RT_ERROR_NONE || rt_label == nullptr) {
    GELOGE(INTERNAL_ERROR, "InitLabelSet: %s create label failed, error=0x%x.", op_desc->GetName().c_str(), rt_error);
    return INTERNAL_ERROR;
@@ -1463,27 +1461,27 @@ Status DavinciModel::LoadWithQueue() {
    return SUCCESS;
  }

  if (input_queue_ids_.size() != new_input_data_info_.size()) {
  if (input_queue_ids_.size() != input_data_info_.size()) {
    GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Input queue ids not match model: input_queue=%zu input_data=%zu",
           input_queue_ids_.size(), new_input_data_info_.size());
           input_queue_ids_.size(), input_data_info_.size());
    return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID;
  }

  if (output_queue_ids_.size() != new_output_data_info_.size()) {
  if (output_queue_ids_.size() != output_data_info_.size()) {
    GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID,
           "Output queue ids not match model: output_queue=%zu output_data=%zu",
           output_queue_ids_.size(), new_output_data_info_.size());
           output_queue_ids_.size(), output_data_info_.size());
    return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID;
  }

  GE_CHK_STATUS_RET(AddHeadStream(), "Add head stream failed.");
  // Binding input_queue and Data Op.
  GE_CHK_STATUS_RET(BindInputQueue(), "Launch bind input queue failed.");
  GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, new_input_outside_addrs_), "Launch zero copy failed.");
  GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, input_data_info_), "Launch zero copy failed.");

  // Binding output_queue and NetOutput Op.
  GE_CHK_STATUS_RET(BindOutputQueue(), "Launch bind output queue failed.");
  GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, new_output_outside_addrs_), "Launch zero copy failed.");
  GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, output_data_info_), "Launch zero copy failed.");

  GE_CHK_STATUS_RET(CpuActiveStream(), "Launch active entry stream failed.");
  GE_CHK_STATUS_RET(CpuWaitEndGraph(), "Launch wait end graph failed.");
@@ -1499,9 +1497,9 @@ Status DavinciModel::LoadWithQueue() {
 Status DavinciModel::BindInputQueue() {
  // Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size()
  for (size_t i = 0; i < input_queue_ids_.size(); ++i) {
    auto it = new_input_data_info_.find(i);
    if (it == new_input_data_info_.end()) {
      GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", new_input_data_info_.size(), i);
    auto it = input_data_info_.find(i);
    if (it == input_data_info_.end()) {
      GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", input_data_info_.size(), i);
      return FAILED;
    }

@@ -1555,7 +1553,7 @@ Status DavinciModel::CpuModelDequeue(uint32_t queue_id) {
 }

 Status DavinciModel::CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list,
                                          std::map<const void *, ZeroCopyOffset> &outside_addrs) {
                                          const map<uint32_t, ZeroCopyOffset> &outside_addrs) {
  GELOGI("Set CpuKernel model zero_copy task enter.");
  std::shared_ptr<CpuTaskZeroCopy> zero_copy = MakeShared<CpuTaskZeroCopy>(rt_entry_stream_);
  if (zero_copy == nullptr) {
@@ -1579,9 +1577,9 @@ Status DavinciModel::CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list,
 Status DavinciModel::BindOutputQueue() {
  // Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size()
  for (size_t i = 0; i < output_queue_ids_.size(); ++i) {
    auto it = new_output_data_info_.find(i);
    if (it == new_output_data_info_.end()) {
      GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i);
    auto it = output_data_info_.find(i);
    if (it == output_data_info_.end()) {
      GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i);
      return FAILED;
    }

@@ -1685,9 +1683,9 @@ Status DavinciModel::CpuWaitEndGraph() {

 Status DavinciModel::BindEnqueue() {
  for (size_t i = 0; i < output_queue_ids_.size(); ++i) {
    auto it = new_output_data_info_.find(i);
    if (it == new_output_data_info_.end()) {
      GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i);
    auto it = output_data_info_.find(i);
    if (it == output_data_info_.end()) {
      GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i);
      return FAILED;
    }

@@ -2103,10 +2101,10 @@ Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_descs
 Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) {
  rtMemcpyKind_t kind = device_data ? RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE;
  const std::vector<DataBuffer> &blobs = input_data.blobs;
  for (const auto &data : new_input_data_info_) {
  for (const auto &data : input_data_info_) {
    if (data.first >= blobs.size()) {
      GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld, op_name(%s)", blobs.size(),
             new_input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first,
             input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first,
             data.second.GetOpName().c_str());
      return FAILED;
    }
@@ -2427,18 +2425,18 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r

  output_data.index = data_id;
  output_data.model_id = model_id_;
  if (output_data.blobs.size() != new_output_data_info_.size()) {
  if (output_data.blobs.size() != output_data_info_.size()) {
    GELOGE(FAILED, "Output data buffer num=%zu not equal model data num=%zu", output_data.blobs.size(),
           new_output_data_info_.size());
           output_data_info_.size());
    return FAILED;
  }

  std::vector<DataBuffer> &blobs = output_data.blobs;
  size_t idx = 0;
  for (const auto &output : new_output_data_info_) {
  for (const auto &output : output_data_info_) {
    if (output.first >= blobs.size()) {
      GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", blobs.size(),
             new_input_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first);
             input_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first);
      return FAILED;
    }

@@ -3166,8 +3164,11 @@ void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) {
 /// @return None.
 ///
 void DavinciModel::SetCopyOnlyOutput() {
  for (const auto &output_outside_addrs : new_output_outside_addrs_) {
  for (const auto &output_outside_addrs : output_data_info_) {
    ZeroCopyOffset output_outside = output_outside_addrs.second;
    if (!output_outside.IsRelativeOffsetValid()) {
      return;
    }
    for (uint32_t out_count = 0; out_count < output_outside.GetAddrCount(); ++out_count) {
      auto &addrs_mapping_list = output_outside.GetOutsideAddrs();
      std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[out_count];
@@ -3219,12 +3220,12 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<v
  for (size_t i = 0; i < nums; ++i) {
    std::lock_guard<std::mutex> lock(outside_addrs_mutex_);

    for (auto &input_outside_addrs : new_input_outside_addrs_) {
    for (auto &input_outside_addrs : input_data_info_) {
      ZeroCopyOffset &input_outside = input_outside_addrs.second;
      input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
    }

    for (auto &output_outside_addrs : new_output_outside_addrs_) {
    for (auto &output_outside_addrs : output_data_info_) {
      ZeroCopyOffset &output_outside = output_outside_addrs.second;
      output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
    }
@@ -3293,12 +3294,12 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64
 /// @return SUCCESS handle successfully / PARAM_INVALID for failed
 ///
 Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &output_data, bool is_dynamic) {
  if (UpdateIoTaskArgs(new_input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) {
  if (UpdateIoTaskArgs(input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) {
    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update input data to model failed.");
    return ACL_ERROR_GE_PARAM_INVALID;
  }

  if (UpdateIoTaskArgs(new_output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) !=
  if (UpdateIoTaskArgs(output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) !=
      SUCCESS) {
    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update output data to model failed.");
    return ACL_ERROR_GE_PARAM_INVALID;
--- a/ge/graph/load/model_manager/davinci_model.h
+++ b/ge/graph/load/model_manager/davinci_model.h
@@ -675,7 +675,7 @@ class DavinciModel {
  /// @return Status
  ///
  Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index,
                    map<uint32_t, OpDescPtr> &data_by_index);
                    map<uint32_t, OpDescPtr> &data_by_index, set<const void *> &input_outside_addrs);

  ///
  /// @ingroup ge
@@ -694,7 +694,8 @@ class DavinciModel {
  /// @param [in/out] vector<OpDescPtr>: All NetOutput node in model.
  /// @return Status
  ///
  Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list);
  Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list,
                       set<const void *> &output_outside_addrs);

  ///
  /// @ingroup ge
@@ -764,7 +765,7 @@ class DavinciModel {
  ///
  Status BindInputQueue();

  Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, map<const void *, ZeroCopyOffset> &outside_addrs);
  Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs);

  ///
  /// @ingroup ge
@@ -897,10 +898,8 @@ class DavinciModel {
  void *global_step_addr_{nullptr};
  uint64_t global_step_size_{0};

  map<uint32_t, ZeroCopyOffset> new_input_data_info_;
  map<uint32_t, ZeroCopyOffset> new_output_data_info_;
  map<const void *, ZeroCopyOffset> new_input_outside_addrs_;
  map<const void *, ZeroCopyOffset> new_output_outside_addrs_;
  map<uint32_t, ZeroCopyOffset> input_data_info_;
  map<uint32_t, ZeroCopyOffset> output_data_info_;

  set<const void *> real_virtual_addrs_;

--- a/ge/graph/load/model_manager/ts_mem_mall.h
+++ b/ge/graph/load/model_manager/ts_mem_mall.h
@@ -100,8 +100,8 @@ class TsMemMall {

 private:
  std::mutex mem_mutex_;
  std::unordered_map<int64_t, void *> mem_store_size_;
  std::unordered_map<void *, int64_t> mem_store_addr_;
  std::map<int64_t, void *> mem_store_size_;
  std::map<void *, int64_t> mem_store_addr_;
  rtMemType_t mem_type_;
 };
 }  // namespace ge
--- a/ge/graph/load/model_manager/zero_copy_offset.cc
+++ b/ge/graph/load/model_manager/zero_copy_offset.cc
@@ -127,8 +127,8 @@ void ZeroCopyOffset::IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const
  }
 }

 void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index,
                                          bool fusion_flag, std::set<const void *> &real_virtual_addrs) {
 void ZeroCopyOffset::SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag,
                                          set<const void *> &real_virtual_addrs) {
  uint32_t out_count = 0;
  if (!fusion_flag) {
    out_count++;
@@ -138,7 +138,6 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_l
    real_virtual_addrs.insert(addr);
  } else {
    GELOGI("[ZCPY] set l2-fusion for virtual_addr %p.", addr);
    int64_t output_offset = output_offset_list.at(index);
    for (size_t i = 0; i < zero_copy_basic_offset_.size(); ++i) {
      if (zero_copy_basic_offset_.at(i) == output_offset) {
        out_count++;
@@ -153,6 +152,7 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_l
    }
  }
  addr_count_ = out_count;
  valid_relative_offset_ = true;
 }

 void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr,
@@ -181,9 +181,13 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo
    }
  }
  addr_count_ = out_count;
  valid_relative_offset_ = true;
 }

 void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) {
  if (!valid_relative_offset_) {
    return;
  }
  const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr);
  for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) {
    auto args_addrs = outside_addrs_[out_count].find(outside_addr);
--- a/ge/graph/load/model_manager/zero_copy_offset.h
+++ b/ge/graph/load/model_manager/zero_copy_offset.h
@@ -43,8 +43,7 @@ class ZeroCopyOffset {
  ~ZeroCopyOffset();

  Status InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, bool &fusion_flag);
  void SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index,
                            bool fusion_flag, std::set<const void *> &real_virtual_addrs);
  void SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag, set<const void *> &real_virtual_addrs);

  void IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const int64_t &tensor_addr, bool &fusion_flag);
  Status InitOutputDataInfo(const vector<int64_t> &input_size_list, const vector<void *> &virtual_addr_list,
@@ -65,9 +64,10 @@ class ZeroCopyOffset {
  // data_size of Data/Netoutput
  int64_t GetDataSize() const { return data_size_; }
  // value of *outside_addrs_ from davinci_model
  const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() { return outside_addrs_; }
  const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() const { return outside_addrs_; }
  // name of op
  std::string GetOpName() const { return op_name_; }
  const bool IsRelativeOffsetValid() const { return valid_relative_offset_; }

 private:
  void *basic_addr_ = nullptr;
@@ -81,6 +81,7 @@ class ZeroCopyOffset {

  std::vector<int64_t> zero_copy_basic_offset_;
  std::vector<int64_t> zero_copy_relative_offset_;
  bool valid_relative_offset_ = false;
 };
 }  // namespace ge
 #endif  // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_
--- a/ge/graph/manager/graph_manager.cc
+++ b/ge/graph/manager/graph_manager.cc
@@ -131,7 +131,7 @@ bool IsTailingOptimization() {
 }

 ge::Status CheckFpCeilingMode() {
  static const std::unordered_set<std::string> kValidFpCeilingMode = {"0", "1", "2"};
  static const std::set<std::string> kValidFpCeilingMode = {"0", "1", "2"};
  string mode;
  auto ret = ge::GetContext().GetOption("ge.fpCeilingMode", mode);
  if (ret == ge::GRAPH_SUCCESS) {
--- a/ge/graph/manager/graph_var_manager.h
+++ b/ge/graph/manager/graph_var_manager.h
@@ -170,8 +170,8 @@ class VarResource {
  std::unordered_map<std::string, VarAddrMgr> var_addr_mgr_map_;
  std::unordered_map<std::string, ge::GeTensorDesc> cur_var_tensor_desc_map_;
  std::unordered_map<std::string, std::vector<TransNodeInfo>> var_to_trans_road_;
  std::unordered_map<std::string, uint32_t> var_names_to_changed_graph_id_;
  std::unordered_map<std::string, uint32_t> var_names_to_allocated_graph_id_;
  std::map<std::string, uint32_t> var_names_to_changed_graph_id_;
  std::map<std::string, uint32_t> var_names_to_allocated_graph_id_;
  std::map<uint32_t, std::unordered_map<std::string, VarBroadCastInfo>> var_broad_cast_info_;
 };

--- a/ge/graph/partition/graph_partition.cc
+++ b/ge/graph/partition/graph_partition.cc
@@ -843,7 +843,7 @@ bool ge::GraphPartitioner::HasSecondPath(size_t src, size_t dst, size_t upper_bo
  /// Avoid recursion since stack space might be limited.
  /// We instead keep a stack of nodes to visit.
  std::vector<size_t> temp_stack;
  std::unordered_set<size_t> visited;
  std::set<size_t> visited;
  temp_stack.push_back(src);
  while (!temp_stack.empty()) {
    size_t cluster = temp_stack.back();
--- a/ge/graph/partition/graph_partition.h
+++ b/ge/graph/partition/graph_partition.h
@@ -36,7 +36,7 @@ using PartitionMap = std::unordered_map<ComputeGraphPtr, std::string>;
 using NodetoNodeMap = std::unordered_map<NodePtr, NodePtr>;
 using EnginetoGraphMap = std::unordered_map<std::string, ComputeGraphPtr>;
 using EdgeMap = std::set<std::pair<AnchorPtr, AnchorPtr>>;
 using ClusterSet = std::unordered_set<size_t>;
 using ClusterSet = std::set<size_t>;
 class Cluster {
 public:
  size_t index_;              // corresponding to rank of node
--- a/ge/graph/passes/constant_folding_pass.cc
+++ b/ge/graph/passes/constant_folding_pass.cc
@@ -50,12 +50,12 @@ Status RunOpKernelWithCheck(NodePtr &node,
  return FoldingPass::RunOpKernel(node, inputs, outputs);
 }

 const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>>
 const std::map<std::string, std::pair<std::uint64_t, uint64_t>>
    &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const {
  return statistic_of_ge_constant_folding_;
 }

 const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>>
 const std::map<std::string, std::pair<std::uint64_t, uint64_t>>
    &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const {
  return statistic_of_op_constant_folding_;
 }
--- a/ge/graph/passes/constant_folding_pass.h
+++ b/ge/graph/passes/constant_folding_pass.h
@@ -26,11 +26,11 @@ namespace ge {
 class ConstantFoldingPass : public FoldingPass {
 public:
  Status Run(ge::NodePtr &node) override;
  const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> &GetGeConstantFoldingPerfStatistic() const;
  const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> &GetOpConstantFoldingPerfStatistic() const;
  const std::map<std::string, std::pair<std::uint64_t, uint64_t>> &GetGeConstantFoldingPerfStatistic() const;
  const std::map<std::string, std::pair<std::uint64_t, uint64_t>> &GetOpConstantFoldingPerfStatistic() const;
 private:
  std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_op_constant_folding_;
  std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_ge_constant_folding_;
  std::map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_op_constant_folding_;
  std::map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_ge_constant_folding_;
 };
 }  // namespace ge

--- a/ge/graph/passes/hccl_continuous_memcpy_pass.cc
+++ b/ge/graph/passes/hccl_continuous_memcpy_pass.cc
@@ -372,6 +372,11 @@ NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph,
  }
  GELOGI("Create Assign op:%s.", op_desc->GetName().c_str());

  if (!AttrUtils::SetBool(op_desc, ATTR_NEED_COMPILE, true)) {
    GELOGE(INTERNAL_ERROR, "Set ATTR_NEED_COMPILE Attr for node:%s fail.", op_desc->GetName().c_str());
    return nullptr;
  }

  graphStatus ret = op_desc->AddInputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx()));
  if (ret != GRAPH_SUCCESS) {
    GELOGE(INTERNAL_ERROR, "Create Assign op: add ref input desc fail.");
--- a/ge/graph/passes/hccl_continuous_memcpy_pass.h
+++ b/ge/graph/passes/hccl_continuous_memcpy_pass.h
@@ -52,7 +52,7 @@ class HcclContinuousMemcpyPass : public GraphPass {

  bool IsDataNode(const std::string& node_type);

  std::unordered_map<std::string, uint32_t> node_num_map_;
  std::map<std::string, uint32_t> node_num_map_;
 };
 }  // namespace ge

--- a/ge/graph/passes/hccl_memcpy_pass.h
+++ b/ge/graph/passes/hccl_memcpy_pass.h
@@ -50,7 +50,7 @@ class HcclMemcpyPass : public GraphPass {

  bool IsDataNode(const std::string& node_type);

  std::unordered_map<std::string, uint32_t> node_num_map_;
  std::map<std::string, uint32_t> node_num_map_;
 };
 }  // namespace ge

--- a/ge/graph/passes/multi_batch_clone_pass.cc
+++ b/ge/graph/passes/multi_batch_clone_pass.cc
@@ -92,8 +92,7 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) {
  }

  // parser data dynamic info from atc parameter --input_shape
  if (multibatch::ParserDataToDynmaicInfo(batch_shapes_, GetLocalOmgContext().user_input_dims,
                                          data_to_dynamic_info_) != SUCCESS) {
  if (CheckAndParseDynamicData() != SUCCESS) {
    GELOGE(PARAM_INVALID, "Parse each data's own dynamic info failed");
    return PARAM_INVALID;
  }
@@ -177,6 +176,58 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) {
  return SUCCESS;
 }

 // Validates every data node whose output shape has a negative (unknown) dim and builds
 // data_to_dynamic_info_ from batch_shapes_.
 // Unknown-shape data nodes that are not listed in the user's input dims are checked against the
 // active dynamic option and then appended to the list handed to ParserDataToDynamicInfo.
 // Returns SUCCESS, PARAM_INVALID or INTERNAL_ERROR from the checks below, or the status of
 // ParserDataToDynamicInfo when it fails.
 Status MultiBatchClonePass::CheckAndParseDynamicData() {
  // Work on a copy: entries for dynamic data nodes missing from the user's list are appended to it.
  auto name_shape_pairs = GetLocalOmgContext().user_input_dims;

  // Names configured by the user, captured before anything is appended.
  std::vector<std::string> configured_names;
  for (auto &entry : name_shape_pairs) {
    configured_names.push_back(entry.first);
  }

  size_t dynamic_data_num = 0;
  if (!getnext_sink_dynamic_dims_) {
    for (const auto &data_node : all_data_nodes_) {
      auto out_desc = NodeUtils::GetOutputDesc(*data_node, kDataOutIndex);
      auto out_shape = out_desc.GetShape();
      const auto out_format = out_desc.GetFormat();
      const char *format_name = "Others";
      if (out_format == Format::FORMAT_NCHW) {
        format_name = "NCHW";
      } else if (out_format == Format::FORMAT_NHWC) {
        format_name = "NHWC";
      }
      auto node_name = data_node->GetName();

      const auto &dims = out_shape.GetDims();
      const bool has_unknown_dim = std::any_of(dims.begin(), dims.end(), [](int64_t dim) { return dim < 0; });
      if (!has_unknown_dim) {
        continue;
      }
      ++dynamic_data_num;

      // Nodes the user already described need no further checking here.
      if (std::find(configured_names.begin(), configured_names.end(), node_name) != configured_names.end()) {
        continue;
      }

      if (!GetLocalOmgContext().dynamic_batch_size.empty()) {
        if (!multibatch::CheckDynamicBatchShape(dims, node_name)) {
          GELOGE(PARAM_INVALID, "Failed to check dynamic batch shape of %s.", node_name.c_str());
          return PARAM_INVALID;
        }
      } else if (!GetLocalOmgContext().dynamic_image_size.empty()) {
        if (!multibatch::CheckDynamicImageSizeShape(dims, node_name, format_name)) {
          GELOGE(PARAM_INVALID, "Failed to check dynamic image size shape of %s.", node_name.c_str());
          return PARAM_INVALID;
        }
      } else if (!GetLocalOmgContext().dynamic_dims.empty()) {
        // With dynamic dims every dynamic data node has to be listed explicitly by the user.
        ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "reason"},
          {"--input_shape", "all dynamic data must be set in --input_shape"});
        GELOGE(INTERNAL_ERROR, "data: %s shape:%s must be set int --input_shape",
               node_name.c_str(), out_shape.ToString().c_str());
        return INTERNAL_ERROR;
      }
      name_shape_pairs.emplace_back(node_name, dims);
    }
  }

  auto parse_status = multibatch::ParserDataToDynamicInfo(batch_shapes_, name_shape_pairs, data_to_dynamic_info_);
  GE_CHK_STATUS_RET(parse_status, "Failed to parse data to dynamic info.");

  if (getnext_sink_dynamic_dims_ || dynamic_data_num != 0) {
    return SUCCESS;
  }
  // Dynamic options were requested but no data node actually has an unknown dim.
  ErrorManager::GetInstance().ATCReportErrMessage("E10040");
  GELOGE(PARAM_INVALID,
         "Need unknow shape data when user set --dynamic_batch_size, --dynamic_image_size or --dynamic_dims");
  return PARAM_INVALID;
 }

 Status MultiBatchClonePass::InitParamsOfGetNext(const NodePtr &node) {
  data_count_from_getnext_ = 0;
  getnext_sink_dynamic_dims_ = false;
--- a/ge/graph/passes/multi_batch_clone_pass.h
+++ b/ge/graph/passes/multi_batch_clone_pass.h
@@ -175,6 +175,8 @@ class MultiBatchClonePass : public GraphPass {
  /// @return 0: SUCCESS / others: FAILED
  ///
  Status UpdateOutputTensor(uint32_t parent_index, uint32_t unused_num);
  
  Status CheckAndParseDynamicData();

  std::string session_graph_id_;
  std::vector<std::vector<int64_t>> batch_shapes_;
--- a/ge/graph/passes/switch_to_stream_switch_pass.h
+++ b/ge/graph/passes/switch_to_stream_switch_pass.h
@@ -235,7 +235,7 @@ class SwitchToStreamSwitchPass : public GraphPass {
  std::vector<NodePtr> stream_switch_nodes_;
  std::unordered_map<OutDataAnchorPtr, std::map<int64_t, std::vector<std::list<NodePtr>>>> cond_node_map_;
  std::unordered_map<NodePtr, std::set<std::string>> switch_node_map_;
  std::unordered_map<std::string, uint32_t> node_num_map_;
  std::map<std::string, uint32_t> node_num_map_;
 };
 }  // namespace ge
 #endif  // GE_GRAPH_PASSES_SWITCH_TO_STREAM_SWITCH_PASS_H_
--- a/ge/graph/preprocess/multi_batch_copy_graph.cc
+++ b/ge/graph/preprocess/multi_batch_copy_graph.cc
@@ -738,7 +738,7 @@ Status MultiBatchGraphCopyer::CheckAndParseDynamicData(){
      }
    }
  }
  auto ret = ParserDataToDynmaicInfo(shapes_, data_name_and_shape, data_to_dynamic_info_);
  auto ret = ParserDataToDynamicInfo(shapes_, data_name_and_shape, data_to_dynamic_info_);
  GE_CHK_STATUS_RET(ret, "Failed to parse data to dynamic info.");
  if (!getnext_sink_dynamic_dims_ && unknown_shape_count == 0) {
    ErrorManager::GetInstance().ATCReportErrMessage("E10040");
--- a/ge/graph/preprocess/multi_batch_options.cc
+++ b/ge/graph/preprocess/multi_batch_options.cc
@@ -377,7 +377,7 @@ bool InitDynamicParams(vector<vector<int64_t>> &shapes) {
 /// @param [out] map<string, vector<vector<int64_t>>> &data_to_dynamic_info: key:data_name. value:dynamic dims.
 /// @return SUCCESS / PARAM_INVALID
 ///
 Status ParserDataToDynmaicInfo(const vector<vector<int64_t>> &shapes,
 Status ParserDataToDynamicInfo(const vector<vector<int64_t>> &shapes,
                               vector<pair<string, vector<int64_t>>> &data_name_and_shape,
                               map<string, vector<vector<int64_t>> > &data_to_dynamic_info) {
  size_t cur_data_index = 0;
--- a/ge/graph/preprocess/multi_batch_options.h
+++ b/ge/graph/preprocess/multi_batch_options.h
@@ -74,7 +74,7 @@ Status CalcShape(const std::vector<int64_t> &batch_shape, GeShape &data_shape);
 /// @param [out] map<string, vector<vector<int64_t>>> &data_to_dynamic_info: key:data_name. value:dynamic dims.
 /// @return SUCCESS / PARAM_INVALID
 ///
 Status ParserDataToDynmaicInfo(const vector<vector<int64_t>> &shapes,
 Status ParserDataToDynamicInfo(const vector<vector<int64_t>> &shapes,
                               vector<pair<string, vector<int64_t>>> &data_name_and_shape,
                               map<string, vector<vector<int64_t>>> &data_to_dynamic_info);

@@ -93,7 +93,7 @@ Status StampDynamicType(const OpDescPtr &op_desc);
 /// @param [in] const string &data_name: cur data name.
 /// @return 0: true/false
 ///
 bool CheckDynamicBatchShape(const vector<int64_t> &shape, const string &data_name);
 GE_FUNC_VISIBILITY bool CheckDynamicBatchShape(const vector<int64_t> &shape, const string &data_name);

 ///
 /// @ingroup ge
@@ -104,7 +104,7 @@ bool CheckDynamicBatchShape(const vector<int64_t> &shape, const string &data_nam
 /// @param [in]  const std::string &input_format: format of input.
 /// @return 0: true/false
 ///
 bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name,
 GE_FUNC_VISIBILITY bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name,
                                const std::string &input_format);

 }  // namespace multibatch
--- a/ge/host_cpu_engine/CMakeLists.txt
+++ b/ge/host_cpu_engine/CMakeLists.txt
@@ -21,10 +21,12 @@ add_library(host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
 # Build host_cpu_engine with hidden default symbol visibility; symbols are
 # exported only where the headers mark them with GE_FUNC_VISIBILITY.
 target_compile_options(host_cpu_engine PRIVATE
    -Werror
    -fno-common
    -fvisibility=hidden
 )

 # FUNC_VISIBILITY makes the GE_FUNC_VISIBILITY macro in the engine headers
 # expand to an export attribute instead of nothing.
 target_compile_definitions(host_cpu_engine PRIVATE
    google=ascend_private
    FUNC_VISIBILITY
 )

 target_include_directories(host_cpu_engine PRIVATE
@@ -44,6 +46,10 @@ target_include_directories(host_cpu_engine PRIVATE
    ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )

 # -Bsymbolic: bind references to global symbols inside this shared library
 # to the library's own definitions at link time.
 target_link_options(host_cpu_engine PRIVATE
    -Wl,-Bsymbolic
 )

 target_link_libraries(host_cpu_engine PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    -Wl,--no-as-needed
@@ -60,11 +66,12 @@ add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
 # Build atc_host_cpu_engine with hidden default symbol visibility; symbols are
 # exported only where the headers mark them with GE_FUNC_VISIBILITY.
 target_compile_options(atc_host_cpu_engine PRIVATE
    -Werror
    -fno-common
    -fvisibility=hidden
 )

 # FUNC_VISIBILITY makes the GE_FUNC_VISIBILITY macro in the engine headers
 # expand to an export attribute instead of nothing.
 target_compile_definitions(atc_host_cpu_engine PRIVATE
    COMPILE_OMG_PACKAGE
    google=ascend_private
    FUNC_VISIBILITY
 )

 target_include_directories(atc_host_cpu_engine PRIVATE
@@ -84,6 +91,10 @@ target_include_directories(atc_host_cpu_engine PRIVATE
    ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )

 # -Bsymbolic: bind references to global symbols inside this shared library
 # to the library's own definitions at link time.
 target_link_options(atc_host_cpu_engine PRIVATE
    -Wl,-Bsymbolic
 )

 target_link_libraries(atc_host_cpu_engine PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    -Wl,--no-as-needed
@@ -105,10 +116,12 @@ add_library(host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST})
 # Build host_cpu_opskernel_builder with hidden default symbol visibility; symbols
 # are exported only where the headers mark them with GE_FUNC_VISIBILITY.
 target_compile_options(host_cpu_opskernel_builder PRIVATE
    -Werror
    -fno-common
    -fvisibility=hidden
 )

 # FUNC_VISIBILITY makes the GE_FUNC_VISIBILITY macro in the engine headers
 # expand to an export attribute instead of nothing.
 target_compile_definitions(host_cpu_opskernel_builder PRIVATE
    google=ascend_private
    FUNC_VISIBILITY
 )

 target_include_directories(host_cpu_opskernel_builder PRIVATE
@@ -128,6 +141,10 @@ target_include_directories(host_cpu_opskernel_builder PRIVATE
    ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )

 # -Bsymbolic: bind references to global symbols inside this shared library
 # to the library's own definitions at link time.
 target_link_options(host_cpu_opskernel_builder PRIVATE
    -Wl,-Bsymbolic
 )

 target_link_libraries(host_cpu_opskernel_builder PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    -Wl,--no-as-needed
@@ -145,10 +162,12 @@ add_library(atc_host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST})
 # Build atc_host_cpu_opskernel_builder with hidden default symbol visibility; symbols
 # are exported only where the headers mark them with GE_FUNC_VISIBILITY.
 target_compile_options(atc_host_cpu_opskernel_builder PRIVATE
    -Werror
    -fno-common
    -fvisibility=hidden
 )

 # FUNC_VISIBILITY makes the GE_FUNC_VISIBILITY macro in the engine headers
 # expand to an export attribute instead of nothing.
 target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE
    google=ascend_private
    FUNC_VISIBILITY
 )

 target_include_directories(atc_host_cpu_opskernel_builder PRIVATE
@@ -168,6 +187,10 @@ target_include_directories(atc_host_cpu_opskernel_builder PRIVATE
    ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )

 # -Bsymbolic: bind references to global symbols inside this shared library
 # to the library's own definitions at link time.
 target_link_options(atc_host_cpu_opskernel_builder PRIVATE
    -Wl,-Bsymbolic
 )

 target_link_libraries(atc_host_cpu_opskernel_builder PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    -Wl,--no-as-needed
@@ -190,11 +213,13 @@ add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST})
 # The static variant is compiled with the same hidden default visibility as
 # the shared builders.
 target_compile_options(host_cpu_opskernel_builder_static PRIVATE
    -Werror
    -fno-common
    -fvisibility=hidden
 )

 # FUNC_VISIBILITY makes the GE_FUNC_VISIBILITY macro in the engine headers
 # expand to an export attribute instead of nothing.
 target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE
    google=ascend_private
    LOG_CPP
    FUNC_VISIBILITY
 )

 target_include_directories(host_cpu_opskernel_builder_static PRIVATE
--- a/ge/host_cpu_engine/engine/host_cpu_engine.h
+++ b/ge/host_cpu_engine/engine/host_cpu_engine.h
@@ -17,6 +17,20 @@
 #ifndef GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_
 #define GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_

 // GE_FUNC_VISIBILITY marks declarations that are exported from the library.
 // It expands to nothing unless the build defines FUNC_VISIBILITY; when defined it becomes
 // __declspec(dllexport) on MSVC and __attribute__((visibility("default"))) on other compilers.
 // The documented double-underscore __declspec spelling is used; the single-underscore form is
 // only a legacy synonym.
 #if defined(_MSC_VER)
 #ifdef FUNC_VISIBILITY
 #define GE_FUNC_VISIBILITY __declspec(dllexport)
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #else
 #ifdef FUNC_VISIBILITY
 #define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #endif

 #include <map>
 #include <memory>
 #include <string>
@@ -32,7 +46,7 @@ namespace host_cpu {
 * host cpu engine.
 * Used for the ops which executes on host.
 */
 class HostCpuEngine {
 class GE_FUNC_VISIBILITY HostCpuEngine {
 public:
  /**
   * get HostCpuEngine instance.
@@ -87,25 +101,25 @@ extern "C" {
 * When Ge start, GE will invoke this interface
 * @return The status whether initialize successfully
 */
 ge::Status Initialize(const map<string, string> &options);
 GE_FUNC_VISIBILITY ge::Status Initialize(const map<string, string> &options);

 /**
 * After the initialize, GE will invoke this interface to get the Ops kernel Store
 * @param ops_kernel_map The host cpu's ops kernel info
 */
 void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map);
 GE_FUNC_VISIBILITY void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map);

 /**
 * After the initialize, GE will invoke this interface to get the Graph Optimizer
 * @param graph_optimizers The host cpu's Graph Optimizer objs
 */
 void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers);
 GE_FUNC_VISIBILITY void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers);

 /**
 * When the graph finished, GE will invoke this interface
 * @return The status whether finalize successfully
 */
 ge::Status Finalize();
 GE_FUNC_VISIBILITY ge::Status Finalize();
 }

 #endif  // GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_
--- a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h
+++ b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h
@@ -17,11 +17,25 @@
 #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_
 #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_

 // GE_FUNC_VISIBILITY marks declarations that are exported from the library.
 // It expands to nothing unless the build defines FUNC_VISIBILITY; when defined it becomes
 // __declspec(dllexport) on MSVC and __attribute__((visibility("default"))) on other compilers.
 // The documented double-underscore __declspec spelling is used; the single-underscore form is
 // only a legacy synonym.
 #if defined(_MSC_VER)
 #ifdef FUNC_VISIBILITY
 #define GE_FUNC_VISIBILITY __declspec(dllexport)
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #else
 #ifdef FUNC_VISIBILITY
 #define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #endif

 #include "common/opskernel/ops_kernel_builder.h"

 namespace ge {
 namespace host_cpu {
 class HostCpuOpsKernelBuilder : public OpsKernelBuilder {
 class GE_FUNC_VISIBILITY HostCpuOpsKernelBuilder : public OpsKernelBuilder {
 public:
  Status Initialize(const map<std::string, std::string> &options) override;

--- a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h
+++ b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h
@@ -17,6 +17,20 @@
 #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_
 #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_

 // GE_FUNC_VISIBILITY marks declarations that are exported from the library.
 // It expands to nothing unless the build defines FUNC_VISIBILITY; when defined it becomes
 // __declspec(dllexport) on MSVC and __attribute__((visibility("default"))) on other compilers.
 // The documented double-underscore __declspec spelling is used; the single-underscore form is
 // only a legacy synonym.
 #if defined(_MSC_VER)
 #ifdef FUNC_VISIBILITY
 #define GE_FUNC_VISIBILITY __declspec(dllexport)
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #else
 #ifdef FUNC_VISIBILITY
 #define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #endif

 #include <map>
 #include <string>
 #include <vector>
@@ -25,7 +39,7 @@

 namespace ge {
 namespace host_cpu {
 class HostCpuOpsKernelInfoStore : public OpsKernelInfoStore {
 class GE_FUNC_VISIBILITY HostCpuOpsKernelInfoStore : public OpsKernelInfoStore {
 public:
  HostCpuOpsKernelInfoStore() {}
  ~HostCpuOpsKernelInfoStore() override = default;
--- a/ge/host_cpu_engine/ops_kernel_store/op/host_op.h
+++ b/ge/host_cpu_engine/ops_kernel_store/op/host_op.h
@@ -21,7 +21,7 @@

 namespace ge {
 namespace host_cpu {
 class HostOp : public Op {
 class GE_FUNC_VISIBILITY HostOp : public Op {
 public:
  HostOp(const Node &node, RunContext &run_context) : Op(node, run_context) {}
  ~HostOp() override = default;
--- a/ge/host_cpu_engine/ops_kernel_store/op/op.h
+++ b/ge/host_cpu_engine/ops_kernel_store/op/op.h
@@ -29,7 +29,7 @@ namespace host_cpu {
 /**
 * The base class for all op.
 */
 class Op {
 class GE_FUNC_VISIBILITY Op {
 public:
  Op(const Node &node, RunContext &run_context) : run_context_(run_context), node_(node) {}
  virtual ~Op() = default;
--- a/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h
+++ b/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h
@@ -32,7 +32,7 @@ using OP_CREATOR_FUNC = std::function<std::shared_ptr<Op>(const Node &, RunConte
 /**
 * manage all the op, support create op.
 */
 class OpFactory {
 class GE_FUNC_VISIBILITY OpFactory {
 public:
  static OpFactory &Instance();

@@ -70,7 +70,7 @@ class OpFactory {
  std::vector<std::string> all_ops_;
 };

 class OpRegistrar {
 class GE_FUNC_VISIBILITY OpRegistrar {
 public:
  OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) {
    OpFactory::Instance().RegisterCreator(type, func);
--- a/ge/hybrid/common/tensor_value.cc
+++ b/ge/hybrid/common/tensor_value.cc
@@ -71,7 +71,7 @@ TensorValue::TensorValue(void *buffer, size_t size) : ref_buffer_(buffer), ref_s
 TensorValue::~TensorValue() { Destroy(); }

 void TensorValue::Destroy() {
  if (buffer_ != nullptr || ref_buffer_ != nullptr) {
  if (buffer_ != nullptr) {
    GELOGD("Unref tensor: %s", DebugString().c_str());
    buffer_.reset();
  }
--- a/ge/hybrid/executor/hybrid_model_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_executor.cc
@@ -71,12 +71,14 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
  GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_));
  RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End");

  HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc), "Failed to execute partitioned call.");
  HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc, args.outputs),
                        "Failed to execute partitioned call.");
  RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End");

  HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph.");
  RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End");

  args.outputs.clear();
  HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs");
  RECORD_MODEL_EXECUTION_EVENT(&context_, "[GetOutput] End");
  return SUCCESS;
--- a/ge/hybrid/executor/subgraph_executor.cc
+++ b/ge/hybrid/executor/subgraph_executor.cc
@@ -131,10 +131,14 @@ Status SubgraphExecutor::InitInputsForKnownShape(const std::vector<TensorValue>
 }

 Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
                                      const std::vector<ConstGeTensorDescPtr> &input_desc) {
                                      const std::vector<ConstGeTensorDescPtr> &input_desc,
                                      const std::vector<TensorValue> &outputs) {
  GELOGD("[%s] is dynamic = %s", graph_item_->GetName().c_str(), graph_item_->IsDynamic() ? "true" : "false");
  GE_CHK_STATUS_RET(Init(inputs, input_desc), "[%s] Failed to init executor.", graph_item_->GetName().c_str());

  if (!outputs.empty()) {
    GE_CHK_STATUS_RET(EnableOutputZeroCopy(outputs),
                      "Failed to enable output zero copy by user provided outputs.");
  }
  if (!graph_item_->IsDynamic()) {
    return ExecuteAsyncForKnownShape(inputs);
  }
@@ -144,6 +148,11 @@ Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
  return SUCCESS;
 }

 // Convenience overload for callers that do not supply output tensors: forwards to the
 // three-argument overload with an empty output list.
 Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
                                       const std::vector<ConstGeTensorDescPtr> &input_desc) {
  const std::vector<TensorValue> no_outputs;
  return ExecuteAsync(inputs, input_desc, no_outputs);
 }

 Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vector<TensorValue> &inputs) {
  GELOGD("[%s] subgraph is not dynamic.", graph_item_->GetName().c_str());
  if (graph_item_->GetAllNodes().size() != 1) {
@@ -440,5 +449,37 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) {

  return SUCCESS;
 }

 // Binds caller-provided output tensors to the nodes that feed the graph outputs, so that
 // those nodes write their results directly into the caller's tensors (zero copy).
 //
 // @param outputs  caller-provided output tensors; outputs[i] is bound to the i-th output
 //                 edge of the graph item, so the count must equal the number of output edges
 // @return SUCCESS on success, PARAM_INVALID if the counts differ, otherwise the error
 //         reported by the subgraph context
 Status SubgraphExecutor::EnableOutputZeroCopy(const vector<TensorValue> &outputs) {
  GELOGD("To enable zero copy, output number = %zu", outputs.size());
  const auto &output_edges = graph_item_->GetOutputEdges();
  // Op -> NetOutput, set the output tensor of Op that output to the NetOutput node
  if (outputs.size() != output_edges.size()) {
    GELOGE(PARAM_INVALID, "Output number mismatches, expect = %zu, but given = %zu",
           output_edges.size(),
           outputs.size());
    return PARAM_INVALID;
  }

  for (size_t i = 0; i < outputs.size(); ++i) {
    auto &output_tensor = outputs[i];
    // Each output edge is a (producer node, producer output index) pair.
    auto &output_node = output_edges[i].first;
    int output_idx = output_edges[i].second;
    GELOGD("[%s] Set output tensor[%zu] to [%s]'s output[%d], tensor = %s",
           graph_item_->GetName().c_str(),
           i,
           output_node->NodeName().c_str(),
           output_idx,
           output_tensor.DebugString().c_str());

    GE_CHK_STATUS_RET(subgraph_context_->SetOutput(*output_node, output_idx, output_tensor),
                      "[%s] Failed to set output tensor[%zu]",
                      graph_item_->GetName().c_str(),
                      i);
  }

  GELOGD("Done enabling zero copy for outputs successfully.");
  return SUCCESS;
 }
 }  // namespace hybrid
 }  // namespace ge
--- a/ge/hybrid/executor/subgraph_executor.h
+++ b/ge/hybrid/executor/subgraph_executor.h
@@ -43,7 +43,19 @@ class SubgraphExecutor {
   * @param input_desc      input tensor descriptions
   * @return SUCCESS on success, error code otherwise
   */
  Status ExecuteAsync(const std::vector<TensorValue> &inputs, const std::vector<ConstGeTensorDescPtr> &input_desc);
  Status ExecuteAsync(const std::vector<TensorValue> &inputs,
                      const std::vector<ConstGeTensorDescPtr> &input_desc);

  /**
   * Execute subgraph async, output tensor address(not data) and output tensor descriptions are
   * valid after this method returned
   * @param inputs          input tensors
   * @param input_desc      input tensor descriptions
   * @param outputs         user provided output tensors; when not empty they are bound to the
   *                        graph outputs to enable output zero copy, and their number must match
   *                        the number of graph output edges. An empty vector leaves output
   *                        zero copy disabled
   * @return SUCCESS on success, error code otherwise
   */
  Status ExecuteAsync(const std::vector<TensorValue> &inputs,
                      const std::vector<ConstGeTensorDescPtr> &input_desc,
                      const std::vector<TensorValue> &outputs);

  /**
   * Execute subgraph async, output tensor address(not data) and output tensor descriptions are
@@ -76,6 +88,7 @@ class SubgraphExecutor {

 private:
  Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state);
  Status EnableOutputZeroCopy(const std::vector<TensorValue> &outputs);
  static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state);
  Status Init(const std::vector<TensorValue> &inputs,
              const std::vector<ConstGeTensorDescPtr> &input_desc);
--- a/ge/hybrid/model/hybrid_model.cc
+++ b/ge/hybrid/model/hybrid_model.cc
@@ -40,9 +40,14 @@ HybridModel::~HybridModel() {
  GELOGD("[%s] HybridModel destroyed.", model_name_.c_str());
 }

 Status HybridModel::Init() {
 Status HybridModel::Init(bool is_single_op) {
  GELOGD("Start to init hybrid model.");
  GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "Failed to build hybrid model.");
  is_single_op_ = is_single_op;
  if (is_single_op) {
    GE_CHK_STATUS_RET(HybridModelBuilder(*this).BuildForSingleOp(), "Failed to build hybrid model.");
  } else {
    GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "Failed to build hybrid model.");
  }
  GELOGD("HybridModel initialized successfully.");
  return SUCCESS;
 }
--- a/ge/hybrid/model/hybrid_model.h
+++ b/ge/hybrid/model/hybrid_model.h
@@ -37,7 +37,7 @@ class HybridModel {

  ~HybridModel();

  Status Init();
  Status Init(bool is_single_op = false);

  const NodeItem *GetNodeItem(const NodePtr &node) const;

@@ -69,6 +69,10 @@ class HybridModel {
    return model_id_;
  }

  bool IsSingleOp() const {
    return is_single_op_;
  }

  TensorValue* GetVariable(const string &name) const;

  NodePtr GetVariableNode(const string &name) const;
@@ -131,11 +135,13 @@ class HybridModel {
  std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_;

  bool is_new_model_desc_ = false;    // support aipp
  bool is_single_op_ = false;

  // runtime fields
  uint32_t device_id_ = 0;
  uint32_t model_id_ = 0;
  uint8_t *var_mem_base_ = nullptr;
  std::unique_ptr<TensorBuffer> weight_buffer_;
  RuntimeParam root_runtime_param_;
 };
 }  // namespace hybrid
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -147,6 +147,21 @@ Status HybridModelBuilder::Build() {
  return SUCCESS;
 }

 // Builds the hybrid model for a single-op model: validates the root model, indexes the task
 // defs of the root graph, then loads the graph, the weights and the tasks.
 //
 // @return SUCCESS on success; INTERNAL_ERROR if the root graph has no associated GeModel;
 //         otherwise the error code of the first failing build step
 Status HybridModelBuilder::BuildForSingleOp() {
  GE_CHK_STATUS_RET(ValidateParams(), "Failed to validate GeRootModel");
  const auto &root_graph = ge_root_model_->GetRootGraph();
  hybrid_model_.model_name_ = root_graph->GetName();
  GELOGI("[%s] Start to build hybrid model.", GetGraphName());

  // Use find() on the map itself: operator[] on a copy would silently produce a null model
  // for a missing root graph entry, which would then be dereferenced while indexing tasks.
  const auto &subgraph_models = ge_root_model_->GetSubgraphInstanceNameToModel();
  const auto model_iter = subgraph_models.find(root_graph->GetName());
  if (model_iter == subgraph_models.end() || model_iter->second == nullptr) {
    GELOGE(INTERNAL_ERROR, "[%s] Failed to find model of root graph", GetGraphName());
    return INTERNAL_ERROR;
  }
  const GeModelPtr &ge_model = model_iter->second;

  GE_CHK_STATUS_RET(IndexTaskDefs(root_graph, ge_model),
                    "[%s] Failed to index task defs", GetGraphName());
  GE_CHK_STATUS_RET(LoadGraph(), "[%s] Failed to load graph", GetGraphName());
  GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName());
  GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName());
  GELOGI("[%s] Done building hybrid model for single op successfully.", GetGraphName());
  return SUCCESS;
 }

 Status HybridModelBuilder::ValidateParams() {
  GE_CHECK_NOTNULL(ge_root_model_);
  GE_CHECK_NOTNULL(ge_root_model_->GetRootGraph());
@@ -951,46 +966,71 @@ Status HybridModelBuilder::InitVariableTensors() {
 }

 Status HybridModelBuilder::InitWeights() {
  // For constant in root graph
  const auto &root_graph = ge_root_model_->GetRootGraph();
  const auto &subgraph_models = ge_root_model_->GetSubgraphInstanceNameToModel();
  auto iter = subgraph_models.find(root_graph->GetName());
  if (iter == subgraph_models.end()) {
    GELOGD("Root graph model not found");
    return SUCCESS;
  }

  auto &root_model = iter->second;
  const auto &weight_buffer = root_model->GetWeight();
  if (weight_buffer.GetSize() == 0) {
    GELOGD("weight is empty");
    return SUCCESS;
  }

  auto allocator = NpuMemoryAllocator::GetAllocator();
  GE_CHECK_NOTNULL(allocator);

  for (auto &it : hybrid_model_.node_items_) {
    auto &node_item = it.second;
    if (node_item->node_type != CONSTANT) {
  hybrid_model_.weight_buffer_ = TensorBuffer::Create(allocator, weight_buffer.size());
  GE_CHECK_NOTNULL(hybrid_model_.weight_buffer_);
  auto weight_base = reinterpret_cast<uint8_t *>(hybrid_model_.weight_buffer_->GetData());
  GE_CHK_RT_RET(rtMemcpy(weight_base,
                         hybrid_model_.weight_buffer_->GetSize(),
                         weight_buffer.GetData(),
                         weight_buffer.GetSize(),
                         RT_MEMCPY_HOST_TO_DEVICE));

  GELOGI("Init weight mem successfully, weight base %p, weight size = %zu",
         weight_base,
         hybrid_model_.weight_buffer_->GetSize());
  for (auto &node : root_graph->GetDirectNode()) {
    if (node->GetType() != CONSTANT) {
      continue;
    }

    const auto &constant_node = node_item->node;
    auto op_desc = constant_node->GetOpDesc();
    auto op_desc = node->GetOpDesc();
    auto v_weights = ModelUtils::GetWeights(op_desc);
    if (v_weights.empty()) {
      GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", constant_node->GetName().c_str());
      GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", node->GetName().c_str());
      return INTERNAL_ERROR;
    }
    auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get());
    auto output_desc = op_desc->MutableOutputDesc(0);
    GE_CHECK_NOTNULL(output_desc);
    auto tensor_size = ge_tensor->GetData().GetSize();
    GELOGD("[%s] Start to init Constant node [%s], size = %ld",
    GE_CHECK_NOTNULL(ge_tensor);
    const GeTensorDesc &tensor_desc = ge_tensor->GetTensorDesc();
    int64_t tensor_size = 0;
    GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*op_desc->MutableOutputDesc(0), tensor_size),
                            "[%s] Failed to get tensor size",
                            node->GetName().c_str());
    int64_t data_offset = 0;
    GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetDataOffset(tensor_desc, data_offset),
                            "[%s] Failed to get data offset",
                            node->GetName().c_str());
    GELOGD("[%s] Start to init Constant node [%s], size = %ld, offset = %ld",
           GetGraphName(),
           constant_node->GetName().c_str(),
           tensor_size);
           node->GetName().c_str(),
           tensor_size,
           data_offset);

    auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size);
    auto tensor_buffer = TensorBuffer::Create(weight_base + data_offset, tensor_size);
    GE_CHECK_NOTNULL(tensor_buffer);
    std::unique_ptr<TensorValue> constant_tensor(new (std::nothrow)TensorValue(std::move(tensor_buffer)));
    GE_CHECK_NOTNULL(constant_tensor);
    constant_tensor->SetName("Constant_" + op_desc->GetName());
    if (tensor_size > 0) {
      GE_CHK_RT_RET(rtMemcpy(constant_tensor->MutableData(),
                             constant_tensor->GetSize(),
                             ge_tensor->GetData().data(),
                             ge_tensor->GetData().size(),
                             RT_MEMCPY_HOST_TO_DEVICE));
    }

    hybrid_model_.constant_tensors_.emplace(constant_node, std::move(constant_tensor));
    GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), constant_node->GetName().c_str(), tensor_size);
    hybrid_model_.constant_tensors_.emplace(node, std::move(constant_tensor));
    GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), node->GetName().c_str(), tensor_size);
  }
  return SUCCESS;
 }
@@ -1038,6 +1078,53 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr
  return SUCCESS;
 }

 // Associates every kernel / kernel_ex / hccl task def of ge_model with the direct node of
 // sub_graph whose op id equals the task's op_index, and records it in hybrid_model_.task_defs_.
 // Task defs of any other type are skipped.
 //
 // @param sub_graph  graph whose direct nodes own the tasks
 // @param ge_model   model that carries the task defs (and the TBE kernel store)
 // @return SUCCESS on success; INTERNAL_ERROR if a task refers to an op index that is not
 //         present in sub_graph; an error is also returned if any required pointer is null
 Status HybridModelBuilder::IndexTaskDefs(const ComputeGraphPtr &sub_graph, const GeModelPtr &ge_model) {
  GE_CHECK_NOTNULL(sub_graph);
  GE_CHECK_NOTNULL(ge_model);
  const auto model_task_def = ge_model->GetModelTaskDefPtr();
  GE_CHECK_NOTNULL(model_task_def);

  // index task defs
  GELOGD("To index tasks for subgraph: %s", sub_graph->GetName().c_str());
  std::unordered_map<int64_t, NodePtr> node_map;
  for (const auto &node : sub_graph->GetDirectNode()) {
    GE_CHECK_NOTNULL(node);
    GE_CHECK_NOTNULL(node->GetOpDesc());
    auto node_id = node->GetOpDesc()->GetId();
    GELOGD("op_index = %ld, node_name = %s", node_id, node->GetName().c_str());
    node_map.emplace(node_id, node);
  }

  // Bind by reference: model_task_def keeps the message alive, no need to copy all task defs.
  const auto &tasks = model_task_def->task();
  for (int i = 0; i < tasks.size(); ++i) {
    const domi::TaskDef &task_def = tasks[i];
    GELOGI("Task id = %d, task type = %d", i, task_def.type());
    auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
    // Placeholder only: every task type handled below overwrites it, all others are skipped.
    uint32_t op_index = static_cast<uint32_t>(-1);
    if (task_type == RT_MODEL_TASK_KERNEL) {
      op_index = task_def.kernel().context().op_index();
    } else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
      op_index = task_def.kernel_ex().op_index();
    } else if (task_type == RT_MODEL_TASK_HCCL) {
      op_index = task_def.kernel_hccl().op_index();
    } else {
      GELOGD("Skip task type: %d", static_cast<int>(task_type));
      continue;
    }

    auto iter = node_map.find(op_index);
    if (iter == node_map.end()) {
      GELOGE(INTERNAL_ERROR, "Failed to get node by index = %u", op_index);
      return INTERNAL_ERROR;
    }

    auto &node = iter->second;
    if (task_type == RT_MODEL_TASK_KERNEL) {
      ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc());
    }

    GELOGD("Task loaded for node: %s, task type = %d, op_index = %u",
           node->GetName().c_str(), static_cast<int>(task_type), op_index);
    hybrid_model_.task_defs_[node].emplace_back(task_def);
  }

  return SUCCESS;
 }

 Status HybridModelBuilder::IndexTaskDefs() {
  const auto &root_graph = ge_root_model_->GetRootGraph();
  if (SetOutputNameAttr(*root_graph) != SUCCESS) {
--- a/ge/hybrid/model/hybrid_model_builder.h
+++ b/ge/hybrid/model/hybrid_model_builder.h
@@ -35,6 +35,7 @@ class HybridModelBuilder {
  explicit HybridModelBuilder(HybridModel &hybrid_model);
  ~HybridModelBuilder() = default;
  Status Build();
  Status BuildForSingleOp();

 private:
  static Status UpdateAnchorStatus(const NodePtr &node);
@@ -64,6 +65,7 @@ class HybridModelBuilder {
  Status ParseDependentInputNodes(NodeItem &node_item, const std::vector<string> &dependencies);
  Status ParseDependentForFusedSubgraph(NodeItem &node_item);
  Status IndexTaskDefs();
  Status IndexTaskDefs(const ComputeGraphPtr &sub_graph, const GeModelPtr &ge_model);
  Status IndexSpecialNodes();
  Status InitRuntimeParams();
  Status InitModelMem();
--- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
@@ -49,6 +49,7 @@ Status AiCoreNodeExecutor::Initialize() {
 Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const {
  GE_CHECK_NOTNULL(node);
  GELOGI("AiCoreNodeExecutor(%s) LoadTask Start.", node->GetName().c_str());
  bool is_single_op = model.IsSingleOp();

  auto *task_defs = model.GetTaskDefs(node);
  if (task_defs == nullptr || task_defs->empty()) {
@@ -66,7 +67,8 @@ Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &nod

  AiCoreTaskBuilder builder(node->GetOpDesc(), *task_defs);
  std::unique_ptr<NodeTask> node_task;
  GE_CHK_STATUS_RET(builder.BuildTask(node_task, true), "[%s] Failed to build op tasks.", node->GetName().c_str());
  GE_CHK_STATUS_RET(builder.BuildTask(node_task, true, is_single_op),
                    "[%s] Failed to build op tasks.", node->GetName().c_str());
  task = std::move(node_task);
  GELOGI("AiCoreNodeExecutor(%s) LoadTask End.", node->GetName().c_str());
  return SUCCESS;
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
@@ -65,7 +65,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
  }
  TBEHandleStore &kernel_store = TBEHandleStore::GetInstance();
  rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str());
  if (rt_ret != RT_ERROR_NONE) {
  if (rt_ret != RT_ERROR_NONE || is_single_op_) {
    void *bin_handle = nullptr;
    if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) {
      GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str());
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.h
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h
@@ -50,6 +50,8 @@ class AiCoreOpTask {

  uint32_t GetBlockDim() const {return block_dim_;}

  void SetSingleOp(bool is_single_op) {is_single_op_ = is_single_op;};

 protected:
  Status UpdateTilingInfo(TaskContext &context);
  virtual std::string GetKeyForOpParamSize() const;
@@ -72,6 +74,7 @@ class AiCoreOpTask {
  uint32_t args_size_ = 0;
  uint32_t block_dim_ = 1;
  bool clear_atomic_ = true;
  bool is_single_op_ = false;
  std::vector<int> output_indices_to_skip_;
 };

--- a/ge/hybrid/node_executor/aicore/aicore_task_builder.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_task_builder.cc
@@ -37,7 +37,9 @@ AiCoreTaskBuilder::AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector
    : op_desc_(op_desc), task_defs_(task_defs) {
 }

 Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic) {
 Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task,
                                    bool ignore_failure_on_atomic,
                                    bool is_single_op) {
  GE_CHECK_NOTNULL(op_desc_);
  if (task_defs_.size() > kNumTaskWithAtomicAddrCleanTask) {
    GELOGE(INTERNAL_ERROR,
@@ -68,6 +70,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool i
    auto atomic_task =
        std::unique_ptr<AtomicAddrCleanOpTask>(new(std::nothrow)AtomicAddrCleanOpTask());
    GE_CHECK_NOTNULL(atomic_task);
    atomic_task->SetSingleOp(is_single_op);
    GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()),
                      "[%s] Failed to init task for AtomicAddrClean",
                      op_desc_->GetName().c_str());
@@ -77,6 +80,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool i
  // build aicore task: it is initialised from the last task def of the node, and the
  // single-op flag is set before Init() is called
  auto aicore_task = std::unique_ptr<AiCoreOpTask>(new(std::nothrow)AiCoreOpTask());
  GE_CHECK_NOTNULL(aicore_task);
  aicore_task->SetSingleOp(is_single_op);
  GE_CHK_STATUS_RET(aicore_task->Init(*op_desc_, task_defs_.back()),
                    "[%s] Failed to init task for AiCore",
                    op_desc_->GetName().c_str());
--- a/ge/hybrid/node_executor/aicore/aicore_task_builder.h
+++ b/ge/hybrid/node_executor/aicore/aicore_task_builder.h
@@ -47,7 +47,7 @@ class AiCoreTaskBuilder {
  AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector<domi::TaskDef> &task_defs);
  ~AiCoreTaskBuilder() = default;

  Status BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic);
  Status BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic, bool is_single_op = false);

 private:
  bool ExpectAtomicAddrCleanTask();
--- a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc
+++ b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc
@@ -27,7 +27,7 @@ namespace ge {
 namespace hybrid {
 REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::GE_LOCAL, GeLocalNodeExecutor);

 const std::unordered_map<std::string, std::vector<uint32_t>>
 const std::map<std::string, std::vector<uint32_t>>
    RefInputTask::out_ref_input_index_ = {{DATA, {}},
                                          {AIPPDATA, {}},
                                          {RESHAPE, {}},
@@ -36,7 +36,7 @@ const std::unordered_map<std::string, std::vector<uint32_t>>
                                          {BROADCASTGRADIENTARGS, {}}
                                         };

 const std::unordered_set<std::string> DependInputShapeTask::depend_input_shape_ops_ = {SHAPE, SHAPEN, RANK, SIZE};
 const std::set<std::string> DependInputShapeTask::depend_input_shape_ops_ = {SHAPE, SHAPEN, RANK, SIZE};

 Status RefInputTask::UpdateArgs(TaskContext &) {
  // no need update args
--- a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.h
+++ b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.h
@@ -46,7 +46,7 @@ class RefInputTask : public NodeTask {

  // key is op type, value is output ref input index,
  // e.g. {1,0} means out[0] ref input[1], out[1] ref input[0], if vector is empty, it means ref input one by one
  static const std::unordered_map<std::string, std::vector<uint32_t>> out_ref_input_index_;
  static const std::map<std::string, std::vector<uint32_t>> out_ref_input_index_;
 };

 class DependInputShapeTask : public NodeTask {
@@ -65,7 +65,7 @@ class DependInputShapeTask : public NodeTask {
  const NodePtr node_;

  // ops depend input shape
  static const std::unordered_set<std::string> depend_input_shape_ops_;
  static const std::set<std::string> depend_input_shape_ops_;
 };

 class ConstantNodeTask : public NodeTask {
--- a/ge/init/gelib.h
+++ b/ge/init/gelib.h
@@ -31,7 +31,7 @@ using std::map;
 using std::vector;

 namespace ge {
 class GELib {
 class GE_FUNC_VISIBILITY GELib {
 public:
  GELib() = default;
  ~GELib() = default;
--- a/ge/ir_build/atc_ir_common.cc
+++ b/ge/ir_build/atc_ir_common.cc
@@ -77,7 +77,7 @@ Status CheckInputFormat(const string &input_format) {
  return ge::SUCCESS;
 }

 bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map,
 bool CheckDynamicBatchSizeInputShapeValid(map<string, vector<int64_t>> shape_map,
                                          std::string &dynamic_batch_size) {
  int32_t size = 0;
  for (auto iter = shape_map.begin(); iter != shape_map.end(); ++iter) {
@@ -119,7 +119,7 @@ bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>>
  return true;
 }

 bool CheckDynamicImagesizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map,
 bool CheckDynamicImagesizeInputShapeValid(map<string, vector<int64_t>> shape_map,
                                          const std::string input_format, std::string &dynamic_image_size) {
  if (!input_format.empty() && !ge::TypeUtils::IsFormatValid(input_format.c_str())) {
    GELOGE(ge::PARAM_INVALID, "user input format [%s] is not found!", input_format.c_str());
@@ -177,7 +177,7 @@ bool CheckDynamicImagesizeInputShapeValid(unordered_map<string, vector<int64_t>>
  return true;
 }

 bool CheckDynamicDimsInputShapeValid(const unordered_map<string, vector<int64_t>> &shape_map,
 bool CheckDynamicDimsInputShapeValid(const map<string, vector<int64_t>> &shape_map,
                                     string input_format, string &dynamic_dims) {
  if (input_format != "ND") {
    ErrorManager::GetInstance().ATCReportErrMessage(
@@ -272,7 +272,7 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i
    return ge::SUCCESS;
  }

  unordered_map<string, vector<int64_t>> shape_map;
  map<string, vector<int64_t>> shape_map;
  vector<pair<string, vector<int64_t>>> user_shape_map;
  is_dynamic_input = true;
  if (input_shape.empty()) {
@@ -310,7 +310,7 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i
  return ge::SUCCESS;
 }

 bool ParseInputShape(const string &input_shape, unordered_map<string, vector<int64_t>> &shape_map,
 bool ParseInputShape(const string &input_shape, map<string, vector<int64_t>> &shape_map,
                     vector<pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input) {
  vector<string> shape_vec = StringUtils::Split(input_shape, ';');
  const int DEFAULT_SHAPE_PAIR_SIZE = 2;
--- a/ge/ir_build/atc_ir_common.h
+++ b/ge/ir_build/atc_ir_common.h
@@ -46,13 +46,13 @@ static std::map<std::string, domiTensorFormat_t> input_format_str_to_geformat =
 static const std::string kEnableCompressWeightTrue = "1";
 static const std::string kEnableCompressWeightFalse = "0";

 bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map,
 bool CheckDynamicBatchSizeInputShapeValid(map<string, vector<int64_t>> shape_map,
                                          std::string &dynamic_batch_size);

 bool CheckDynamicImagesizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map,
 bool CheckDynamicImagesizeInputShapeValid(map<string, vector<int64_t>> shape_map,
                                          const std::string input_format, std::string &dynamic_image_size);

 bool CheckDynamicDimsInputShapeValid(const std::unordered_map<std::string, std::vector<int64_t>> &shape_map,
 bool CheckDynamicDimsInputShapeValid(const std::map<std::string, std::vector<int64_t>> &shape_map,
                                     std::string input_format, std::string &dynamic_dims);

 bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims);
@@ -61,7 +61,7 @@ Status CheckDynamicInputParamValid(std::string &dynamic_batch_size, std::string
                                   std::string &dynamic_dims, const std::string input_shape,
                                   const std::string input_format, bool &is_dynamic_input);

 bool ParseInputShape(const std::string &input_shape, std::unordered_map<string, std::vector<int64_t>> &shape_map,
 bool ParseInputShape(const std::string &input_shape, std::map<string, std::vector<int64_t>> &shape_map,
                     std::vector<std::pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input = false);

 Status CheckOutputTypeParamValid(const std::string output_type);
--- a/ge/ir_build/ge_ir_build.cc
+++ b/ge/ir_build/ge_ir_build.cc
@@ -268,7 +268,7 @@ graphStatus Impl::UpdateDataOpAttr(const Graph &graph) {
  if (options_.find(kInputShape) == options_.end()) {
    return GRAPH_SUCCESS;
  }
  unordered_map<string, vector<int64_t>> shape_map;
  map<string, vector<int64_t>> shape_map;
  vector<pair<string, vector<int64_t>>> user_shape_map;
  GE_CHK_BOOL_EXEC(ParseInputShape(options_[kInputShape], shape_map, user_shape_map, true),
    return GRAPH_PARAM_INVALID, "parse input shape failed!");
--- a/ge/offline/CMakeLists.txt
+++ b/ge/offline/CMakeLists.txt
@@ -23,6 +23,7 @@ target_compile_options(atc_atc.bin PRIVATE
    -O2
    -Wno-deprecated-declarations
    -fno-common
    -fvisibility=hidden
 )

 target_compile_definitions(atc_atc.bin PRIVATE
@@ -30,6 +31,7 @@ target_compile_definitions(atc_atc.bin PRIVATE
    COMPILE_OMG_PACKAGE
    google=ascend_private
    LOG_CPP
    FUNC_VISIBILITY
 )

 target_include_directories(atc_atc.bin PRIVATE
@@ -58,6 +60,10 @@ target_include_directories(atc_atc.bin PRIVATE
    ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
 )

 target_link_options(atc_atc.bin PRIVATE
    -Wl,-Bsymbolic
 )

 target_link_libraries(atc_atc.bin PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    ascend_protobuf
@@ -90,6 +96,7 @@ target_compile_options(fwk_atc.bin PRIVATE
    -O2
    -Wno-deprecated-declarations
    -fno-common
    -fvisibility=hidden
 )

 target_compile_definitions(fwk_atc.bin PRIVATE
@@ -97,6 +104,7 @@ target_compile_definitions(fwk_atc.bin PRIVATE
    COMPILE_OMG_PACKAGE
    google=ascend_private
    LOG_CPP
    FUNC_VISIBILITY
 )

 target_include_directories(fwk_atc.bin PRIVATE
@@ -125,6 +133,10 @@ target_include_directories(fwk_atc.bin PRIVATE
    ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
 )

 target_link_options(fwk_atc.bin PRIVATE
    -Wl,-Bsymbolic
 )

 target_link_libraries(fwk_atc.bin PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    ascend_protobuf
--- a/ge/opskernel_manager/ops_kernel_builder_manager.h
+++ b/ge/opskernel_manager/ops_kernel_builder_manager.h
@@ -23,7 +23,7 @@

 namespace ge {
 using OpsKernelBuilderPtr = std::shared_ptr<OpsKernelBuilder>;
 class OpsKernelBuilderManager {
 class GE_FUNC_VISIBILITY OpsKernelBuilderManager {
 public:
  ~OpsKernelBuilderManager();

--- a/ge/opskernel_manager/ops_kernel_manager.h
+++ b/ge/opskernel_manager/ops_kernel_manager.h
@@ -41,7 +41,7 @@ using std::vector;
 namespace ge {
 using OpsKernelInfoStorePtr = std::shared_ptr<OpsKernelInfoStore>;

 class OpsKernelManager {
 class GE_FUNC_VISIBILITY OpsKernelManager {
 public:
  friend class GELib;

--- a/ge/plugin/engine/CMakeLists.txt
+++ b/ge/plugin/engine/CMakeLists.txt
@@ -9,11 +9,13 @@ add_library(engine SHARED ${SRC_LIST})
 target_compile_options(engine PRIVATE
    -Werror
    -fno-common
    -fvisibility=hidden
 )

 target_compile_definitions(engine PRIVATE
    REUSE_MEMORY=1
    PROTOBUF_INLINE_NOT_IN_HEADERS=0
    FUNC_VISIBILITY
 )

 target_include_directories(engine PRIVATE
@@ -32,6 +34,10 @@ target_include_directories(engine PRIVATE
    ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )

 target_link_options(engine PRIVATE
    -Wl,-Bsymbolic
 )

 target_link_libraries(engine PRIVATE
    $<BUILD_INTERFACE:intf_pub>
    -Wl,--no-as-needed
--- a/ge/plugin/engine/dnnengines.h
+++ b/ge/plugin/engine/dnnengines.h
@@ -25,7 +25,7 @@
 #include "plugin/engine/engine_manage.h"

 namespace ge {
 class AICoreDNNEngine : public DNNEngine {
 class GE_FUNC_VISIBILITY AICoreDNNEngine : public DNNEngine {
 public:
  AICoreDNNEngine() = default;
  explicit AICoreDNNEngine(const std::string &engine_name);
@@ -40,7 +40,7 @@ class AICoreDNNEngine : public DNNEngine {
  DNNEngineAttribute engine_attribute_;
 };

 class VectorCoreDNNEngine : public DNNEngine {
 class GE_FUNC_VISIBILITY VectorCoreDNNEngine : public DNNEngine {
 public:
  VectorCoreDNNEngine() = default;
  explicit VectorCoreDNNEngine(const std::string &engine_name);
@@ -56,7 +56,7 @@ class VectorCoreDNNEngine : public DNNEngine {
 };


 class AICpuDNNEngine : public DNNEngine {
 class GE_FUNC_VISIBILITY AICpuDNNEngine : public DNNEngine {
 public:
  AICpuDNNEngine() = default;
  explicit AICpuDNNEngine(const std::string &engine_name);
@@ -71,7 +71,7 @@ class AICpuDNNEngine : public DNNEngine {
  DNNEngineAttribute engine_attribute_;
 };

 class AICpuTFDNNEngine : public DNNEngine {
 class GE_FUNC_VISIBILITY AICpuTFDNNEngine : public DNNEngine {
 public:
  AICpuTFDNNEngine() = default;
  explicit AICpuTFDNNEngine(const std::string &engine_name);
@@ -86,7 +86,7 @@ class AICpuTFDNNEngine : public DNNEngine {
  DNNEngineAttribute engine_attribute_;
 };

 class GeLocalDNNEngine : public DNNEngine {
 class GE_FUNC_VISIBILITY GeLocalDNNEngine : public DNNEngine {
 public:
  GeLocalDNNEngine() = default;
  explicit GeLocalDNNEngine(const std::string &engine_name);
@@ -101,7 +101,7 @@ class GeLocalDNNEngine : public DNNEngine {
  DNNEngineAttribute engine_attribute_;
 };

 class HostCpuDNNEngine : public DNNEngine {
 class GE_FUNC_VISIBILITY HostCpuDNNEngine : public DNNEngine {
 public:
  HostCpuDNNEngine() = default;
  explicit HostCpuDNNEngine(const std::string &engine_name);
@@ -116,7 +116,7 @@ private:
  DNNEngineAttribute engine_attribute_;
 };

 class RtsDNNEngine : public DNNEngine {
 class GE_FUNC_VISIBILITY RtsDNNEngine : public DNNEngine {
 public:
  RtsDNNEngine() = default;
  explicit RtsDNNEngine(const std::string &engine_name);
@@ -131,7 +131,7 @@ class RtsDNNEngine : public DNNEngine {
  DNNEngineAttribute engine_attribute_;
 };

 class HcclDNNEngine : public DNNEngine {
 class GE_FUNC_VISIBILITY HcclDNNEngine : public DNNEngine {
 public:
  HcclDNNEngine() = default;
  explicit HcclDNNEngine(const std::string &engine_name);
--- a/ge/plugin/engine/engine_manage.h
+++ b/ge/plugin/engine/engine_manage.h
@@ -17,6 +17,20 @@
 #ifndef GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_
 #define GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_

 #if defined(_MSC_VER)
 #ifdef FUNC_VISIBILITY
 #define GE_FUNC_VISIBILITY _declspec(dllexport)
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #else
 #ifdef FUNC_VISIBILITY
 #define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #endif

 #include <map>
 #include <memory>
 #include <string>
@@ -26,7 +40,7 @@

 namespace ge {
 using DNNEnginePtr = std::shared_ptr<DNNEngine>;
 class EngineManager {
 class GE_FUNC_VISIBILITY EngineManager {
 public:
  static Status RegisterEngine(const std::string &engine_name, DNNEnginePtr engine_ptr);
  static DNNEnginePtr GetEngine(const std::string &engine_name);
@@ -34,7 +48,7 @@ class EngineManager {
 };

 extern "C" {
 void GetDNNEngineObjs(std::map<std::string, DNNEnginePtr> &engines);
 GE_FUNC_VISIBILITY void GetDNNEngineObjs(std::map<std::string, DNNEnginePtr> &engines);
 }
 }  // namespace ge
 #endif  // GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_
--- a/ge/session/inner_session.cc
+++ b/ge/session/inner_session.cc
@@ -77,6 +77,23 @@ Status InnerSession::Initialize() {

  UpdateThreadContext(std::map<std::string, std::string>{});

  // session device id set here
  std::string str_session_device_id;
  if (GetContext().GetOption("ge.session_device_id", str_session_device_id) == SUCCESS) {
    GELOGI("Option session device id has set, value is %s.", str_session_device_id.c_str());

    uint32_t session_device_id = 0;
    try {
      session_device_id = static_cast<uint32_t>(std::stoi(str_session_device_id.c_str()));
      // session device id has priority
      GetContext().SetCtxDeviceId(session_device_id);
    } catch (std::invalid_argument &) {
      GELOGW("session device id %s transform to int failed.", str_session_device_id.c_str());
    } catch (std::out_of_range &) {
      GELOGW("session device id %s transform to int failed.", str_session_device_id.c_str());
    }
  }

  GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId()));

  DumpProperties dump_properties;
--- a/ge/session/omg.cc
+++ b/ge/session/omg.cc
@@ -606,7 +606,7 @@ Status InitDomiOmgContext(const string &input_shape, const string &input_format,
  }

  // Analyze the input shape parameter
  unordered_map<string, vector<int64_t>> &shape_map = domi::GetContext().input_dims;
  map<string, vector<int64_t>> &shape_map = domi::GetContext().input_dims;

  if (!ge::ParseInputShape(input_shape, domi::GetContext().input_dims, domi::GetContext().user_input_dims,
                           is_dynamic_input) ||
@@ -689,7 +689,7 @@ Status ParseOutNodes(const string &out_nodes) {
 ///
 static Status CheckOpNameMap(const ComputeGraphPtr &graph, const std::string &op_conf) {
  GE_CHECK_NOTNULL(graph);
  unordered_map<string, string> graphNodeTypes;
  map<string, string> graphNodeTypes;
  for (const NodePtr &node : graph->GetAllNodes()) {
    auto op_desc = node->GetOpDesc();
    if (op_desc == nullptr) {
--- a/ge/single_op/single_op.cc
+++ b/ge/single_op/single_op.cc
@@ -256,9 +256,27 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
                                     const vector<DataBuffer> &input_buffers,
                                     vector<GeTensorDesc> &output_desc,
                                     vector<DataBuffer> &output_buffers) {
  GE_CHECK_NOTNULL(op_task_);
  GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers));
  if (hybrid_model_executor_ != nullptr) {
    GELOGD("Execute multi-task dynamic single op by hybrid model executor");
    hybrid::HybridModelExecutor::ExecuteArgs args;
    for (auto &input : input_buffers) {
      args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length));
    }
    for (auto &output : output_buffers) {
      args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length));
    }
    for (auto &tensor_desc : input_desc) {
      auto desc = MakeShared<GeTensorDesc>(tensor_desc);
      GE_CHECK_NOTNULL(desc);
      args.input_desc.emplace_back(desc);
    }

    return hybrid_model_executor_->Execute(args);
  }

  std::lock_guard<std::mutex> lk(*stream_mutex_);
  GE_CHECK_NOTNULL(op_task_);

  GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_));
  GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic));
--- a/ge/single_op/single_op.h
+++ b/ge/single_op/single_op.h
@@ -28,6 +28,7 @@
 #include "runtime/stream.h"
 #include "task/op_task.h"
 #include "cce/aicpu_engine_struct.h"
 #include "hybrid/executor/hybrid_model_executor.h"

 namespace ge {
 class StreamResource;
@@ -46,7 +47,7 @@ class SingleOp {
  Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);

  friend class SingleOpModel;
  StreamResource *stream_resource_;
  StreamResource *stream_resource_ = nullptr;
  std::mutex *stream_mutex_;
  rtStream_t stream_ = nullptr;
  std::vector<void *> input_addr_list_;
@@ -77,6 +78,8 @@ class DynamicSingleOp {
                        std::vector<DataBuffer> &outputs) const;

  std::unique_ptr<OpTask> op_task_;
  std::unique_ptr<hybrid::HybridModel> hybrid_model_;
  std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_;
  uintptr_t resource_id_ = 0;
  std::mutex *stream_mutex_;
  rtStream_t stream_ = nullptr;
--- a/ge/single_op/single_op_model.cc
+++ b/ge/single_op/single_op_model.cc
@@ -31,6 +31,8 @@
 #include "task/aicpu_task_builder.h"
 #include "task/aicpu_kernel_task_builder.h"
 #include "task/tbe_task_builder.h"
 #include "hybrid/executor/hybrid_model_executor.h"
 #include "hybrid/node_executor/node_executor.h"

 static std::atomic<std::uint64_t> aicpu_kernel_id(0);

@@ -42,6 +44,20 @@ namespace ge {
 namespace {
 const size_t kDataOutputNum = 1;
 }  // namespace
 /// @brief Detect whether any node of the model's compute graph declares infer-shape dependencies.
 /// @param [in] ge_model  model whose graph is scanned; must not be null
 /// @param [out] flag     set to true when at least one op desc returns a non-empty
 ///                       GetOpInferDepends() list; left untouched otherwise
 /// @return SUCCESS after the scan completes; the GE_CHECK_NOTNULL failure status when the
 ///         model, its compute graph, a node or a node's op desc is null
 static Status IfInferDepend(GeModelPtr &ge_model, bool &flag) {
  // Guard the model pointer here instead of relying on every caller having checked it.
  GE_CHECK_NOTNULL(ge_model);
  auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());
  // The compute graph is dereferenced right below, so reject a null graph explicitly.
  GE_CHECK_NOTNULL(comp_graph);
  for (const auto &node : comp_graph->GetAllNodes()) {
    GE_CHECK_NOTNULL(node);
    auto op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    const auto &depends = op_desc->GetOpInferDepends();
    if (!depends.empty()) {
      // One dependent node is enough; stop scanning.
      flag = true;
      return SUCCESS;
    }
  }
  return SUCCESS;
 }

 SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size)
    : model_name_(model_name), ori_model_data_(model_data), ori_model_size_(model_size) {}

@@ -478,6 +494,30 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp &
  single_op.num_outputs_ = netoutput_op_->GetAllInputsSize();
  GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource));
  model_params_.memory_size = UINT_MAX;

  auto ge_model = model_helper_.GetGeModel();
  GE_CHECK_NOTNULL(ge_model);
  bool infer_depend_flag = false;
  GE_CHK_STATUS_RET_NOLOG(IfInferDepend(ge_model, infer_depend_flag));
  if (ge_model->GetModelTaskDefPtr()->task_size() > 1 || infer_depend_flag) {
    GELOGD("Build single op HybridModel.");
    GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized());
    auto root_model = model_helper_.GetGeRootModel();
    GE_CHECK_NOTNULL(root_model);
    root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph()));
    root_model->SetSubgraphInstanceNameToModel(root_model->GetRootGraph()->GetName(), ge_model);
    single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model));
    GE_CHECK_NOTNULL(single_op.hybrid_model_);
    GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "Failed to init hybrid model");
    int32_t device_id = 0;
    GE_CHK_RT_RET(rtGetDevice(&device_id));
    single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(),
                                                                                         device_id,
                                                                                         resource.GetStream()));
    GE_CHECK_NOTNULL(single_op.hybrid_model_executor_);
    GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "Failed to init hybrid model");
    return SUCCESS;
  }
  return BuildTaskListForDynamicOp(single_op);
 }
 }  // namespace ge
--- a/ge/single_op/stream_resource.cc
+++ b/ge/single_op/stream_resource.cc
@@ -61,6 +61,10 @@ DynamicSingleOp *StreamResource::GetDynamicOperator(const void *key) {
  return it->second.get();
 }

 // Returns the runtime stream handle currently held in stream_ (the value last stored by SetStream).
 rtStream_t StreamResource::GetStream() const {
  return stream_;
 }

 void StreamResource::SetStream(rtStream_t stream) {
  stream_ = stream;
 }
--- a/ge/single_op/stream_resource.h
+++ b/ge/single_op/stream_resource.h
@@ -37,6 +37,7 @@ class StreamResource {
  StreamResource(StreamResource &&) = delete;
  StreamResource &operator=(const StreamResource &) = delete;
  StreamResource &operator=(StreamResource &&) = delete;
  rtStream_t GetStream() const;
  void SetStream(rtStream_t stream);

  SingleOp *GetOperator(const void *key);
--- a/ge/stub/gen_stubapi.py
+++ b/ge/stub/gen_stubapi.py
@@ -16,7 +16,7 @@ logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(leve
 """
    this attr is used for symbol table visible
 """
 GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY'
 GE_ATTR = 'GE_FUNC_VISIBILITY'

 """
    generate stub func body by return type
--- a/inc/external/ge/ge_api.h
+++ b/inc/external/ge/ge_api.h
@@ -34,15 +34,15 @@ typedef uint32_t (*pCallBackFunc)(uint32_t graph_id, const std::map<AscendString
 }

 // Initialize GE
 ATTRIBUTED_DEPRECATED(Status GEInitialize(const std::map<AscendString, AscendString> &))
 Status GEInitialize(const std::map<std::string, std::string> &options);
 ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY Status GEInitialize(const std::map<AscendString, AscendString> &))
 GE_FUNC_VISIBILITY Status GEInitialize(const std::map<std::string, std::string> &options);

 Status GEInitialize(const std::map<AscendString, AscendString> &options);
 GE_FUNC_VISIBILITY Status GEInitialize(const std::map<AscendString, AscendString> &options);

 // Finalize GE, release all resources
 Status GEFinalize();
 GE_FUNC_VISIBILITY Status GEFinalize();

 class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Session {
 class GE_FUNC_VISIBILITY Session {
 public:
  ATTRIBUTED_DEPRECATED(Session(const std::map<AscendString, AscendString> &))
  explicit Session(const std::map<std::string, std::string> &options);
--- a/inc/external/ge/ge_api_error_codes.h
+++ b/inc/external/ge/ge_api_error_codes.h
@@ -28,7 +28,7 @@ namespace ge {
 #define ATTRIBUTED_DEPRECATED(replacement) __declspec(deprecated("Please use " #replacement " instead."))
 #endif

 class StatusFactory {
 class GE_FUNC_VISIBILITY StatusFactory {
 public:
  static StatusFactory *Instance() {
    static StatusFactory instance;
@@ -70,7 +70,7 @@ class StatusFactory {
  std::map<uint32_t, std::string> err_desc_;
 };

 class ErrorNoRegisterar {
 class GE_FUNC_VISIBILITY ErrorNoRegisterar {
 public:
  ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); }
  ErrorNoRegisterar(uint32_t err, const char *desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); }
--- a/inc/external/ge/ge_error_codes.h
+++ b/inc/external/ge/ge_error_codes.h
@@ -17,6 +17,20 @@
 #ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_
 #define INC_EXTERNAL_GE_GE_ERROR_CODES_H_

 // GE_FUNC_VISIBILITY marks symbols that should be exported from the library.
 // It expands to an export attribute only when FUNC_VISIBILITY is defined;
 // otherwise it expands to nothing.
 #if defined(_MSC_VER)
 #ifdef FUNC_VISIBILITY
 // MSVC: export the symbol from the DLL.
 #define GE_FUNC_VISIBILITY _declspec(dllexport)
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #else
 #ifdef FUNC_VISIBILITY
 // Non-MSVC compilers: give the symbol default visibility.
 #define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #endif

 #include <stddef.h>

 #ifdef __cplusplus
--- a/inc/external/ge/ge_ir_build.h
+++ b/inc/external/ge/ge_ir_build.h
@@ -17,6 +17,20 @@
 #ifndef INC_EXTERNAL_GE_IR_BUILD_H_
 #define INC_EXTERNAL_GE_IR_BUILD_H_

 // Export marker for the IR build API declared in this header.
 // GE_FUNC_VISIBILITY is empty unless FUNC_VISIBILITY is defined.
 #if defined(_MSC_VER)
 #ifdef FUNC_VISIBILITY
 // MSVC branch: dllexport.
 #define GE_FUNC_VISIBILITY _declspec(dllexport)
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #else
 #ifdef FUNC_VISIBILITY
 // Non-MSVC branch: default symbol visibility.
 #define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #endif

 #include <string>
 #include <map>
 #include <memory>
@@ -44,17 +58,17 @@ struct ModelBufferData {
 * @retval GRAPH_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
 ATTRIBUTED_DEPRECATED(graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &))
 graphStatus aclgrphBuildInitialize(std::map<std::string, std::string> global_options);
 ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &))
 GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map<std::string, std::string> global_options);

 graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &global_options);
 GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &global_options);

 /**
 * @ingroup AscendCL
 * @brief build model.Notice the model is stored in buffer
 *
 */
 void aclgrphBuildFinalize();
 GE_FUNC_VISIBILITY void aclgrphBuildFinalize();

 /**
 * @ingroup AscendCL
@@ -66,12 +80,12 @@ void aclgrphBuildFinalize();
 * @retval GRAPH_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
 ATTRIBUTED_DEPRECATED(graphStatus aclgrphBuildModel(const ge::Graph &, const std::map<AscendString, AscendString> &,
 ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &, const std::map<AscendString, AscendString> &,
                                                    ModelBufferData &))
 graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<std::string, std::string> &build_options,
 GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<std::string, std::string> &build_options,
                              ModelBufferData &model);

 graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<AscendString, AscendString> &build_options,
 GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<AscendString, AscendString> &build_options,
                              ModelBufferData &model);

 /**
@@ -83,10 +97,10 @@ graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<AscendStrin
 * @retval GRAPH_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
 ATTRIBUTED_DEPRECATED(graphStatus aclgrphSaveModel(const char *, const ModelBufferData &))
 graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &model);
 ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *, const ModelBufferData &))
 GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &model);

 graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &model);
 GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &model);

 /**
 * @ingroup AscendCL
@@ -98,7 +112,7 @@ graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &mod
 * @retval GRAPH_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
 graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *patch_version);
 GE_FUNC_VISIBILITY graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *patch_version);

 /**
 * @ingroup AscendCL
@@ -110,7 +124,7 @@ graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *pat
 * @retval GRAPH_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
 graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len);
 GE_FUNC_VISIBILITY graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len);

 /**
 * @ingroup AscendCL
@@ -123,7 +137,7 @@ graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const siz
 * @retval GRAPH_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
 graphStatus aclgrphGenerateForOp(const AscendString &op_type, const std::vector<TensorDesc> &inputs,
 GE_FUNC_VISIBILITY graphStatus aclgrphGenerateForOp(const AscendString &op_type, const std::vector<TensorDesc> &inputs,
                                 const std::vector<TensorDesc> &outputs, Graph &graph);

 };      // namespace ge
--- a/inc/framework/common/debug/ge_log.h
+++ b/inc/framework/common/debug/ge_log.h
@@ -37,7 +37,7 @@ extern "C" {
 // trace status of log
 enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP };

 class GeLog {
 class GE_FUNC_VISIBILITY GeLog {
 public:
  static uint64_t GetTid() {
 #ifdef __GNUC__
--- a/inc/framework/common/debug/log.h
+++ b/inc/framework/common/debug/log.h
@@ -278,7 +278,7 @@
  } while (0)

 template <typename T>
 std::string FmtToStr(const T &t) {
 GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) {
  std::string fmt;
  std::stringstream st;
  st << "[" << t << "]";
--- a/inc/framework/common/fmk_error_codes.h
+++ b/inc/framework/common/fmk_error_codes.h
@@ -17,6 +17,20 @@
 #ifndef INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_
 #define INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_

 // Export marker used by the classes declared in this header.
 // GE_FUNC_VISIBILITY is empty unless FUNC_VISIBILITY is defined.
 #if defined(_MSC_VER)
 #ifdef FUNC_VISIBILITY
 // MSVC branch: dllexport.
 #define GE_FUNC_VISIBILITY _declspec(dllexport)
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #else
 #ifdef FUNC_VISIBILITY
 // Non-MSVC branch: default symbol visibility.
 #define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #endif

 #include <map>
 #include <string>

@@ -38,7 +52,7 @@ const int MODID_OME = 2;          // OME module ID
 const int MODID_CALIBRATION = 3;  // Calibration module ID

 namespace domi {
 class StatusFactory {
 class GE_FUNC_VISIBILITY StatusFactory {
 public:
  static StatusFactory *Instance();

@@ -54,7 +68,7 @@ class StatusFactory {
  std::map<uint32_t, std::string> err_desc_;
 };

 class ErrorNoRegisterar {
 class GE_FUNC_VISIBILITY ErrorNoRegisterar {
 public:
  ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); }
  ~ErrorNoRegisterar() {}
--- a/inc/framework/common/ge_format_util.h
+++ b/inc/framework/common/ge_format_util.h
@@ -23,7 +23,7 @@
 #include "graph/tensor.h"

 namespace ge {
 class GeFormatUtil {
 class GE_FUNC_VISIBILITY GeFormatUtil {
 public:
  ///
  /// @name   TransShape
--- a/inc/framework/common/ge_types.h
+++ b/inc/framework/common/ge_types.h
@@ -215,7 +215,7 @@ struct ModelInfo {
 };

 // Asynchronous callback interface, implemented by the caller
 class ModelListener {
 class GE_FUNC_VISIBILITY ModelListener {
 public:
  virtual ~ModelListener() {}
  ///
--- a/inc/framework/common/gflags_util.h
+++ b/inc/framework/common/gflags_util.h
@@ -17,11 +17,25 @@
 #ifndef INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_
 #define INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_

 // Export marker applied to GflagsUtils below.
 // GE_FUNC_VISIBILITY is empty unless FUNC_VISIBILITY is defined.
 #if defined(_MSC_VER)
 #ifdef FUNC_VISIBILITY
 // MSVC branch: dllexport.
 #define GE_FUNC_VISIBILITY _declspec(dllexport)
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #else
 #ifdef FUNC_VISIBILITY
 // Non-MSVC branch: default symbol visibility.
 #define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
 #else
 #define GE_FUNC_VISIBILITY
 #endif
 #endif

 #include <gflags/gflags.h>
 #include <string>

 namespace ge {
 class GflagsUtils {
 class GE_FUNC_VISIBILITY GflagsUtils {
 public:
  static bool IsSetCommandTrue(const char *name) {
    std::string out;
--- a/inc/framework/common/helper/model_helper.h
+++ b/inc/framework/common/helper/model_helper.h
@@ -28,7 +28,7 @@
 #include "model/ge_root_model.h"

 namespace ge {
 class ModelHelper {
 class GE_FUNC_VISIBILITY ModelHelper {
 public:
  ModelHelper() = default;
  ~ModelHelper();