diff --git a/.gitignore b/.gitignore
index 8a4003cf..891c0f87 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 /build
 /output
 /prebuilts
+/cov
 *.ir
 *.out
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 29be9eda..186ef4e6 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -88,10 +88,8 @@ if (ENABLE_OPEN_SRC)
         find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR})
         find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR})
         if(PLATFORM STREQUAL "train")
-            find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
             find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
             find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
-            find_module(resource libresource.so ${ASCEND_RUNTIME_DIR})
             find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
             find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
             find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
@@ -101,12 +99,10 @@ if (ENABLE_OPEN_SRC)
         elseif(PLATFORM STREQUAL "inference")
             find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
             find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
-            find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
-            find_module(resource libresource.so ${ASCEND_ATC_DIR})
+	    find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
             find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
             find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
             find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
-            #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
             if(PRODUCT STREQUAL "flr3")
             elseif(PRODUCT STREQUAL "flr1")
                 find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
@@ -116,17 +112,14 @@ if (ENABLE_OPEN_SRC)
                 find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR})
             endif()
         elseif(PLATFORM STREQUAL "all")
-            find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
-            find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
-            find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
-            find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
-            find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
-            find_module(resource libresource.so ${ASCEND_ATC_DIR})
-            find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
-            find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
+            find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
+            find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
+            find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
             find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
-            find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
-            #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
+            find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR})
+            find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
+            find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
+            find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
         else()
             message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!")
         endif()
diff --git a/build.sh b/build.sh
index f2fafd48..7b6da560 100644
--- a/build.sh
+++ b/build.sh
@@ -166,14 +166,14 @@ build_graphengine()
     echo "execute command: cmake ${CMAKE_ARGS} .. failed."
     return 1
   fi
-  COMMON_TARGET="ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt "
+  COMMON_TARGET="ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt "
   TARGET=${COMMON_TARGET}
   if [ "x${PLATFORM}" = "xtrain" ]
   then
-    TARGET="ge_runner ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder fwk_atc.bin ${TARGET}"
+    TARGET="ge_runner fwk_atc.bin ${TARGET}"
   elif [ "x${PLATFORM}" = "xinference" ]
   then
-    TARGET="ge_compiler atc_ge_local_engine atc_ge_local_opskernel_builder atc_host_cpu_engine atc_host_cpu_opskernel_builder atc_atc.bin opensrc_ascendcl ${TARGET}"
+    TARGET="ge_compiler atc_atc.bin opensrc_ascendcl ${TARGET}"
   elif [ "X$ENABLE_GE_UT" = "Xon" ]
   then
     TARGET="ut_libgraph ut_libge_multiparts_utest ut_libge_others_utest ut_libge_kernel_utest ut_libge_distinct_load_utest"
@@ -183,7 +183,7 @@ build_graphengine()
   elif [ "x${PLATFORM}" = "xall" ]
   then
     # build all the target
-    TARGET=""
+    TARGET="ge_runner ge_compiler fwk_atc.bin atc_atc.bin opensrc_ascendcl ${TARGET}"
   fi
   
   make ${VERBOSE} ${TARGET} -j${THREAD_NUM} && make install
@@ -198,8 +198,6 @@ g++ -v
 mk_dir ${OUTPUT_PATH}
 build_graphengine || { echo "GraphEngine build failed."; return; }
 echo "---------------- GraphEngine build finished ----------------"
-#cp -rf "${BUILD_PATH}/graphengine/"*.so "${OUTPUT_PATH}"
-#rm -rf "${OUTPUT_PATH}/"libproto*
 rm -f ${OUTPUT_PATH}/libgmock*.so
 rm -f ${OUTPUT_PATH}/libgtest*.so
 rm -f ${OUTPUT_PATH}/lib*_stub.so
@@ -209,10 +207,6 @@ find ${OUTPUT_PATH} -name "*.so*" -print0 | xargs -0 chmod 500
 
 echo "---------------- GraphEngine output generated ----------------"
 
-# if [[ "X$ENABLE_GE_ST" = "Xon" ]]; then
-#     cp ${BUILD_PATH}/graphengine/tests/st/st_resnet50_train ${OUTPUT_PATH}
-# fi
-
 if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
     cp ${BUILD_PATH}/tests/ut/common/graph/ut_libgraph ${OUTPUT_PATH}
     cp ${BUILD_PATH}/tests/ut/ge/ut_libge_multiparts_utest ${OUTPUT_PATH}
@@ -220,9 +214,6 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
     cp ${BUILD_PATH}/tests/ut/ge/ut_libge_others_utest ${OUTPUT_PATH}
     cp ${BUILD_PATH}/tests/ut/ge/ut_libge_kernel_utest ${OUTPUT_PATH}
 
-#     if [[ "X${ENABLE_GE_UT_ONLY_COMPILE}" != "Xon" ]]; then
-#         export LD_LIBRARY_PATH=${D_LINK_PATH}/x86_64/:${BUILD_PATH}../third_party/prebuild/x86_64/:${BUILD_PATH}/graphengine/:/usr/local/HiAI/driver/lib64:/usr/local/HiAI/runtime/lib64:${LD_LIBRARY_PATH}
-#         echo ${LD_LIBRARY_PATH}
     ${OUTPUT_PATH}/ut_libgraph &&
     ${OUTPUT_PATH}/ut_libge_multiparts_utest &&
     ${OUTPUT_PATH}/ut_libge_distinct_load_utest &&
@@ -232,17 +223,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
         echo "!!! UT FAILED, PLEASE CHECK YOUR CHANGES !!!"
         exit 1;
     fi
-#     fi
-
-#     if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then
-        echo "Generating coverage statistics, please wait..."
-        cd ${BASEPATH}
-        rm -rf ${BASEPATH}/cov
-        mkdir ${BASEPATH}/cov
-        lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
-        lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
-        cd ${BASEPATH}/cov
-        genhtml coverage.info
+    echo "Generating coverage statistics, please wait..."
+    cd ${BASEPATH}
+    rm -rf ${BASEPATH}/cov
+    mkdir ${BASEPATH}/cov
+    lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
+    lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
+    cd ${BASEPATH}/cov
+    genhtml coverage.info
 fi
 
 # generate output package in tar form, including ut/st libraries/executables
@@ -256,6 +244,8 @@ generate_package()
   ATC_PATH="atc/lib64"
   ATC_BIN_PATH="atc/bin"
   FWK_BIN_PATH="fwkacllib/bin"
+  FWK_INCLUDE_PATH="fwkacllib/include"
+  ATC_INCLUDE_PATH="atc/include"
   NNENGINE_PATH="plugin/nnengine/ge_config"
   OPSKERNEL_PATH="plugin/opskernel"
 
@@ -277,6 +267,8 @@ generate_package()
   mk_dir "${OUTPUT_PATH}/${ACL_PATH}"
   mk_dir "${OUTPUT_PATH}/${ATC_BIN_PATH}"
   mk_dir "${OUTPUT_PATH}/${FWK_BIN_PATH}"
+  mk_dir "${OUTPUT_PATH}/${FWK_INCLUDE_PATH}"
+  mk_dir "${OUTPUT_PATH}/${ATC_INCLUDE_PATH}"
  
   cd "${OUTPUT_PATH}"
 
@@ -289,10 +281,10 @@ generate_package()
   find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name libengine.so -exec cp -f {} ${OUTPUT_PATH}/${ATC_PATH}/${NNENGINE_PATH}/../ \;
 
   MAX_DEPTH=1
-  if [ "x${PLATFORM}" = "xall" ] || [ "x${PLATFORM}" = "xinference" ]
-  then
-    MAX_DEPTH=2
-  fi
+#  if [ "x${PLATFORM}" = "xall" ] || [ "x${PLATFORM}" = "xinference" ]
+#  then
+#    MAX_DEPTH=2
+#  fi
   for lib in "${PLUGIN_OPSKERNEL[@]}";
   do
     find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth ${MAX_DEPTH} -name "$lib" -exec cp -f {} ${OUTPUT_PATH}/${FWK_PATH}/${OPSKERNEL_PATH} \;
@@ -318,7 +310,15 @@ generate_package()
   find ./lib/atclib -name atc.bin -exec cp {} "${OUTPUT_PATH}/${ATC_BIN_PATH}" \;
   find ./lib/fwkacl -name atc.bin -exec cp {} "${OUTPUT_PATH}/${FWK_BIN_PATH}" \;
   find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name "libascendcl.so" -exec cp -f {} ${OUTPUT_PATH}/${ACL_PATH} \;
-  
+
+  cp -r "${OUTPUT_PATH}"/../metadef/inc/external/* "${OUTPUT_PATH}/${ATC_INCLUDE_PATH}"
+  cp -r "${OUTPUT_PATH}"/../parser/inc/external/* "${OUTPUT_PATH}/${ATC_INCLUDE_PATH}"
+  cp -r "${OUTPUT_PATH}"/../inc/external/* "${OUTPUT_PATH}/${ATC_INCLUDE_PATH}"
+
+  cp -r "${OUTPUT_PATH}"/../metadef/inc/external/* "${OUTPUT_PATH}/${FWK_INCLUDE_PATH}"
+  cp -r "${OUTPUT_PATH}"/../parser/inc/external/* "${OUTPUT_PATH}/${FWK_INCLUDE_PATH}"
+  cp -r "${OUTPUT_PATH}"/../inc/external/* "${OUTPUT_PATH}/${FWK_INCLUDE_PATH}"
+
   if [ "x${PLATFORM}" = "xtrain" ]
   then
     tar -cf graphengine_lib.tar fwkacllib
@@ -339,4 +339,4 @@ then
   find ./ -name graphengine_lib.tar -exec rm {} \;
   tar -cf graphengine_lib.tar lib
 fi
-echo "---------------- GraphEngine package archive generated ----------------"
\ No newline at end of file
+echo "---------------- GraphEngine package archive generated ----------------"
diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt
index 16494a33..3ae51590 100755
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -639,15 +639,6 @@ set(INFER_SRC_LIST
     "graph/load/model_manager/task_info/model_exit_task_info.cc"
     "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc"
     "graph/load/model_manager/task_info/super_kernel/super_kernel.cc"
-    "single_op/task/op_task.cc"
-    "single_op/task/build_task_utils.cc"
-    "single_op/task/tbe_task_builder.cc"
-    "single_op/task/aicpu_task_builder.cc"
-    "single_op/task/aicpu_kernel_task_builder.cc"
-    "single_op/single_op.cc"
-    "single_op/single_op_model.cc"
-    "single_op/stream_resource.cc"
-    "single_op/single_op_manager.cc"
     "hybrid/hybrid_davinci_model_stub.cc"
     "ir_build/ge_ir_build.cc"
     "ir_build/atc_ir_common.cc"
@@ -703,11 +694,13 @@ target_compile_definitions(ge_runner PRIVATE
     FMK_SUPPORT_DUMP
     DAVINCI_CLOUD
     google=ascend_private
+    FUNC_VISIBILITY
 )
 
 target_compile_options(ge_runner PRIVATE
     -O2
     -fno-common
+    -fvisibility=hidden
     $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable>
     $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format>
 )
@@ -738,6 +731,10 @@ target_include_directories(ge_runner SYSTEM PRIVATE
     ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
 )
 
+target_link_options(ge_runner PRIVATE
+    -Wl,-Bsymbolic
+)
+
 target_link_libraries(ge_runner PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     adump_server
@@ -772,11 +769,13 @@ target_compile_definitions(ge_compiler PRIVATE
     FMK_HOST_INFER
     COMPILE_OMG_PACKAGE
     google=ascend_private
+    FUNC_VISIBILITY
 )
 
 target_compile_options(ge_compiler PRIVATE
     -O2
     -fno-common
+    -fvisibility=hidden
     $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable>
     $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format>
 )
@@ -807,6 +806,10 @@ target_include_directories(ge_compiler SYSTEM PRIVATE
     ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
 )
 
+target_link_options(ge_compiler PRIVATE
+    -Wl,-Bsymbolic
+)
+
 target_link_libraries(ge_compiler PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     static_mmpa
@@ -868,6 +871,7 @@ target_compile_options(opensrc_ascendcl PRIVATE
     -O2
     -fvisibility=hidden
 )
+
 target_link_options(opensrc_ascendcl PRIVATE
     -rdynamic
     -Wl,--allow-multiple-definition
@@ -875,6 +879,7 @@ target_link_options(opensrc_ascendcl PRIVATE
     -Wl,-Bsymbolic
     -Wl,--exclude-libs,ALL
 )
+
 target_link_libraries(opensrc_ascendcl PRIVATE
                      -Wl,--whole-archive
                      ge_executor
diff --git a/ge/common/CMakeLists.txt b/ge/common/CMakeLists.txt
index 0172628c..22b1a7ea 100755
--- a/ge/common/CMakeLists.txt
+++ b/ge/common/CMakeLists.txt
@@ -12,7 +12,7 @@ set(PROTO_LIST
     "${METADEF_DIR}/proto/tensorflow/tensor.proto"
     "${METADEF_DIR}/proto/tensorflow/tensor_shape.proto"
     "${METADEF_DIR}/proto/tensorflow/types.proto"
-    "${METADEF_DIR}/proto/tensorflow/versions.proto" 
+    "${METADEF_DIR}/proto/tensorflow/versions.proto"
 )
 
 protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
@@ -73,6 +73,7 @@ target_compile_definitions(ge_common PRIVATE
     FMK_SUPPORT_DUMP
     OS_CENTOS
     google=ascend_private
+    FUNC_VISIBILITY
 )
 
 target_compile_options(ge_common PRIVATE
@@ -105,6 +106,10 @@ target_include_directories(ge_common PRIVATE
     ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
 )
 
+target_link_options(ge_common PRIVATE
+    -Wl,-Bsymbolic
+)
+
 target_link_libraries(ge_common PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     static_mmpa
@@ -132,6 +137,7 @@ target_compile_definitions(ge_common_static PRIVATE
     $<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0>
     $<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX>
     LOG_CPP
+    FUNC_VISIBILITY
 )
 
 target_compile_options(ge_common_static PRIVATE
@@ -181,6 +187,7 @@ target_compile_definitions(ge_common PRIVATE
     OS_CENTOS
     google=ascend_private
     LOG_CPP
+    FUNC_VISIBILITY
 )
 
 target_compile_options(ge_common PRIVATE
@@ -208,6 +215,10 @@ target_include_directories(ge_common PRIVATE
     ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
 )
 
+target_link_options(ge_common PRIVATE
+    -Wl,-Bsymbolic
+)
+
 target_link_libraries(ge_common PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     ascend_protobuf_static
diff --git a/ge/common/helper/model_cache_helper.cc b/ge/common/helper/model_cache_helper.cc
index bf8c3ce0..67d934df 100755
--- a/ge/common/helper/model_cache_helper.cc
+++ b/ge/common/helper/model_cache_helper.cc
@@ -598,7 +598,7 @@ bool ModelCacheHelper::IsAllocatedGraphIdSameAsCache(Json &json) const {
     return false;
   }
   // Compare allocated graph id info between json and VarManager
-  std::unordered_map<std::string, uint32_t> allocated_graph_id;
+  std::map<std::string, uint32_t> allocated_graph_id;
   auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id);
   if (ret != SUCCESS) {
     GELOGW("Fail to parse AllocatedGraphId from Json.");
@@ -667,7 +667,7 @@ bool ModelCacheHelper::IsChangedGraphIdSameAsCache(Json &json) const {
     return false;
   }
   // Compare variable changed graph id info between json and VarManager
-  std::unordered_map<std::string, uint32_t> changed_graph_id;
+  std::map<std::string, uint32_t> changed_graph_id;
   auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id);
   if (ret != SUCCESS) {
     GELOGW("Fail to parse ChangedGraphId from Json.");
@@ -732,7 +732,7 @@ bool ModelCacheHelper::IsVarAddrMgrMapSameAsCache(Json &json) const {
   }
   // Compare variable address info between json and VarManager
   std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector;
-  std::unordered_set<uint64_t> var_offset_set;
+  std::set<uint64_t> var_offset_set;
   auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set);
   if (ret != SUCCESS) {
     GELOGW("Fail to parse VarAddrMgrMap from Json.");
@@ -942,7 +942,7 @@ Status ModelCacheHelper::RecoverAllocatedGraphId(const Json &json) const {
     GELOGW("Input param json type should be null or array.");
     return PARAM_INVALID;
   }
-  std::unordered_map<std::string, uint32_t> allocated_graph_id;
+  std::map<std::string, uint32_t> allocated_graph_id;
   auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id);
   if (ret != SUCCESS) {
     GELOGW("Fail to parse AllocatedGraphId from Json.");
@@ -963,7 +963,7 @@ Status ModelCacheHelper::RecoverChangedGraphId(const Json &json) const {
     GELOGW("Input param json type should be null or array.");
     return PARAM_INVALID;
   }
-  std::unordered_map<std::string, uint32_t> changed_graph_id;
+  std::map<std::string, uint32_t> changed_graph_id;
   auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id);
   if (ret != SUCCESS) {
     GELOGW("Fail to parse AllocatedGraphId from Json.");
@@ -985,7 +985,7 @@ Status ModelCacheHelper::RecoverVarAddrAndTensorDesc(const Json &json) const {
     return PARAM_INVALID;
   }
   std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector;
-  std::unordered_set<uint64_t> var_offset_set;
+  std::set<uint64_t> var_offset_set;
   auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set);
   if (ret != SUCCESS) {
     GELOGW("Fail to parse VarAddrMgrMap from Json.");
@@ -1508,7 +1508,7 @@ Status ModelCacheHelper::ParseMemResourceFromJson(const Json &json, map<rtMemTyp
 
 Status ModelCacheHelper::ParseVarAddrMgrMapFromJson(
   const Json &json, std::vector<std::pair<std::string, VarAddrMgr>> &var_addr_mgr_vector,
-  std::unordered_set<uint64_t> &var_offset_set) {
+  std::set<uint64_t> &var_offset_set) {
   if (!(json.is_array() || json.is_null())) {
     GELOGW("Input param json type should be null or array.");
     return PARAM_INVALID;
@@ -1606,7 +1606,7 @@ Status ModelCacheHelper::ParseTransRoadsFromJson(
 }
 
 Status ModelCacheHelper::ParseChangedGraphIdFromJson(const Json &json,
-                                                     std::unordered_map<std::string, uint32_t> &changed_graph_id) {
+                                                     std::map<std::string, uint32_t> &changed_graph_id) {
   if (!(json.is_array() || json.is_null())) {
     GELOGW("Input param json type should be null or array.");
     return PARAM_INVALID;
@@ -1624,7 +1624,7 @@ Status ModelCacheHelper::ParseChangedGraphIdFromJson(const Json &json,
 }
 
 Status ModelCacheHelper::ParseAllocatedGraphIdFromJson(const Json &json,
-                                                       std::unordered_map<std::string, uint32_t> &allocated_graph_id) {
+                                                       std::map<std::string, uint32_t> &allocated_graph_id) {
   if (!(json.is_array() || json.is_null())) {
     GELOGW("Input param json type should be null or array.");
     return PARAM_INVALID;
diff --git a/ge/common/helper/model_cache_helper.h b/ge/common/helper/model_cache_helper.h
index 68381e96..398d6c03 100755
--- a/ge/common/helper/model_cache_helper.h
+++ b/ge/common/helper/model_cache_helper.h
@@ -95,15 +95,15 @@ class ModelCacheHelper {
   static Status ParseMemResourceFromJson(const Json &json, map<rtMemType_t, int64_t> &mem_resource);
   static Status ParseVarAddrMgrMapFromJson(const Json &json,
                                            std::vector<std::pair<std::string, VarAddrMgr>> &var_addr_mgr_vector,
-                                           std::unordered_set<uint64_t> &var_offset_set);
+                                           std::set<uint64_t> &var_offset_set);
   static Status ParseCurVarTensorDescMapFromJson(
       const Json &json, std::unordered_map<std::string, ge::GeTensorDesc> &cur_var_tensor_desc_map);
   static Status ParseTransRoadsFromJson(const Json &json,
                                         std::unordered_map<std::string, std::vector<TransNodeInfo>> &trans_roads);
   static Status ParseChangedGraphIdFromJson(const Json &json,
-                                            std::unordered_map<std::string, uint32_t> &changed_graph_id);
+                                            std::map<std::string, uint32_t> &changed_graph_id);
   static Status ParseAllocatedGraphIdFromJson(const Json &json,
-                                              std::unordered_map<std::string, uint32_t> &allocated_graph_id);
+                                              std::map<std::string, uint32_t> &allocated_graph_id);
   static Status ParseBroadcastInfoFromJson(const Json &json,
                                            std::unordered_map<std::string, VarBroadCastInfo> &var_broadcast_info);
   static Status GetVarNameFromVarKey(const string &var_key, const GeTensorDesc &tensor_desc, string &var_name);
diff --git a/ge/common/profiling/ge_profiling.cc b/ge/common/profiling/ge_profiling.cc
index 9060f82b..08fdc0ae 100644
--- a/ge/common/profiling/ge_profiling.cc
+++ b/ge/common/profiling/ge_profiling.cc
@@ -88,7 +88,7 @@ bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) {
     return false;
   }
 
-  std::unordered_set<uint32_t> record;
+  std::set<uint32_t> record;
   for (size_t i = 0; i < device_nums; ++i) {
     uint32_t dev_id = deviceid_list[i];
     if (dev_id >= static_cast<uint32_t>(dev_count)) {
diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt
index 26e53c7b..8e56dc50 100644
--- a/ge/executor/CMakeLists.txt
+++ b/ge/executor/CMakeLists.txt
@@ -167,6 +167,8 @@ target_compile_options(ge_executor PRIVATE
     $<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common>
     $<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Debug>>:/MTd>
     $<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Release>>:/MT>
+    $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable>
+    $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format>
 )
 
 target_compile_definitions(ge_executor PRIVATE
@@ -178,7 +180,7 @@ target_compile_definitions(ge_executor PRIVATE
     LOG_CPP
 )
 
-target_include_directories(ge_executor PRIVATE
+target_include_directories(ge_executor SYSTEM PRIVATE
     ${GE_CODE_DIR}/ge
     ${GE_CODE_DIR}/inc
     ${GE_CODE_DIR}/inc/external
@@ -212,12 +214,14 @@ target_compile_options(ge_executor_shared PRIVATE
     -Werror
     -O2
     -Wno-deprecated-declarations
+    -fvisibility=hidden
 )
 
 target_compile_definitions(ge_executor_shared PRIVATE
     PROTOBUF_INLINE_NOT_IN_HEADERS=0
     DAVINCI_SUPPORT_PROFILING
     google=ascend_private
+    FUNC_VISIBILITY
 )
 
 target_include_directories(ge_executor_shared PRIVATE
@@ -238,6 +242,10 @@ target_include_directories(ge_executor_shared PRIVATE
     ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )
 
+target_link_options(ge_executor_shared PRIVATE
+    -Wl,-Bsymbolic
+)
+
 target_link_libraries(ge_executor_shared PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     msprofiler
diff --git a/ge/ge_local_engine/CMakeLists.txt b/ge/ge_local_engine/CMakeLists.txt
index 7189e8ff..00142cfe 100755
--- a/ge/ge_local_engine/CMakeLists.txt
+++ b/ge/ge_local_engine/CMakeLists.txt
@@ -27,10 +27,12 @@ add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
 target_compile_options(ge_local_engine PRIVATE
     -Werror
     -fno-common
+    -fvisibility=hidden
 )
 
 target_compile_definitions(ge_local_engine PRIVATE
     google=ascend_private
+    FUNC_VISIBILITY
 )
 
 target_include_directories(ge_local_engine PRIVATE
@@ -51,6 +53,10 @@ target_include_directories(ge_local_engine PRIVATE
     ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )
 
+target_link_options(ge_local_engine PRIVATE
+    -Wl,-Bsymbolic
+)
+
 target_link_libraries(ge_local_engine PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     -Wl,--no-as-needed
@@ -67,11 +73,12 @@ add_library(atc_ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
 target_compile_options(atc_ge_local_engine PRIVATE
     -Werror
     -fno-common
+    -fvisibility=hidden
 )
 
 target_compile_definitions(atc_ge_local_engine PRIVATE
-    COMPILE_OMG_PACKAGE
     google=ascend_private
+    FUNC_VISIBILITY
 )
 
 target_include_directories(atc_ge_local_engine PRIVATE
@@ -92,6 +99,10 @@ target_include_directories(atc_ge_local_engine PRIVATE
     ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )
 
+target_link_options(atc_ge_local_engine PRIVATE
+    -Wl,-Bsymbolic
+)
+
 target_link_libraries(atc_ge_local_engine PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     -Wl,--no-as-needed
@@ -113,10 +124,12 @@ add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDR
 target_compile_options(ge_local_opskernel_builder PRIVATE
     -Werror
     -fno-common
+    -fvisibility=hidden
 )
 
 target_compile_definitions(ge_local_opskernel_builder PRIVATE
     google=ascend_private
+    FUNC_VISIBILITY
 )
 
 target_include_directories(ge_local_opskernel_builder PRIVATE
@@ -137,6 +150,10 @@ target_include_directories(ge_local_opskernel_builder PRIVATE
     ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )
 
+target_link_options(ge_local_opskernel_builder PRIVATE
+    -Wl,-Bsymbolic
+)
+
 target_link_libraries(ge_local_opskernel_builder PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     -Wl,--no-as-needed
@@ -154,10 +171,12 @@ add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO
 target_compile_options(atc_ge_local_opskernel_builder PRIVATE
     -Werror
     -fno-common
+    -fvisibility=hidden
 )
 
 target_compile_definitions(atc_ge_local_opskernel_builder PRIVATE
     google=ascend_private
+    FUNC_VISIBILITY
 )
 
 target_include_directories(atc_ge_local_opskernel_builder PRIVATE
@@ -178,6 +197,10 @@ target_include_directories(atc_ge_local_opskernel_builder PRIVATE
     ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )
 
+target_link_options(atc_ge_local_opskernel_builder PRIVATE
+    -Wl,-Bsymbolic
+)
+
 target_link_libraries(atc_ge_local_opskernel_builder PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     -Wl,--no-as-needed
@@ -200,11 +223,13 @@ add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PR
 target_compile_options(ge_local_opskernel_builder_static PRIVATE
     -Werror
     -fno-common
+    -fvisibility=hidden
 )
 
 target_compile_definitions(ge_local_opskernel_builder_static PRIVATE
     google=ascend_private
     LOG_CPP
+    FUNC_VISIBILITY
 )
 
 target_include_directories(ge_local_opskernel_builder_static PRIVATE
diff --git a/ge/ge_local_engine/engine/ge_local_engine.h b/ge/ge_local_engine/engine/ge_local_engine.h
index 65dfe65b..9eedb533 100644
--- a/ge/ge_local_engine/engine/ge_local_engine.h
+++ b/ge/ge_local_engine/engine/ge_local_engine.h
@@ -17,6 +17,20 @@
 #ifndef GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_
 #define GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_
 
+#if defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY _declspec(dllexport)
+#else
+#define GE_FUNC_VISIBILITY
+#endif
+#else
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define GE_FUNC_VISIBILITY
+#endif
+#endif
+
 #include <map>
 #include <memory>
 #include <string>
@@ -32,7 +46,7 @@ namespace ge_local {
  * ge local engine.
  * Used for the ops not belong to any engine. eg:netoutput
  */
-class GeLocalEngine {
+class GE_FUNC_VISIBILITY GeLocalEngine {
  public:
   /**
    * get GeLocalEngine instance.
@@ -94,25 +108,25 @@ extern "C" {
  * When Ge start, GE will invoke this interface
  * @return The status whether initialize successfully
  */
-ge::Status Initialize(const map<string, string> &options);
+GE_FUNC_VISIBILITY ge::Status Initialize(const map<string, string> &options);
 
 /**
  * After the initialize, GE will invoke this interface to get the Ops kernel Store
  * @param ops_kernel_map The ge local's ops kernel info
  */
-void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map);
+GE_FUNC_VISIBILITY void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map);
 
 /**
  * After the initialize, GE will invoke this interface to get the Graph Optimizer
  * @param graph_optimizers The ge local's Graph Optimizer objs
  */
-void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers);
+GE_FUNC_VISIBILITY void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers);
 
 /**
  * When the graph finished, GE will invoke this interface
  * @return The status whether initialize successfully
  */
-ge::Status Finalize();
+GE_FUNC_VISIBILITY ge::Status Finalize();
 }
 
 #endif  // GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_
diff --git a/ge/ge_local_engine/engine/host_cpu_engine.h b/ge/ge_local_engine/engine/host_cpu_engine.h
index 0b99ecac..fdec212e 100644
--- a/ge/ge_local_engine/engine/host_cpu_engine.h
+++ b/ge/ge_local_engine/engine/host_cpu_engine.h
@@ -16,6 +16,20 @@
 #ifndef GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_
 #define GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_
 
+#if defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY _declspec(dllexport)
+#else
+#define GE_FUNC_VISIBILITY
+#endif
+#else
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define GE_FUNC_VISIBILITY
+#endif
+#endif
+
 #include <mutex>
 #include "framework/common/ge_inner_error_codes.h"
 #include "graph/node.h"
@@ -23,7 +37,7 @@
 #include "external/../register/register.h"
 
 namespace ge {
-class HostCpuEngine {
+class GE_FUNC_VISIBILITY HostCpuEngine {
  public:
   ~HostCpuEngine() = default;
 
diff --git a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h
index 8a7dafe2..38653554 100644
--- a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h
+++ b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h
@@ -22,7 +22,7 @@
 
 namespace ge {
 namespace ge_local {
-class GeLocalOpsKernelBuilder : public OpsKernelBuilder {
+class GE_FUNC_VISIBILITY GeLocalOpsKernelBuilder : public OpsKernelBuilder {
  public:
   ~GeLocalOpsKernelBuilder() override;
   Status Initialize(const map<std::string, std::string> &options) override;
diff --git a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h
index cdfbeffa..d35b01c7 100755
--- a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h
+++ b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h
@@ -17,6 +17,20 @@
 #ifndef GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_GE_LOCAL_OPS_KERNEL_INFO_H_
 #define GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_GE_LOCAL_OPS_KERNEL_INFO_H_
 
+#if defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY _declspec(dllexport)
+#else
+#define GE_FUNC_VISIBILITY
+#endif
+#else
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define GE_FUNC_VISIBILITY
+#endif
+#endif
+
 #include <map>
 #include <string>
 #include <vector>
@@ -25,7 +39,7 @@
 
 namespace ge {
 namespace ge_local {
-class GeLocalOpsKernelInfoStore : public OpsKernelInfoStore {
+class GE_FUNC_VISIBILITY GeLocalOpsKernelInfoStore : public OpsKernelInfoStore {
  public:
   GeLocalOpsKernelInfoStore() = default;
 
diff --git a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h
index 55587b2e..e9efe0aa 100644
--- a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h
+++ b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h
@@ -21,7 +21,7 @@
 
 namespace ge {
 namespace ge_local {
-class GeDeletedOp : public Op {
+class GE_FUNC_VISIBILITY GeDeletedOp : public Op {
  public:
   GeDeletedOp(const Node &node, RunContext &run_context);
 
diff --git a/ge/ge_local_engine/ops_kernel_store/op/no_op.h b/ge/ge_local_engine/ops_kernel_store/op/no_op.h
index 40e5766b..85b6bb58 100644
--- a/ge/ge_local_engine/ops_kernel_store/op/no_op.h
+++ b/ge/ge_local_engine/ops_kernel_store/op/no_op.h
@@ -21,7 +21,7 @@
 
 namespace ge {
 namespace ge_local {
-class NoOp : public Op {
+class GE_FUNC_VISIBILITY NoOp : public Op {
  public:
   NoOp(const Node &node, RunContext &run_context);
 
diff --git a/ge/ge_local_engine/ops_kernel_store/op/op.h b/ge/ge_local_engine/ops_kernel_store/op/op.h
index c5a3df7a..b75a8bed 100644
--- a/ge/ge_local_engine/ops_kernel_store/op/op.h
+++ b/ge/ge_local_engine/ops_kernel_store/op/op.h
@@ -29,7 +29,7 @@ namespace ge_local {
 /**
  * The base class for all op.
  */
-class Op {
+class GE_FUNC_VISIBILITY Op {
  public:
   Op(const Node &node, RunContext &run_context);
 
diff --git a/ge/ge_local_engine/ops_kernel_store/op/op_factory.h b/ge/ge_local_engine/ops_kernel_store/op/op_factory.h
index 0faab508..8dd411b6 100644
--- a/ge/ge_local_engine/ops_kernel_store/op/op_factory.h
+++ b/ge/ge_local_engine/ops_kernel_store/op/op_factory.h
@@ -32,7 +32,7 @@ using OP_CREATOR_FUNC = std::function<std::shared_ptr<Op>(const Node &, RunConte
 /**
  * manage all the op, support create op.
  */
-class OpFactory {
+class GE_FUNC_VISIBILITY OpFactory {
  public:
   static OpFactory &Instance();
 
@@ -72,7 +72,7 @@ class OpFactory {
   std::vector<std::string> all_ops_;
 };
 
-class OpRegistrar {
+class GE_FUNC_VISIBILITY OpRegistrar {
  public:
   OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) {
     OpFactory::Instance().RegisterCreator(type, func);
diff --git a/ge/ge_runtime/CMakeLists.txt b/ge/ge_runtime/CMakeLists.txt
index 56b5ab41..b00dd5b3 100644
--- a/ge/ge_runtime/CMakeLists.txt
+++ b/ge/ge_runtime/CMakeLists.txt
@@ -27,7 +27,7 @@ target_compile_options(ge_runtime PRIVATE
     -fno-common
 )
 
-target_compile_definitions(ge_runtime PRIVATE 
+target_compile_definitions(ge_runtime PRIVATE
     PROTOBUF_INLINE_NOT_IN_HEADERS=0
     LOG_CPP
 )
@@ -53,6 +53,10 @@ target_include_directories(ge_runtime PRIVATE
     ${CMAKE_BINARY_DIR}/proto/ge
 )
 
+target_link_options(ge_runtime PRIVATE
+    -Wl,-Bsymbolic
+)
+
 target_link_libraries(ge_runtime PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     -Wl,--no-as-needed
diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc
index ebd23948..41f24b94 100755
--- a/ge/graph/build/memory/block_mem_assigner.cc
+++ b/ge/graph/build/memory/block_mem_assigner.cc
@@ -1121,7 +1121,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
             }
           }
           reusable_block->continuous_block_ = continuous;
-          reusable_block->ref_count_++;
           reusable_blocks_[memory_type][stream_id].erase((++it).base());
           return reusable_block;
         }
@@ -1136,7 +1135,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
   block->is_zero_copy_ = IsZeroCopyBlock(n, continuous);
   block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size);
   block->stream_id_ = node_op_desc->GetStreamId();
-  block->ref_count_++;
   block->continuous_block_ = continuous;
   block->batch_label_ = batch_label;
   if (mem_type == kOutput) {
@@ -1266,6 +1264,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in
     // hccl task need align header and tail
     block->first_continuous_block_ = true;
     block->last_continuous_block_ = true;
+    ++(block->ref_count_);
   } else {
     GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str());
     return INTERNAL_ERROR;
@@ -1289,6 +1288,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
                                  return nullptr, "Get no align size failed");
 
   std::string symbol;
+  bool reuse_input = false;
   if (IsSymbolExist(node_index_io, symbol)) {
     block = symbol_blocks_[symbol];
     GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str());
@@ -1303,6 +1303,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
     block->SetLifeTimeEnd(life_time_);
     block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size);
     block->ref_count_++;
+    reuse_input = true;
 
     // add new size
     align_size = block_size;
@@ -1336,7 +1337,6 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
                         workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type);
   }
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "Block is nullptr.");
-  int out_count_reuse_input = block->ref_count_;
   int out_count = 0;
   GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(), GELOGE(FAILED, "index is out of range."); return nullptr);
   auto out_data_anchor = n->GetOutDataAnchor(index);
@@ -1351,28 +1351,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
       out_count++;
     }
   }
-  bool reuse_input = false;
-  for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) {
-    auto owner_node = in_anchor->GetOwnerNode();
-    GE_IF_BOOL_EXEC(owner_node == nullptr, continue);
-    auto op_desc = owner_node->GetOpDesc();
-    GE_IF_BOOL_EXEC(op_desc == nullptr, continue);
-    for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) {
-      bool dst_reuse_input = false;
-      uint32_t dst_reuse_input_index = 0;
-      auto owner_node_op_desc = op_desc->GetOutputDescPtr(i);
-      GE_IF_BOOL_EXEC(owner_node_op_desc == nullptr, continue);
-      GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInput(*owner_node_op_desc, dst_reuse_input) != SUCCESS,
-                      GELOGI("Get dst_reuse_input failed"));
-      GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS,
-                      GELOGI("Get dst_reuse_input_index failed"));
-      if (dst_reuse_input && (dst_reuse_input_index == static_cast<uint32_t>(in_anchor->GetIdx()))) {
-        out_count_reuse_input += 1;
-        reuse_input = true;
-      }
-    }
-  }
-  block->ref_count_ = reuse_input ? out_count_reuse_input + out_count - 1 : out_count;
+  block->ref_count_ = (reuse_input && out_count != 0) ? (block->ref_count_ + out_count - 1)
+                                                      : (block->ref_count_ + out_count);
   return block;
 }
 
@@ -1484,12 +1464,25 @@ void BlockMemAssigner::ReleaseInputNodeOutMemory(const unordered_map<string, vec
       GELOGD("node_type_indexs: %d, %s", node_type_indexs.back().index,
              node_type_indexs.back().node->GetName().c_str());
 
-      if ((node_type_indexs.back().node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) &&
-          (node_type_indexs.back().index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx()))) {
+      bool is_block_matched = false;
+      for (auto &node_type_index : node_type_indexs) {
+        is_block_matched = (node_type_index.node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) &&
+                           (node_type_index.index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx()));
+        if (is_block_matched) {
+          GELOGI("Block of peer out is matched. Peer node:%s, output index:%u, "
+                 "current node:%s, input index:%d, block ref_count:%d.",
+                 node_type_index.node->GetName().c_str(), node_type_index.index,
+                 node->GetName().c_str(), in_anchor->GetIdx(), block->ref_count_);
+          break;
+        }
+      }
+
+      if (is_block_matched) {
         ReleaseMemory(block, reusable_memory, (node->GetOpDesc()->GetStreamId() == block->stream_id_));
         if (block->ref_count_ == 0 && block->same_stream_) {
           SetLastUsedInputMemAttr(node, in_anchor->GetIdx());
         }
+        break;
       }
     }
   }
@@ -1530,6 +1523,21 @@ void CheckAndGetOpReuseEnv(const string &env, vector<string> &env_vec, bool &op_
   return;
 }
 
+// Parks the block of an output without any peer input node; it is released when the next node is allocated.
+void BlockMemAssigner::CheckAndReleaseSuspendedBlock(const NodePtr &node, uint32_t idx, MemoryBlock *block) {
+  if (node == nullptr || node->GetOpDesc() == nullptr || block == nullptr) {
+    return;
+  }
+  int64_t stream_id = node->GetOpDesc()->GetStreamId();
+  auto anchor = node->GetOutDataAnchor(static_cast<int>(idx));
+  if ((anchor != nullptr) && (anchor->GetPeerInDataNodesSize() == 0)) {
+    block->ref_count_ = (block->ref_count_ == 0) ? 1 : block->ref_count_;
+    stream_workspace_blocks_[block->memory_type_][stream_id].emplace_back(block);
+    GELOGI("The output is suspended, and will be released in allocation of next node. Name:%s, index:%u, "
+           "size:%zu, ref_count:%d.", node->GetName().c_str(), idx, block->Size(), block->ref_count_);
+  }
+}
+
 Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector<int64_t> &ranges) {
   auto op_desc = node->GetOpDesc();
   int64_t stream_id = op_desc->GetStreamId();
@@ -1560,7 +1568,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
   // Allocate memory for the current node and release node memory of the same size in the workspace
   GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1",
                   for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end();
-                       ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); });
+                       ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]);
+                                 iter->second[stream_id].clear();});
   if (IsContinuousOutput(node)) {
     return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_);
   }
@@ -1621,6 +1630,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
         continue;
       }
       symbol_blocks_[iter->second] = mem_block;
+      // The output is suspended, and will be released in allocation of next node.
+      CheckAndReleaseSuspendedBlock(node, i, mem_block);
     }
   }
   return SUCCESS;
@@ -1648,9 +1659,6 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
     if (AssignOutputMemoryWithReuse(n, ranges) != SUCCESS) {
       return;
     }
-    for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); ++iter) {
-      iter->second[stream_id].clear();
-    }
     vector<int64_t> temp;
     int64_t tatal_size = 0;
     GetNodeWorkSpaceSize(n, temp, tatal_size);
@@ -1692,6 +1700,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
                                            kWorkspace, n, static_cast<uint32_t>(i), workspace_reuse_flag,
                                            is_op_reuse_mem_, false, memory_type);
       GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mem_block == nullptr, continue, "failed to apply memory block.");
+      ++(mem_block->ref_count_);
       CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block, memory_type);
     }
     for (auto it = reusable_blocks_.begin(); it != reusable_blocks_.end(); ++it) {
diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h
index 4401108d..199a84f9 100755
--- a/ge/graph/build/memory/block_mem_assigner.h
+++ b/ge/graph/build/memory/block_mem_assigner.h
@@ -454,6 +454,8 @@ class BlockMemAssigner : public MemAssigner {
 
   void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node);
 
+  void CheckAndReleaseSuspendedBlock(const NodePtr &node, uint32_t idx, MemoryBlock *block);
+
   std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_;
 
   std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> stream_workspace_blocks_;
@@ -464,7 +466,7 @@ class BlockMemAssigner : public MemAssigner {
 
   std::unordered_map<std::string, std::unordered_map<uint32_t, MemoryBlock *>> node_continuous_input_blocks_;
 
-  std::unordered_map<std::string, uint32_t> node_continuous_input_counts_;
+  std::map<std::string, uint32_t> node_continuous_input_counts_;
 
   // reuse memory
   vector<string> op_no_reuse_mem_vec_;
diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc
index 8c5d8940..ca64c869 100755
--- a/ge/graph/build/memory/graph_mem_assigner.cc
+++ b/ge/graph/build/memory/graph_mem_assigner.cc
@@ -528,7 +528,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
 
     GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] "
         "size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
-        node->GetType().c_str(), peer_op_desc->GetName().c_str(),peer_out_data_anchor->GetIdx(),
+        peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(),
         output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type,
         is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding);
   }
@@ -618,7 +618,7 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node
     }
     GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]"
            " size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
-           node->GetType().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
+           out_op_desc->GetName().c_str(), node->GetType().c_str(), out_data_anchor->GetIdx(),
            output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL,
            is_nopadding ? nopadding_size : tensor_desc_size, is_nopadding);
   }
diff --git a/ge/graph/build/run_context.cc b/ge/graph/build/run_context.cc
index 50094cf3..ba328840 100644
--- a/ge/graph/build/run_context.cc
+++ b/ge/graph/build/run_context.cc
@@ -90,7 +90,7 @@ Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t even
   // Create rt label
   for (uint32_t i = 0; i < label_num; ++i) {
     rtLabel_t label = nullptr;
-    rt_ret = rtLabelCreate(&label);
+    rt_ret = rtLabelCreateV2(&label, rt_model_);
     if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "rtLabelCreate failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i);
+      GELOGE(RT_FAILED, "rtLabelCreateV2 failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i);
       return RT_FAILED;
diff --git a/ge/graph/build/stream_allocator.cc b/ge/graph/build/stream_allocator.cc
index 88ffda02..d90d1f40 100644
--- a/ge/graph/build/stream_allocator.cc
+++ b/ge/graph/build/stream_allocator.cc
@@ -1226,7 +1226,7 @@ Status StreamAllocator::InsertSyncEventNodes() {
     }
   }
 
-  Status status = ReorderEventNodes();
+  Status status = whole_graph_->InsertGraphEvents();
   if (status != SUCCESS) {
-    GELOGE(status, "Graph ReorderEventNodes failed");
+    GELOGE(status, "Graph InsertGraphEvents failed");
     return status;
@@ -1235,22 +1235,6 @@ Status StreamAllocator::InsertSyncEventNodes() {
   return SUCCESS;
 }
 
-Status StreamAllocator::ReorderEventNodes() const {
-  Status status = whole_graph_->InsertEventNodes();
-  if (status != SUCCESS) {
-    GELOGE(status, "Whole graph InsertEventNodes failed");
-    return status;
-  }
-  for (const auto &subgraph : whole_graph_->GetAllSubgraphs()) {
-    status = subgraph->InsertEventNodes();
-    if (status != SUCCESS) {
-      GELOGE(status, "Subgraph %s InsertEventNodes failed", subgraph->GetName().c_str());
-      return status;
-    }
-  }
-  return SUCCESS;
-}
-
 void StreamAllocator::DumpEvents() {
   map<int64_t, vector<NodePtr>> after_refresh_stream_nodes;
   for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) {
diff --git a/ge/graph/build/stream_allocator.h b/ge/graph/build/stream_allocator.h
index a21b2f77..dd82700d 100644
--- a/ge/graph/build/stream_allocator.h
+++ b/ge/graph/build/stream_allocator.h
@@ -74,7 +74,6 @@ class StreamAllocator {
   Status RefreshContinuousEvents();
 
   Status InsertSyncEventNodes();
-  Status ReorderEventNodes() const;
 
   void DumpEvents();
 
diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc
index 8bd7d32e..3f4cd1bc 100755
--- a/ge/graph/build/task_generator.cc
+++ b/ge/graph/build/task_generator.cc
@@ -211,7 +211,7 @@ Status TaskGenerator::SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion
     // and it have no attr or group attr different
     // which means bad case, return error
     bool call_check = true;
-    std::unordered_set<int64_t> input_group_ids;
+    std::set<int64_t> input_group_ids;
     for (const auto &input_node : node->GetInNodes()) {
       auto iter = nodes_with_group_attr.find(input_node);
       if (iter == nodes_with_group_attr.end()) {
@@ -533,13 +533,6 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) {
     return GE_GRAPH_GRAPH_NODE_NULL;
   }
 
-  int64_t node_index = 0;
-  for (auto &node : all_nodes) {
-    OpDescPtr op_desc = node->GetOpDesc();
-    GE_CHECK_NOTNULL(op_desc);
-    op_desc->SetId(node_index++);
-  }
-
   map<int64_t, vector<OpDescPtr>> all_stream_ops;
   for (auto &node : all_nodes) {
     OpDescPtr op_desc = node->GetOpDesc();
@@ -784,7 +777,7 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin
     }
 
     if (graph->GetNeedIteration()) {
-      if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") {
+      if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD) {
         profiling_point.end_index.insert(current_idx);
-        GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive",
+        GELOGI("Iter end name %s, idx %u, from NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD",
                op_desc->GetName().c_str(), current_idx);
diff --git a/ge/graph/common/transop_util.h b/ge/graph/common/transop_util.h
index 3332e1fb..883ae41b 100644
--- a/ge/graph/common/transop_util.h
+++ b/ge/graph/common/transop_util.h
@@ -44,7 +44,7 @@ class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY TransOpUtil {
 
   static TransOpUtil &Instance();
 
-  typedef std::unordered_map<std::string, int> transop_index_op;
+  typedef std::map<std::string, int> transop_index_op;
   transop_index_op transop_index_map_;
 };
 }  // namespace ge
diff --git a/ge/graph/load/model_manager/cpu_queue_schedule.cc b/ge/graph/load/model_manager/cpu_queue_schedule.cc
index d9b716ea..6807043a 100644
--- a/ge/graph/load/model_manager/cpu_queue_schedule.cc
+++ b/ge/graph/load/model_manager/cpu_queue_schedule.cc
@@ -99,7 +99,7 @@ Status CpuTaskModelDequeue::Distribute() {
 /// @param [in] outside_addrs: model input/output memory addr
 /// @return: 0 for success / others for failed
 ///
-Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs) {
+Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs) {
   if ((args_ != nullptr) || (args_size_ > 0)) {
     GELOGE(FAILED, "Task already initialized, size: %u", args_size_);
     return FAILED;
@@ -110,32 +110,22 @@ Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const v
   GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)
 
   AddrMapInfo addr_map_info;
-  for (auto &addrs : outside_addrs) {
-    auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
-    GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs");
-    std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0];
-    for (const auto &virtual_args_addr : virtual_args_addrs) {
-      addr_map_info.addr_num += virtual_args_addr.second.size();
-    }
-  }
-  GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num);
-
   // init src_addrs/dst_addrs
-  size_t index = 0;
   vector<uint64_t> src_addrs;
   vector<uint64_t> dst_addrs;
-  for (auto &addrs : outside_addrs) {
-    auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
+  for (const auto &addrs : outside_addrs) {
+    const auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
     GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs");
     std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0];
     for (const auto &virtual_args_addr : virtual_args_addrs) {
+      addr_map_info.addr_num += virtual_args_addr.second.size();
       for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) {
-        src_addrs.push_back(mbuf_list.at(index));
+        src_addrs.emplace_back(mbuf_list.at(addrs.first));
         dst_addrs.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i))));
       }
     }
-    index++;
   }
+  GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num);
 
   // malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs
   GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM));
diff --git a/ge/graph/load/model_manager/cpu_queue_schedule.h b/ge/graph/load/model_manager/cpu_queue_schedule.h
index de4c5327..8dc44538 100644
--- a/ge/graph/load/model_manager/cpu_queue_schedule.h
+++ b/ge/graph/load/model_manager/cpu_queue_schedule.h
@@ -93,7 +93,7 @@ class CpuTaskZeroCopy : public CpuTaskInfo {
   ~CpuTaskZeroCopy() override;
 
   Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; }
-  Status Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs);
+  Status Init(std::vector<uintptr_t> &mbuf_list, const std::map<uint32_t, ZeroCopyOffset> &outside_addrs);
 
   Status Distribute() override;
 private:
diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc
index 95fd8392..ed2428d9 100755
--- a/ge/graph/load/model_manager/davinci_model.cc
+++ b/ge/graph/load/model_manager/davinci_model.cc
@@ -842,6 +842,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
   };
 
   vector<OpDescPtr> output_op_list;
+  set<const void *> input_outside_addrs;
+  set<const void *> output_outside_addrs;
   map<uint32_t, OpDescPtr> data_by_index;
   map<string, OpDescPtr> variable_by_name;
   auto nodes = compute_graph->GetAllNodes();
@@ -858,7 +860,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
     GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc);
 
     if (IsDataOp(op_desc->GetType())) {
-      if (InitDataOp(compute_graph, node, data_op_index, data_by_index) != SUCCESS) {
+      if (InitDataOp(compute_graph, node, data_op_index, data_by_index, input_outside_addrs) != SUCCESS) {
         GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str());
         return PARAM_INVALID;
       }
@@ -867,7 +869,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
     }
 
     if (op_desc->GetType() == NETOUTPUT) {
-      if (InitNetOutput(compute_graph, node, output_op_list) != SUCCESS) {
+      if (InitNetOutput(compute_graph, node, output_op_list, output_outside_addrs) != SUCCESS) {
         GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str());
         return PARAM_INVALID;
       }
@@ -961,7 +963,7 @@ void DavinciModel::SetLabelForDynamic(const NodePtr &node) {
 /// @return Status
 ///
 Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index,
-                                map<uint32_t, OpDescPtr> &data_by_index) {
+                                map<uint32_t, OpDescPtr> &data_by_index, set<const void *> &input_outside_addrs) {
   // op_desc Checked by Init: Data, valid.
   auto op_desc = node->GetOpDesc();
   if (node->GetOwnerComputeGraph() != graph) {
@@ -1000,16 +1002,12 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod
     GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str());
     return PARAM_INVALID;
   }
-  new_input_data_info_[data_index] = zero_copy_offset;
-
-  for (size_t index = 0; index < virtual_addr_list.size(); ++index) {
-    void *addr = virtual_addr_list.at(index);
-    if (new_input_outside_addrs_.find(addr) != new_input_outside_addrs_.end()) {
-      continue;
-    }
-    zero_copy_offset.SetInputOutsideAddrs(output_offset_list, addr, index, fusion_flag, real_virtual_addrs_);
-    new_input_outside_addrs_[addr] = zero_copy_offset;
+  if (input_outside_addrs.count(virtual_addr) == 0) {
+    int64_t output_offset = output_offset_list.at(kDataIndex);
+    zero_copy_offset.SetInputOutsideAddrs(output_offset, virtual_addr, fusion_flag, real_virtual_addrs_);
+    input_outside_addrs.insert(virtual_addr);
   }
+  input_data_info_[data_index] = zero_copy_offset;
 
   return SUCCESS;
 }
@@ -1085,7 +1083,7 @@ bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) {
 /// @param [in/out] vector<OpDescPtr>: All NetOutput node in model.
 /// @return Status
 Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node,
-                                   vector<OpDescPtr> &output_op_list) {
+                                   vector<OpDescPtr> &output_op_list, set<const void *> &output_outside_addrs) {
   // node->GetOpDesc Checked by Init: NetOutput, valid.
   auto op_desc = node->GetOpDesc();
   // excludes the function op sub graph, e.g. case,if
@@ -1117,7 +1115,7 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &
     return PARAM_INVALID;
   }
 
-  size_t num = new_output_data_info_.size();
+  size_t num = output_data_info_.size();
   bool fusion_flag = false;
 
   size_t input_count = input_size_list.size();
@@ -1131,22 +1129,22 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &
     Status ret = zero_copy_offset.InitOutputDataInfo(input_size_list, virtual_addr_list, op_desc, idx, fusion_flag);
     GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.",
                                            op_desc->GetName().c_str()); return PARAM_INVALID;);
-    new_output_data_info_[num + idx] = zero_copy_offset;
     void *addr = virtual_addr_list.at(idx);
     int64_t input_offset = input_offset_list.at(idx);
-    vector<void *> tensor_addrs;
-    zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs);
-    auto rslt = new_output_outside_addrs_.insert(std::pair<void *, ZeroCopyOffset>(addr, zero_copy_offset));
-    if (!rslt.second) {
+    if (output_outside_addrs.count(addr) == 0) {
+      vector<void *> tensor_addrs;
+      zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs);
+      output_outside_addrs.insert(addr);
+      for (size_t i = 0; i < tensor_addrs.size(); ++i) {
+        void *real_addr = tensor_addrs.at(i);
+        DisableZeroCopy(real_addr);
+        real_virtual_addrs_.insert(real_addr);
+      }
+    } else {
       GELOGI("same output_tensor_addr %p to different input_tensor of %s", addr, op_desc->GetName().c_str());
       DisableZeroCopy(addr);
     }
-
-    for (size_t i = 0; i < tensor_addrs.size(); ++i) {
-      void *real_addr = tensor_addrs.at(i);
-      DisableZeroCopy(real_addr);
-      real_virtual_addrs_.insert(real_addr);
-    }
+    output_data_info_[num + idx] = zero_copy_offset;
   }
   return SUCCESS;
 }
@@ -1402,7 +1400,7 @@ Status DavinciModel::InitLabelSet(const OpDescPtr &op_desc) {
   }
 
   rtLabel_t rt_label = nullptr;
-  rtError_t rt_error = rtLabelCreateEx(&rt_label, stream);
+  rtError_t rt_error = rtLabelCreateExV2(&rt_label, rt_model_handle_, stream);
   if (rt_error != RT_ERROR_NONE || rt_label == nullptr) {
     GELOGE(INTERNAL_ERROR, "InitLabelSet: %s create label failed, error=0x%x.", op_desc->GetName().c_str(), rt_error);
     return INTERNAL_ERROR;
@@ -1463,27 +1461,27 @@ Status DavinciModel::LoadWithQueue() {
     return SUCCESS;
   }
 
-  if (input_queue_ids_.size() != new_input_data_info_.size()) {
+  if (input_queue_ids_.size() != input_data_info_.size()) {
     GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Input queue ids not match model: input_queue=%zu input_data=%zu",
-           input_queue_ids_.size(), new_input_data_info_.size());
+           input_queue_ids_.size(), input_data_info_.size());
     return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID;
   }
 
-  if (output_queue_ids_.size() != new_output_data_info_.size()) {
+  if (output_queue_ids_.size() != output_data_info_.size()) {
     GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID,
            "Output queue ids not match model: output_queue=%zu output_data=%zu",
-           output_queue_ids_.size(), new_output_data_info_.size());
+           output_queue_ids_.size(), output_data_info_.size());
     return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID;
   }
 
   GE_CHK_STATUS_RET(AddHeadStream(), "Add head stream failed.");
   // Binding input_queue and Data Op.
   GE_CHK_STATUS_RET(BindInputQueue(), "Launch bind input queue failed.");
-  GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, new_input_outside_addrs_), "Launch zero copy failed.");
+  GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, input_data_info_), "Launch zero copy failed.");
 
   // Binding output_queue and NetOutput Op.
   GE_CHK_STATUS_RET(BindOutputQueue(), "Launch bind output queue failed.");
-  GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, new_output_outside_addrs_), "Launch zero copy failed.");
+  GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, output_data_info_), "Launch zero copy failed.");
 
   GE_CHK_STATUS_RET(CpuActiveStream(), "Launch active entry stream failed.");
   GE_CHK_STATUS_RET(CpuWaitEndGraph(), "Launch wait end graph failed.");
@@ -1499,9 +1497,9 @@ Status DavinciModel::LoadWithQueue() {
 Status DavinciModel::BindInputQueue() {
   // Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size()
   for (size_t i = 0; i < input_queue_ids_.size(); ++i) {
-    auto it = new_input_data_info_.find(i);
-    if (it == new_input_data_info_.end()) {
-      GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", new_input_data_info_.size(), i);
+    auto it = input_data_info_.find(i);
+    if (it == input_data_info_.end()) {
+      GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", input_data_info_.size(), i);
       return FAILED;
     }
 
@@ -1555,7 +1553,7 @@ Status DavinciModel::CpuModelDequeue(uint32_t queue_id) {
 }
 
 Status DavinciModel::CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list,
-                                          std::map<const void *, ZeroCopyOffset> &outside_addrs) {
+                                          const map<uint32_t, ZeroCopyOffset> &outside_addrs) {
   GELOGI("Set CpuKernel model zero_copy task enter.");
   std::shared_ptr<CpuTaskZeroCopy> zero_copy = MakeShared<CpuTaskZeroCopy>(rt_entry_stream_);
   if (zero_copy == nullptr) {
@@ -1579,9 +1577,9 @@ Status DavinciModel::CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list,
 Status DavinciModel::BindOutputQueue() {
   // Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size()
   for (size_t i = 0; i < output_queue_ids_.size(); ++i) {
-    auto it = new_output_data_info_.find(i);
-    if (it == new_output_data_info_.end()) {
-      GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i);
+    auto it = output_data_info_.find(i);
+    if (it == output_data_info_.end()) {
+      GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i);
       return FAILED;
     }
 
@@ -1685,9 +1683,9 @@ Status DavinciModel::CpuWaitEndGraph() {
 
 Status DavinciModel::BindEnqueue() {
   for (size_t i = 0; i < output_queue_ids_.size(); ++i) {
-    auto it = new_output_data_info_.find(i);
-    if (it == new_output_data_info_.end()) {
-      GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i);
+    auto it = output_data_info_.find(i);
+    if (it == output_data_info_.end()) {
+      GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i);
       return FAILED;
     }
 
@@ -2103,10 +2101,10 @@ Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_descs
 Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) {
   rtMemcpyKind_t kind = device_data ? RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE;
   const std::vector<DataBuffer> &blobs = input_data.blobs;
-  for (const auto &data : new_input_data_info_) {
+  for (const auto &data : input_data_info_) {
     if (data.first >= blobs.size()) {
       GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld, op_name(%s)", blobs.size(),
-             new_input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first,
+             input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first,
              data.second.GetOpName().c_str());
       return FAILED;
     }
@@ -2427,18 +2425,18 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r
 
   output_data.index = data_id;
   output_data.model_id = model_id_;
-  if (output_data.blobs.size() != new_output_data_info_.size()) {
+  if (output_data.blobs.size() != output_data_info_.size()) {
     GELOGE(FAILED, "Output data buffer num=%zu not equal model data num=%zu", output_data.blobs.size(),
-           new_output_data_info_.size());
+           output_data_info_.size());
     return FAILED;
   }
 
   std::vector<DataBuffer> &blobs = output_data.blobs;
   size_t idx = 0;
-  for (const auto &output : new_output_data_info_) {
+  for (const auto &output : output_data_info_) {
     if (output.first >= blobs.size()) {
       GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", blobs.size(),
-             new_input_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first);
+             output_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first);
       return FAILED;
     }
 
@@ -3166,8 +3164,11 @@ void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) {
 /// @return None.
 ///
 void DavinciModel::SetCopyOnlyOutput() {
-  for (const auto &output_outside_addrs : new_output_outside_addrs_) {
+  for (const auto &output_outside_addrs : output_data_info_) {
     ZeroCopyOffset output_outside = output_outside_addrs.second;
+    if (!output_outside.IsRelativeOffsetValid()) {
+      return;
+    }
     for (uint32_t out_count = 0; out_count < output_outside.GetAddrCount(); ++out_count) {
       auto &addrs_mapping_list = output_outside.GetOutsideAddrs();
       std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[out_count];
@@ -3219,12 +3220,12 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<v
   for (size_t i = 0; i < nums; ++i) {
     std::lock_guard<std::mutex> lock(outside_addrs_mutex_);
 
-    for (auto &input_outside_addrs : new_input_outside_addrs_) {
+    for (auto &input_outside_addrs : input_data_info_) {
       ZeroCopyOffset &input_outside = input_outside_addrs.second;
       input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
     }
 
-    for (auto &output_outside_addrs : new_output_outside_addrs_) {
+    for (auto &output_outside_addrs : output_data_info_) {
       ZeroCopyOffset &output_outside = output_outside_addrs.second;
       output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
     }
@@ -3293,12 +3294,12 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64
 /// @return SUCCESS handle successfully / PARAM_INVALID for failed
 ///
 Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &output_data, bool is_dynamic) {
-  if (UpdateIoTaskArgs(new_input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) {
+  if (UpdateIoTaskArgs(input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) {
     GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update input data to model failed.");
     return ACL_ERROR_GE_PARAM_INVALID;
   }
 
-  if (UpdateIoTaskArgs(new_output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) !=
+  if (UpdateIoTaskArgs(output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) !=
       SUCCESS) {
     GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update output data to model failed.");
     return ACL_ERROR_GE_PARAM_INVALID;
diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h
index 53e9cd4d..8ed82912 100755
--- a/ge/graph/load/model_manager/davinci_model.h
+++ b/ge/graph/load/model_manager/davinci_model.h
@@ -675,7 +675,7 @@ class DavinciModel {
   /// @return Status
   ///
   Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index,
-                    map<uint32_t, OpDescPtr> &data_by_index);
+                    map<uint32_t, OpDescPtr> &data_by_index, set<const void *> &input_outside_addrs);
 
   ///
   /// @ingroup ge
@@ -694,7 +694,8 @@ class DavinciModel {
   /// @param [in/out] vector<OpDescPtr>: All NetOutput node in model.
   /// @return Status
   ///
-  Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list);
+  Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list,
+                       set<const void *> &output_outside_addrs);
 
   ///
   /// @ingroup ge
@@ -764,7 +765,7 @@ class DavinciModel {
   ///
   Status BindInputQueue();
 
-  Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, map<const void *, ZeroCopyOffset> &outside_addrs);
+  Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs);
 
   ///
   /// @ingroup ge
@@ -897,10 +898,8 @@ class DavinciModel {
   void *global_step_addr_{nullptr};
   uint64_t global_step_size_{0};
 
-  map<uint32_t, ZeroCopyOffset> new_input_data_info_;
-  map<uint32_t, ZeroCopyOffset> new_output_data_info_;
-  map<const void *, ZeroCopyOffset> new_input_outside_addrs_;
-  map<const void *, ZeroCopyOffset> new_output_outside_addrs_;
+  map<uint32_t, ZeroCopyOffset> input_data_info_;
+  map<uint32_t, ZeroCopyOffset> output_data_info_;
 
   set<const void *> real_virtual_addrs_;
 
diff --git a/ge/graph/load/model_manager/ts_mem_mall.h b/ge/graph/load/model_manager/ts_mem_mall.h
index 64a64930..74ce5a16 100644
--- a/ge/graph/load/model_manager/ts_mem_mall.h
+++ b/ge/graph/load/model_manager/ts_mem_mall.h
@@ -100,8 +100,8 @@ class TsMemMall {
 
  private:
   std::mutex mem_mutex_;
-  std::unordered_map<int64_t, void *> mem_store_size_;
-  std::unordered_map<void *, int64_t> mem_store_addr_;
+  std::map<int64_t, void *> mem_store_size_;
+  std::map<void *, int64_t> mem_store_addr_;
   rtMemType_t mem_type_;
 };
 }  // namespace ge
diff --git a/ge/graph/load/model_manager/zero_copy_offset.cc b/ge/graph/load/model_manager/zero_copy_offset.cc
index 3f8555bb..4a448869 100644
--- a/ge/graph/load/model_manager/zero_copy_offset.cc
+++ b/ge/graph/load/model_manager/zero_copy_offset.cc
@@ -127,8 +127,8 @@ void ZeroCopyOffset::IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const
   }
 }
 
-void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index,
-                                          bool fusion_flag, std::set<const void *> &real_virtual_addrs) {
+void ZeroCopyOffset::SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag,
+                                          set<const void *> &real_virtual_addrs) {
   uint32_t out_count = 0;
   if (!fusion_flag) {
     out_count++;
@@ -138,7 +138,6 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_l
     real_virtual_addrs.insert(addr);
   } else {
     GELOGI("[ZCPY] set l2-fusion for virtual_addr %p.", addr);
-    int64_t output_offset = output_offset_list.at(index);
     for (size_t i = 0; i < zero_copy_basic_offset_.size(); ++i) {
       if (zero_copy_basic_offset_.at(i) == output_offset) {
         out_count++;
@@ -153,6 +152,7 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_l
     }
   }
   addr_count_ = out_count;
+  valid_relative_offset_ = true;
 }
 
 void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr,
@@ -181,9 +181,13 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo
     }
   }
   addr_count_ = out_count;
+  valid_relative_offset_ = true;
 }
 
 void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) {
+  if (!valid_relative_offset_) {
+    return;
+  }
   const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr);
   for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) {
     auto args_addrs = outside_addrs_[out_count].find(outside_addr);
diff --git a/ge/graph/load/model_manager/zero_copy_offset.h b/ge/graph/load/model_manager/zero_copy_offset.h
index fc63fced..82e1bb6d 100644
--- a/ge/graph/load/model_manager/zero_copy_offset.h
+++ b/ge/graph/load/model_manager/zero_copy_offset.h
@@ -43,8 +43,7 @@ class ZeroCopyOffset {
   ~ZeroCopyOffset();
 
   Status InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, bool &fusion_flag);
-  void SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index,
-                            bool fusion_flag, std::set<const void *> &real_virtual_addrs);
+  void SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag, set<const void *> &real_virtual_addrs);
 
   void IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const int64_t &tensor_addr, bool &fusion_flag);
   Status InitOutputDataInfo(const vector<int64_t> &input_size_list, const vector<void *> &virtual_addr_list,
@@ -65,9 +64,10 @@ class ZeroCopyOffset {
   // data_size of Data/Netoutput
   int64_t GetDataSize() const { return data_size_; }
   // value of *outside_addrs_ from davinci_model
-  const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() { return outside_addrs_; }
+  const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() const { return outside_addrs_; }
   // name of op
   std::string GetOpName() const { return op_name_; }
+  bool IsRelativeOffsetValid() const { return valid_relative_offset_; }  // set by Set{Input,Output}OutsideAddrs
 
  private:
   void *basic_addr_ = nullptr;
@@ -81,6 +81,7 @@ class ZeroCopyOffset {
 
   std::vector<int64_t> zero_copy_basic_offset_;
   std::vector<int64_t> zero_copy_relative_offset_;
+  bool valid_relative_offset_ = false;
 };
 }  // namespace ge
 #endif  // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_
diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc
index b6598f11..8b57858d 100755
--- a/ge/graph/manager/graph_manager.cc
+++ b/ge/graph/manager/graph_manager.cc
@@ -131,7 +131,7 @@ bool IsTailingOptimization() {
 }
 
 ge::Status CheckFpCeilingMode() {
-  static const std::unordered_set<std::string> kValidFpCeilingMode = {"0", "1", "2"};
+  static const std::set<std::string> kValidFpCeilingMode = {"0", "1", "2"};
   string mode;
   auto ret = ge::GetContext().GetOption("ge.fpCeilingMode", mode);
   if (ret == ge::GRAPH_SUCCESS) {
diff --git a/ge/graph/manager/graph_var_manager.h b/ge/graph/manager/graph_var_manager.h
index 924ddcb7..0da12f9c 100755
--- a/ge/graph/manager/graph_var_manager.h
+++ b/ge/graph/manager/graph_var_manager.h
@@ -170,8 +170,8 @@ class VarResource {
   std::unordered_map<std::string, VarAddrMgr> var_addr_mgr_map_;
   std::unordered_map<std::string, ge::GeTensorDesc> cur_var_tensor_desc_map_;
   std::unordered_map<std::string, std::vector<TransNodeInfo>> var_to_trans_road_;
-  std::unordered_map<std::string, uint32_t> var_names_to_changed_graph_id_;
-  std::unordered_map<std::string, uint32_t> var_names_to_allocated_graph_id_;
+  std::map<std::string, uint32_t> var_names_to_changed_graph_id_;
+  std::map<std::string, uint32_t> var_names_to_allocated_graph_id_;
   std::map<uint32_t, std::unordered_map<std::string, VarBroadCastInfo>> var_broad_cast_info_;
 };
 
diff --git a/ge/graph/partition/graph_partition.cc b/ge/graph/partition/graph_partition.cc
index fbc13920..d584337e 100755
--- a/ge/graph/partition/graph_partition.cc
+++ b/ge/graph/partition/graph_partition.cc
@@ -843,7 +843,7 @@ bool ge::GraphPartitioner::HasSecondPath(size_t src, size_t dst, size_t upper_bo
   /// Avoid recursion since stack space might be limited.
   /// We instead keep a stack of nodes to visit.
   std::vector<size_t> temp_stack;
-  std::unordered_set<size_t> visited;
+  std::set<size_t> visited;
   temp_stack.push_back(src);
   while (!temp_stack.empty()) {
     size_t cluster = temp_stack.back();
diff --git a/ge/graph/partition/graph_partition.h b/ge/graph/partition/graph_partition.h
index 9c22d40c..f34c67e6 100644
--- a/ge/graph/partition/graph_partition.h
+++ b/ge/graph/partition/graph_partition.h
@@ -36,7 +36,7 @@ using PartitionMap = std::unordered_map<ComputeGraphPtr, std::string>;
 using NodetoNodeMap = std::unordered_map<NodePtr, NodePtr>;
 using EnginetoGraphMap = std::unordered_map<std::string, ComputeGraphPtr>;
 using EdgeMap = std::set<std::pair<AnchorPtr, AnchorPtr>>;
-using ClusterSet = std::unordered_set<size_t>;
+using ClusterSet = std::set<size_t>;
 class Cluster {
  public:
   size_t index_;              // corresponding to rank of node
diff --git a/ge/graph/passes/constant_folding_pass.cc b/ge/graph/passes/constant_folding_pass.cc
index 4db14fc3..8a0c6c3c 100644
--- a/ge/graph/passes/constant_folding_pass.cc
+++ b/ge/graph/passes/constant_folding_pass.cc
@@ -50,12 +50,12 @@ Status RunOpKernelWithCheck(NodePtr &node,
   return FoldingPass::RunOpKernel(node, inputs, outputs);
 }
 
-const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>>
+const std::map<std::string, std::pair<std::uint64_t, uint64_t>>
     &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const {
   return statistic_of_ge_constant_folding_;
 }
 
-const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>>
+const std::map<std::string, std::pair<std::uint64_t, uint64_t>>
     &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const {
   return statistic_of_op_constant_folding_;
 }
diff --git a/ge/graph/passes/constant_folding_pass.h b/ge/graph/passes/constant_folding_pass.h
index c977157e..703e6edd 100644
--- a/ge/graph/passes/constant_folding_pass.h
+++ b/ge/graph/passes/constant_folding_pass.h
@@ -26,11 +26,11 @@ namespace ge {
 class ConstantFoldingPass : public FoldingPass {
  public:
   Status Run(ge::NodePtr &node) override;
-  const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> &GetGeConstantFoldingPerfStatistic() const;
-  const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> &GetOpConstantFoldingPerfStatistic() const;
+  const std::map<std::string, std::pair<std::uint64_t, uint64_t>> &GetGeConstantFoldingPerfStatistic() const;
+  const std::map<std::string, std::pair<std::uint64_t, uint64_t>> &GetOpConstantFoldingPerfStatistic() const;
  private:
-  std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_op_constant_folding_;
-  std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_ge_constant_folding_;
+  std::map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_op_constant_folding_;
+  std::map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_ge_constant_folding_;
 };
 }  // namespace ge
 
diff --git a/ge/graph/passes/hccl_continuous_memcpy_pass.cc b/ge/graph/passes/hccl_continuous_memcpy_pass.cc
index 7dd2fb06..cc928479 100644
--- a/ge/graph/passes/hccl_continuous_memcpy_pass.cc
+++ b/ge/graph/passes/hccl_continuous_memcpy_pass.cc
@@ -372,6 +372,11 @@ NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph,
   }
   GELOGI("Create Assign op:%s.", op_desc->GetName().c_str());
 
+  if (!AttrUtils::SetBool(op_desc, ATTR_NEED_COMPILE, true)) {
+    GELOGE(INTERNAL_ERROR, "Set ATTR_NEED_COMPILE Attr for node:%s fail.", op_desc->GetName().c_str());
+    return nullptr;
+  }
+
   graphStatus ret = op_desc->AddInputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx()));
   if (ret != GRAPH_SUCCESS) {
     GELOGE(INTERNAL_ERROR, "Create Assign op: add ref input desc fail.");
diff --git a/ge/graph/passes/hccl_continuous_memcpy_pass.h b/ge/graph/passes/hccl_continuous_memcpy_pass.h
index 0a21c896..538e89e9 100644
--- a/ge/graph/passes/hccl_continuous_memcpy_pass.h
+++ b/ge/graph/passes/hccl_continuous_memcpy_pass.h
@@ -52,7 +52,7 @@ class HcclContinuousMemcpyPass : public GraphPass {
 
   bool IsDataNode(const std::string& node_type);
 
-  std::unordered_map<std::string, uint32_t> node_num_map_;
+  std::map<std::string, uint32_t> node_num_map_;
 };
 }  // namespace ge
 
diff --git a/ge/graph/passes/hccl_memcpy_pass.h b/ge/graph/passes/hccl_memcpy_pass.h
index feea82d9..7ab63c59 100755
--- a/ge/graph/passes/hccl_memcpy_pass.h
+++ b/ge/graph/passes/hccl_memcpy_pass.h
@@ -50,7 +50,7 @@ class HcclMemcpyPass : public GraphPass {
 
   bool IsDataNode(const std::string& node_type);
 
-  std::unordered_map<std::string, uint32_t> node_num_map_;
+  std::map<std::string, uint32_t> node_num_map_;
 };
 }  // namespace ge
 
diff --git a/ge/graph/passes/multi_batch_clone_pass.cc b/ge/graph/passes/multi_batch_clone_pass.cc
index 17a1e3bb..b8fb6bde 100755
--- a/ge/graph/passes/multi_batch_clone_pass.cc
+++ b/ge/graph/passes/multi_batch_clone_pass.cc
@@ -92,8 +92,7 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) {
   }
 
   // parser data dynamic info from atc parameter --input_shape
-  if (multibatch::ParserDataToDynmaicInfo(batch_shapes_, GetLocalOmgContext().user_input_dims,
-                                          data_to_dynamic_info_) != SUCCESS) {
+  if (CheckAndParseDynamicData() != SUCCESS) {
     GELOGE(PARAM_INVALID, "Parse each data's own dynamic info failed");
     return PARAM_INVALID;
   }
@@ -177,6 +176,58 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) {
   return SUCCESS;
 }
 
+Status MultiBatchClonePass::CheckAndParseDynamicData() {
+  size_t unknown_shape_count = 0;  // Data nodes that have at least one negative dim
+  auto data_name_and_shape = GetLocalOmgContext().user_input_dims;  // local copy, extended below
+  std::vector<std::string> data_name_order;  // names already present in user_input_dims
+  for (const auto &item : data_name_and_shape) {
+    data_name_order.push_back(item.first);
+  }
+  if (!getnext_sink_dynamic_dims_) {
+    for (const auto &node : all_data_nodes_) {
+      auto data_desc = NodeUtils::GetOutputDesc(*node, kDataOutIndex);
+      auto data_shape = data_desc.GetShape();
+      auto data_format = data_desc.GetFormat() == Format::FORMAT_NCHW ? "NCHW" :
+                         data_desc.GetFormat() == Format::FORMAT_NHWC ? "NHWC" : "Others";
+      auto data_name = node->GetName();
+
+      const auto &data_shape_dims = data_shape.GetDims();
+      if (std::all_of(data_shape_dims.begin(), data_shape_dims.end(), [](int64_t val) { return val >= 0; })) {
+        continue;  // every dim is non-negative: nothing to validate for this node
+      }
+      ++unknown_shape_count;
+      auto iter = std::find(data_name_order.begin(), data_name_order.end(), data_name);
+      if (iter == data_name_order.end()) {  // node with a negative dim that user_input_dims does not name
+        if (!GetLocalOmgContext().dynamic_batch_size.empty()) {
+          auto ret = multibatch::CheckDynamicBatchShape(data_shape_dims, data_name);
+          GE_IF_BOOL_EXEC(ret == false, GELOGE(PARAM_INVALID, "Failed to check dynamic batch shape of %s.",
+                                               data_name.c_str()); return PARAM_INVALID);
+        } else if (!GetLocalOmgContext().dynamic_image_size.empty()) {
+          auto ret = multibatch::CheckDynamicImageSizeShape(data_shape_dims, data_name, data_format);
+          GE_IF_BOOL_EXEC(ret == false, GELOGE(PARAM_INVALID, "Failed to check dynamic image size shape of %s.",
+                                               data_name.c_str()); return PARAM_INVALID);
+        } else if (!GetLocalOmgContext().dynamic_dims.empty()) {
+          ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "reason"},
+            {"--input_shape", "all dynamic data must be set in --input_shape"});
+          GELOGE(INTERNAL_ERROR, "data: %s shape:%s must be set in --input_shape",
+                 node->GetName().c_str(), data_shape.ToString().c_str());
+          return INTERNAL_ERROR;
+        }
+        data_name_and_shape.emplace_back(data_name, data_shape_dims);  // append so the parser below sees it
+      }
+    }
+  }
+  auto ret = multibatch::ParserDataToDynamicInfo(batch_shapes_, data_name_and_shape, data_to_dynamic_info_);
+  GE_CHK_STATUS_RET(ret, "Failed to parse data to dynamic info.");
+  if (!getnext_sink_dynamic_dims_ && unknown_shape_count == 0) {
+    ErrorManager::GetInstance().ATCReportErrMessage("E10040");
+    GELOGE(PARAM_INVALID,
+           "Need unknown shape data when user set --dynamic_batch_size, --dynamic_image_size or --dynamic_dims");
+    return PARAM_INVALID;
+  }
+  return SUCCESS;
+}
+
 Status MultiBatchClonePass::InitParamsOfGetNext(const NodePtr &node) {
   data_count_from_getnext_ = 0;
   getnext_sink_dynamic_dims_ = false;
diff --git a/ge/graph/passes/multi_batch_clone_pass.h b/ge/graph/passes/multi_batch_clone_pass.h
index 66e92892..0dae88ca 100755
--- a/ge/graph/passes/multi_batch_clone_pass.h
+++ b/ge/graph/passes/multi_batch_clone_pass.h
@@ -175,6 +175,8 @@ class MultiBatchClonePass : public GraphPass {
   /// @return 0: SUCCESS / others: FAILED
   ///
   Status UpdateOutputTensor(uint32_t parent_index, uint32_t unused_num);
+  
+  Status CheckAndParseDynamicData();
 
   std::string session_graph_id_;
   std::vector<std::vector<int64_t>> batch_shapes_;
diff --git a/ge/graph/passes/switch_to_stream_switch_pass.h b/ge/graph/passes/switch_to_stream_switch_pass.h
index 05628871..e82ec17f 100644
--- a/ge/graph/passes/switch_to_stream_switch_pass.h
+++ b/ge/graph/passes/switch_to_stream_switch_pass.h
@@ -235,7 +235,7 @@ class SwitchToStreamSwitchPass : public GraphPass {
   std::vector<NodePtr> stream_switch_nodes_;
   std::unordered_map<OutDataAnchorPtr, std::map<int64_t, std::vector<std::list<NodePtr>>>> cond_node_map_;
   std::unordered_map<NodePtr, std::set<std::string>> switch_node_map_;
-  std::unordered_map<std::string, uint32_t> node_num_map_;
+  std::map<std::string, uint32_t> node_num_map_;
 };
 }  // namespace ge
 #endif  // GE_GRAPH_PASSES_SWITCH_TO_STREAM_SWITCH_PASS_H_
diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc
index e43c5dd2..215b31ee 100644
--- a/ge/graph/preprocess/multi_batch_copy_graph.cc
+++ b/ge/graph/preprocess/multi_batch_copy_graph.cc
@@ -738,7 +738,7 @@ Status MultiBatchGraphCopyer::CheckAndParseDynamicData(){
       }
     }
   }
-  auto ret = ParserDataToDynmaicInfo(shapes_, data_name_and_shape, data_to_dynamic_info_);
+  auto ret = ParserDataToDynamicInfo(shapes_, data_name_and_shape, data_to_dynamic_info_);
   GE_CHK_STATUS_RET(ret, "Failed to parse data to dynamic info.");
   if (!getnext_sink_dynamic_dims_ && unknown_shape_count == 0) {
     ErrorManager::GetInstance().ATCReportErrMessage("E10040");
diff --git a/ge/graph/preprocess/multi_batch_options.cc b/ge/graph/preprocess/multi_batch_options.cc
index 84f38fa6..3bde0efb 100644
--- a/ge/graph/preprocess/multi_batch_options.cc
+++ b/ge/graph/preprocess/multi_batch_options.cc
@@ -377,7 +377,7 @@ bool InitDynamicParams(vector<vector<int64_t>> &shapes) {
 /// @param [out] map<string, vector<vector<int64_t>>> &data_to_dynamic_info: key:data_name. value:dynamic dims.
 /// @return true: Configed for Multi batch / false: Not configed for Multi batch.
 ///
-Status ParserDataToDynmaicInfo(const vector<vector<int64_t>> &shapes,
+Status ParserDataToDynamicInfo(const vector<vector<int64_t>> &shapes,
                                vector<pair<string, vector<int64_t>>> &data_name_and_shape,
                                map<string, vector<vector<int64_t>> > &data_to_dynamic_info) {
   size_t cur_data_index = 0;
diff --git a/ge/graph/preprocess/multi_batch_options.h b/ge/graph/preprocess/multi_batch_options.h
index 9baf4f43..0ddaea0d 100644
--- a/ge/graph/preprocess/multi_batch_options.h
+++ b/ge/graph/preprocess/multi_batch_options.h
@@ -74,7 +74,7 @@ Status CalcShape(const std::vector<int64_t> &batch_shape, GeShape &data_shape);
 /// @param [out] map<string, vector<vector<int64_t>>> &data_to_dynamic_info: key:data_name. value:dynamic dims.
 /// @return SUCCESS / PARAM_INVALID
 ///
-Status ParserDataToDynmaicInfo(const vector<vector<int64_t>> &shapes,
+Status ParserDataToDynamicInfo(const vector<vector<int64_t>> &shapes,
                                vector<pair<string, vector<int64_t>>> &data_name_and_shape,
                                map<string, vector<vector<int64_t>>> &data_to_dynamic_info);
 
@@ -93,7 +93,7 @@ Status StampDynamicType(const OpDescPtr &op_desc);
 /// @param [in] const string &data_name: cur data name.
 /// @return 0: true/false
 ///
-bool CheckDynamicBatchShape(const vector<int64_t> &shape, const string &data_name);
+GE_FUNC_VISIBILITY bool CheckDynamicBatchShape(const vector<int64_t> &shape, const string &data_name);
 
 ///
 /// @ingroup ge
@@ -104,7 +104,7 @@ bool CheckDynamicBatchShape(const vector<int64_t> &shape, const string &data_nam
 /// @param [in]  const std::string &input_format: format of input.
 /// @return 0: true/false
 ///
-bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name,
+GE_FUNC_VISIBILITY bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name,
                                 const std::string &input_format);
 
 }  // namespace multibatch
diff --git a/ge/host_cpu_engine/CMakeLists.txt b/ge/host_cpu_engine/CMakeLists.txt
index cbd0bd8b..13cb7434 100644
--- a/ge/host_cpu_engine/CMakeLists.txt
+++ b/ge/host_cpu_engine/CMakeLists.txt
@@ -21,10 +21,12 @@ add_library(host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
 target_compile_options(host_cpu_engine PRIVATE
     -Werror
     -fno-common
+    -fvisibility=hidden  # hide symbols by default; only GE_FUNC_VISIBILITY-marked ones stay exported
 )
 
 target_compile_definitions(host_cpu_engine PRIVATE
     google=ascend_private
+    FUNC_VISIBILITY  # makes GE_FUNC_VISIBILITY expand to an export attribute in the headers
 )
 
 target_include_directories(host_cpu_engine PRIVATE
@@ -44,6 +46,10 @@ target_include_directories(host_cpu_engine PRIVATE
     ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )
 
+target_link_options(host_cpu_engine PRIVATE
+    -Wl,-Bsymbolic  # bind the library's references to global symbols to its own definitions
+)
+
 target_link_libraries(host_cpu_engine PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     -Wl,--no-as-needed
@@ -60,11 +66,12 @@ add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
 target_compile_options(atc_host_cpu_engine PRIVATE
     -Werror
     -fno-common
+    -fvisibility=hidden  # hide symbols by default; only GE_FUNC_VISIBILITY-marked ones stay exported
 )
 
 target_compile_definitions(atc_host_cpu_engine PRIVATE
-    COMPILE_OMG_PACKAGE
     google=ascend_private
+    FUNC_VISIBILITY  # makes GE_FUNC_VISIBILITY expand to an export attribute in the headers
 )
 
 target_include_directories(atc_host_cpu_engine PRIVATE
@@ -84,6 +91,10 @@ target_include_directories(atc_host_cpu_engine PRIVATE
     ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )
 
+target_link_options(atc_host_cpu_engine PRIVATE
+    -Wl,-Bsymbolic  # bind the library's references to global symbols to its own definitions
+)
+
 target_link_libraries(atc_host_cpu_engine PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     -Wl,--no-as-needed
@@ -105,10 +116,12 @@ add_library(host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST})
 target_compile_options(host_cpu_opskernel_builder PRIVATE
     -Werror
     -fno-common
+    -fvisibility=hidden  # hide symbols by default; only GE_FUNC_VISIBILITY-marked ones stay exported
 )
 
 target_compile_definitions(host_cpu_opskernel_builder PRIVATE
     google=ascend_private
+    FUNC_VISIBILITY  # makes GE_FUNC_VISIBILITY expand to an export attribute in the headers
 )
 
 target_include_directories(host_cpu_opskernel_builder PRIVATE
@@ -128,6 +141,10 @@ target_include_directories(host_cpu_opskernel_builder PRIVATE
     ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )
 
+target_link_options(host_cpu_opskernel_builder PRIVATE
+    -Wl,-Bsymbolic  # bind the library's references to global symbols to its own definitions
+)
+
 target_link_libraries(host_cpu_opskernel_builder PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     -Wl,--no-as-needed
@@ -145,10 +162,12 @@ add_library(atc_host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST})
 target_compile_options(atc_host_cpu_opskernel_builder PRIVATE
     -Werror
     -fno-common
+    -fvisibility=hidden  # hide symbols by default; only GE_FUNC_VISIBILITY-marked ones stay exported
 )
 
 target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE
     google=ascend_private
+    FUNC_VISIBILITY  # makes GE_FUNC_VISIBILITY expand to an export attribute in the headers
 )
 
 target_include_directories(atc_host_cpu_opskernel_builder PRIVATE
@@ -168,6 +187,10 @@ target_include_directories(atc_host_cpu_opskernel_builder PRIVATE
     ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )
 
+target_link_options(atc_host_cpu_opskernel_builder PRIVATE
+    -Wl,-Bsymbolic  # bind the library's references to global symbols to its own definitions
+)
+
 target_link_libraries(atc_host_cpu_opskernel_builder PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     -Wl,--no-as-needed
@@ -190,11 +213,13 @@ add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST})
 target_compile_options(host_cpu_opskernel_builder_static PRIVATE
     -Werror
     -fno-common
+    -fvisibility=hidden  # hide symbols by default; only GE_FUNC_VISIBILITY-marked ones stay exported
 )
 
 target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE
     google=ascend_private
     LOG_CPP
+    FUNC_VISIBILITY  # makes GE_FUNC_VISIBILITY expand to an export attribute in the headers
 )
 
 target_include_directories(host_cpu_opskernel_builder_static PRIVATE
diff --git a/ge/host_cpu_engine/engine/host_cpu_engine.h b/ge/host_cpu_engine/engine/host_cpu_engine.h
index c8d5608f..c29df00c 100644
--- a/ge/host_cpu_engine/engine/host_cpu_engine.h
+++ b/ge/host_cpu_engine/engine/host_cpu_engine.h
@@ -17,6 +17,20 @@
 #ifndef GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_
 #define GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_
 
+#if defined(_MSC_VER)  // MSVC: export through dllexport
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY _declspec(dllexport)
+#else
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#else  // other compilers: export through the visibility attribute
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#endif  // defined(_MSC_VER)
+
 #include <map>
 #include <memory>
 #include <string>
@@ -32,7 +46,7 @@ namespace host_cpu {
  * host cpu engine.
  * Used for the ops which executes on host.
  */
-class HostCpuEngine {
+class GE_FUNC_VISIBILITY HostCpuEngine {
  public:
   /**
    * get HostCpuEngine instance.
@@ -87,25 +101,25 @@ extern "C" {
  * When Ge start, GE will invoke this interface
  * @return The status whether initialize successfully
  */
-ge::Status Initialize(const map<string, string> &options);
+GE_FUNC_VISIBILITY ge::Status Initialize(const map<string, string> &options);
 
 /**
  * After the initialize, GE will invoke this interface to get the Ops kernel Store
  * @param ops_kernel_map The host cpu's ops kernel info
  */
-void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map);
+GE_FUNC_VISIBILITY void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map);
 
 /**
  * After the initialize, GE will invoke this interface to get the Graph Optimizer
  * @param graph_optimizers The host cpu's Graph Optimizer objs
  */
-void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers);
+GE_FUNC_VISIBILITY void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers);
 
 /**
  * When the graph finished, GE will invoke this interface
  * @return The status whether initialize successfully
  */
-ge::Status Finalize();
+GE_FUNC_VISIBILITY ge::Status Finalize();
 }
 
 #endif  // GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_
diff --git a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h
index 82375b9f..066d943c 100644
--- a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h
+++ b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h
@@ -17,11 +17,25 @@
 #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_
 #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_
 
+#if defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY _declspec(dllexport)
+#else
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#else  // !defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#endif  // defined(_MSC_VER)
+
 #include "common/opskernel/ops_kernel_builder.h"
 
 namespace ge {
 namespace host_cpu {
-class HostCpuOpsKernelBuilder : public OpsKernelBuilder {
+class GE_FUNC_VISIBILITY HostCpuOpsKernelBuilder : public OpsKernelBuilder {
  public:
   Status Initialize(const map<std::string, std::string> &options) override;
 
diff --git a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h
index f7539f8e..e3667d61 100644
--- a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h
+++ b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h
@@ -17,6 +17,20 @@
 #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_
 #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_
 
+#if defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY _declspec(dllexport)
+#else
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#else  // !defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#endif  // defined(_MSC_VER)
+
 #include <map>
 #include <string>
 #include <vector>
@@ -25,7 +39,7 @@
 
 namespace ge {
 namespace host_cpu {
-class HostCpuOpsKernelInfoStore : public OpsKernelInfoStore {
+class GE_FUNC_VISIBILITY HostCpuOpsKernelInfoStore : public OpsKernelInfoStore {
  public:
   HostCpuOpsKernelInfoStore() {}
   ~HostCpuOpsKernelInfoStore() override = default;
diff --git a/ge/host_cpu_engine/ops_kernel_store/op/host_op.h b/ge/host_cpu_engine/ops_kernel_store/op/host_op.h
index 0f560485..023eb957 100644
--- a/ge/host_cpu_engine/ops_kernel_store/op/host_op.h
+++ b/ge/host_cpu_engine/ops_kernel_store/op/host_op.h
@@ -21,7 +21,7 @@
 
 namespace ge {
 namespace host_cpu {
-class HostOp : public Op {
+class GE_FUNC_VISIBILITY HostOp : public Op {
  public:
   HostOp(const Node &node, RunContext &run_context) : Op(node, run_context) {}
   ~HostOp() override = default;
diff --git a/ge/host_cpu_engine/ops_kernel_store/op/op.h b/ge/host_cpu_engine/ops_kernel_store/op/op.h
index c094f080..b4c8b33e 100644
--- a/ge/host_cpu_engine/ops_kernel_store/op/op.h
+++ b/ge/host_cpu_engine/ops_kernel_store/op/op.h
@@ -29,7 +29,7 @@ namespace host_cpu {
 /**
  * The base class for all op.
  */
-class Op {
+class GE_FUNC_VISIBILITY Op {
  public:
   Op(const Node &node, RunContext &run_context) : run_context_(run_context), node_(node) {}
   virtual ~Op() = default;
diff --git a/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h b/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h
index 3a235ffd..73174860 100644
--- a/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h
+++ b/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h
@@ -32,7 +32,7 @@ using OP_CREATOR_FUNC = std::function<std::shared_ptr<Op>(const Node &, RunConte
 /**
  * manage all the op, support create op.
  */
-class OpFactory {
+class GE_FUNC_VISIBILITY OpFactory {
  public:
   static OpFactory &Instance();
 
@@ -70,7 +70,7 @@ class OpFactory {
   std::vector<std::string> all_ops_;
 };
 
-class OpRegistrar {
+class GE_FUNC_VISIBILITY OpRegistrar {
  public:
   OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) {
     OpFactory::Instance().RegisterCreator(type, func);
diff --git a/ge/hybrid/common/tensor_value.cc b/ge/hybrid/common/tensor_value.cc
index 16ecfaa4..c691c6f3 100644
--- a/ge/hybrid/common/tensor_value.cc
+++ b/ge/hybrid/common/tensor_value.cc
@@ -71,7 +71,7 @@ TensorValue::TensorValue(void *buffer, size_t size) : ref_buffer_(buffer), ref_s
 TensorValue::~TensorValue() { Destroy(); }
 
 void TensorValue::Destroy() {
-  if (buffer_ != nullptr || ref_buffer_ != nullptr) {
+  if (buffer_ != nullptr) {
     GELOGD("Unref tensor: %s", DebugString().c_str());
     buffer_.reset();
   }
diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc
index c47dafc1..9c4bb217 100755
--- a/ge/hybrid/executor/hybrid_model_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_executor.cc
@@ -71,12 +71,14 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
   GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_));
   RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End");
 
-  HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc), "Failed to execute partitioned call.");
+  HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc, args.outputs),
+                        "Failed to execute partitioned call.");
   RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End");
 
   HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph.");
   RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End");
 
+  args.outputs.clear();
   HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs");
   RECORD_MODEL_EXECUTION_EVENT(&context_, "[GetOutput] End");
   return SUCCESS;
diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc
index f8f122b1..8b194233 100644
--- a/ge/hybrid/executor/subgraph_executor.cc
+++ b/ge/hybrid/executor/subgraph_executor.cc
@@ -131,10 +131,14 @@ Status SubgraphExecutor::InitInputsForKnownShape(const std::vector<TensorValue>
 }
 
 Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
-                                      const std::vector<ConstGeTensorDescPtr> &input_desc) {
+                                      const std::vector<ConstGeTensorDescPtr> &input_desc,
+                                      const std::vector<TensorValue> &outputs) {
   GELOGD("[%s] is dynamic = %s", graph_item_->GetName().c_str(), graph_item_->IsDynamic() ? "true" : "false");
   GE_CHK_STATUS_RET(Init(inputs, input_desc), "[%s] Failed to init executor.", graph_item_->GetName().c_str());
-
+  if (!outputs.empty()) {
+    GE_CHK_STATUS_RET(EnableOutputZeroCopy(outputs),
+                      "Failed to enable output zero copy by user provided outputs.");
+  }
   if (!graph_item_->IsDynamic()) {
     return ExecuteAsyncForKnownShape(inputs);
   }
@@ -144,6 +148,11 @@ Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
   return SUCCESS;
 }
 
+Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
+                                      const std::vector<ConstGeTensorDescPtr> &input_desc) {
+  return ExecuteAsync(inputs, input_desc, {});
+}
+
 Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vector<TensorValue> &inputs) {
   GELOGD("[%s] subgraph is not dynamic.", graph_item_->GetName().c_str());
   if (graph_item_->GetAllNodes().size() != 1) {
@@ -440,5 +449,37 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) {
 
   return SUCCESS;
 }
+
+// Registers each user-provided output tensor on the (node, output index) pair of the matching output edge.
+Status SubgraphExecutor::EnableOutputZeroCopy(const vector<TensorValue> &outputs) {
+  GELOGD("To enable zero copy, output number = %zu", outputs.size());
+  const auto &output_edges = graph_item_->GetOutputEdges();
+  // Op -> NetOutput, set the output tensor of Op that output to the NetOutput node
+  if (outputs.size() != output_edges.size()) {
+    GELOGE(PARAM_INVALID, "Output number mismatches, expect = %zu, but given = %zu",
+           output_edges.size(),
+           outputs.size());
+    return PARAM_INVALID;
+  }
+
+  for (size_t i = 0; i < outputs.size(); ++i) {
+    auto &output_tensor = outputs[i];
+    auto &output_node = output_edges[i].first;
+    int output_idx = output_edges[i].second;
+    GE_CHECK_NOTNULL(output_node);  // dereferenced for logging and for SetOutput below
+    GELOGD("[%s] Set output tensor[%zu] to [%s]'s output[%d], tensor = %s",
+           graph_item_->GetName().c_str(), i,
+           output_node->NodeName().c_str(), output_idx,
+           output_tensor.DebugString().c_str());
+
+    GE_CHK_STATUS_RET(subgraph_context_->SetOutput(*output_node, output_idx, output_tensor),
+                      "[%s] Failed to set output tensor[%zu]",
+                      graph_item_->GetName().c_str(),
+                      i);
+  }
+
+  GELOGD("Done enabling zero copy for outputs successfully.");
+  return SUCCESS;
+}
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/hybrid/executor/subgraph_executor.h b/ge/hybrid/executor/subgraph_executor.h
index 4523e2c4..2b7e9371 100644
--- a/ge/hybrid/executor/subgraph_executor.h
+++ b/ge/hybrid/executor/subgraph_executor.h
@@ -43,7 +43,19 @@ class SubgraphExecutor {
    * @param input_desc      input tensor descriptions
    * @return SUCCESS on success, error code otherwise
    */
-  Status ExecuteAsync(const std::vector<TensorValue> &inputs, const std::vector<ConstGeTensorDescPtr> &input_desc);
+  Status ExecuteAsync(const std::vector<TensorValue> &inputs,
+                      const std::vector<ConstGeTensorDescPtr> &input_desc);
+
+  /**
+   * Execute subgraph async; output tensor addresses (not data) and descriptions are valid on return.
+   * @param inputs          input tensors
+   * @param input_desc      input tensor descriptions
+   * @param outputs         user-provided output tensors; if non-empty, count must match the graph outputs
+   * @return SUCCESS on success, error code otherwise
+   */
+  Status ExecuteAsync(const std::vector<TensorValue> &inputs,
+                      const std::vector<ConstGeTensorDescPtr> &input_desc,
+                      const std::vector<TensorValue> &outputs);
 
   /**
    * Execute subgraph async, output tensor address(not data) and output tensor descriptions are
@@ -76,6 +88,7 @@ class SubgraphExecutor {
 
  private:
   Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state);
+  Status EnableOutputZeroCopy(const std::vector<TensorValue> &outputs);
   static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state);
   Status Init(const std::vector<TensorValue> &inputs,
               const std::vector<ConstGeTensorDescPtr> &input_desc);
diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc
index 7e5d8fe5..4511c2b9 100644
--- a/ge/hybrid/model/hybrid_model.cc
+++ b/ge/hybrid/model/hybrid_model.cc
@@ -40,9 +40,14 @@ HybridModel::~HybridModel() {
   GELOGD("[%s] HybridModel destroyed.", model_name_.c_str());
 }
 
-Status HybridModel::Init() {
+Status HybridModel::Init(bool is_single_op) {
   GELOGD("Start to init hybrid model.");
-  GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "Failed to build hybrid model.");
+  is_single_op_ = is_single_op;
+  if (is_single_op) {
+    GE_CHK_STATUS_RET(HybridModelBuilder(*this).BuildForSingleOp(), "Failed to build hybrid model.");
+  } else {
+    GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "Failed to build hybrid model.");
+  }
   GELOGD("HybridModel initialized successfully.");
   return SUCCESS;
 }
diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h
index 72495cad..1f973d1e 100644
--- a/ge/hybrid/model/hybrid_model.h
+++ b/ge/hybrid/model/hybrid_model.h
@@ -37,7 +37,7 @@ class HybridModel {
 
   ~HybridModel();
 
-  Status Init();
+  Status Init(bool is_single_op = false);
 
   const NodeItem *GetNodeItem(const NodePtr &node) const;
 
@@ -69,6 +69,10 @@ class HybridModel {
     return model_id_;
   }
 
+  bool IsSingleOp() const {
+    return is_single_op_;
+  }
+
   TensorValue* GetVariable(const string &name) const;
 
   NodePtr GetVariableNode(const string &name) const;
@@ -131,11 +135,13 @@ class HybridModel {
   std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_;
 
   bool is_new_model_desc_ = false;    // support aipp
+  bool is_single_op_ = false;
 
   // runtime fields
   uint32_t device_id_ = 0;
   uint32_t model_id_ = 0;
   uint8_t *var_mem_base_ = nullptr;
+  std::unique_ptr<TensorBuffer> weight_buffer_;
   RuntimeParam root_runtime_param_;
 };
 }  // namespace hybrid
diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index b314c6a7..03e76bc9 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -147,6 +147,21 @@ Status HybridModelBuilder::Build() {
   return SUCCESS;
 }
 
+Status HybridModelBuilder::BuildForSingleOp() {
+  GE_CHK_STATUS_RET(ValidateParams(), "Failed to validate GeRootModel");
+  hybrid_model_.model_name_ = ge_root_model_->GetRootGraph()->GetName();
+  GELOGI("[%s] Start to build hybrid model.", GetGraphName());
+  auto ret = ge_root_model_->GetSubgraphInstanceNameToModel();
+  const GeModelPtr ge_model = ret[ge_root_model_->GetRootGraph()->GetName()];
+  GE_CHK_STATUS_RET(IndexTaskDefs(ge_root_model_->GetRootGraph(), ge_model),
+                    "[%s] Failed to index task defs", GetGraphName());
+  GE_CHK_STATUS_RET(LoadGraph(), "[%s] Failed to load graph", GetGraphName());
+  GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName());
+  GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName());
+  GELOGI("[%s] Done building hybrid model for single op successfully.", GetGraphName());
+  return SUCCESS;
+}
+
 Status HybridModelBuilder::ValidateParams() {
   GE_CHECK_NOTNULL(ge_root_model_);
   GE_CHECK_NOTNULL(ge_root_model_->GetRootGraph());
@@ -951,46 +966,71 @@ Status HybridModelBuilder::InitVariableTensors() {
 }
 
 Status HybridModelBuilder::InitWeights() {
+  // Copy the root model's weight blob to device once, then point each root-graph Constant at its slice
+  const auto &root_graph = ge_root_model_->GetRootGraph();
+  const auto &subgraph_models = ge_root_model_->GetSubgraphInstanceNameToModel();
+  auto iter = subgraph_models.find(root_graph->GetName());
+  if (iter == subgraph_models.end()) {
+    GELOGD("Root graph model not found");
+    return SUCCESS;
+  }
+
+  auto &root_model = iter->second;
+  const auto &weight_buffer = root_model->GetWeight();
+  if (weight_buffer.GetSize() == 0) {
+    GELOGD("weight is empty");
+    return SUCCESS;
+  }
+
   auto allocator = NpuMemoryAllocator::GetAllocator();
   GE_CHECK_NOTNULL(allocator);
-
-  for (auto &it : hybrid_model_.node_items_) {
-    auto &node_item = it.second;
-    if (node_item->node_type != CONSTANT) {
+  hybrid_model_.weight_buffer_ = TensorBuffer::Create(allocator, weight_buffer.GetSize());
+  GE_CHECK_NOTNULL(hybrid_model_.weight_buffer_);
+  auto weight_base = reinterpret_cast<uint8_t *>(hybrid_model_.weight_buffer_->GetData());
+  GE_CHK_RT_RET(rtMemcpy(weight_base,
+                         hybrid_model_.weight_buffer_->GetSize(),
+                         weight_buffer.GetData(),
+                         weight_buffer.GetSize(),
+                         RT_MEMCPY_HOST_TO_DEVICE));
+
+  GELOGI("Init weight mem successfully, weight base %p, weight size = %zu",
+         weight_base,
+         hybrid_model_.weight_buffer_->GetSize());
+  for (auto &node : root_graph->GetDirectNode()) {
+    if (node->GetType() != CONSTANT) {
       continue;
     }
 
-    const auto &constant_node = node_item->node;
-    auto op_desc = constant_node->GetOpDesc();
+    auto op_desc = node->GetOpDesc();
     auto v_weights = ModelUtils::GetWeights(op_desc);
     if (v_weights.empty()) {
-      GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", constant_node->GetName().c_str());
+      GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", node->GetName().c_str());
       return INTERNAL_ERROR;
     }
     auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get());
-    auto output_desc = op_desc->MutableOutputDesc(0);
-    GE_CHECK_NOTNULL(output_desc);
-    auto tensor_size = ge_tensor->GetData().GetSize();
-    GELOGD("[%s] Start to init Constant node [%s], size = %ld",
+    GE_CHECK_NOTNULL(ge_tensor);
+    const GeTensorDesc &tensor_desc = ge_tensor->GetTensorDesc();
+    auto output_desc = op_desc->MutableOutputDesc(0);
+    GE_CHECK_NOTNULL(output_desc);  // dereferenced by GetSize below
+    int64_t tensor_size = 0;
+    GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*output_desc, tensor_size),
+                            "[%s] Failed to get tensor size", node->GetName().c_str());
+    int64_t data_offset = 0;
+    GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetDataOffset(tensor_desc, data_offset),
+                            "[%s] Failed to get data offset", node->GetName().c_str());
+    GELOGD("[%s] Start to init Constant node [%s], size = %ld, offset = %ld",
            GetGraphName(),
-           constant_node->GetName().c_str(),
-           tensor_size);
+           node->GetName().c_str(),
+           tensor_size,
+           data_offset);
 
-    auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size);
+    auto tensor_buffer = TensorBuffer::Create(weight_base + data_offset, tensor_size);
     GE_CHECK_NOTNULL(tensor_buffer);
     std::unique_ptr<TensorValue> constant_tensor(new (std::nothrow)TensorValue(std::move(tensor_buffer)));
     GE_CHECK_NOTNULL(constant_tensor);
     constant_tensor->SetName("Constant_" + op_desc->GetName());
-    if (tensor_size > 0) {
-      GE_CHK_RT_RET(rtMemcpy(constant_tensor->MutableData(),
-                             constant_tensor->GetSize(),
-                             ge_tensor->GetData().data(),
-                             ge_tensor->GetData().size(),
-                             RT_MEMCPY_HOST_TO_DEVICE));
-    }
-
-    hybrid_model_.constant_tensors_.emplace(constant_node, std::move(constant_tensor));
-    GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), constant_node->GetName().c_str(), tensor_size);
+    hybrid_model_.constant_tensors_.emplace(node, std::move(constant_tensor));
+    GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), node->GetName().c_str(), tensor_size);
   }
   return SUCCESS;
 }
@@ -1038,6 +1078,53 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr
   return SUCCESS;
 }
 
+// Groups ge_model's kernel/kernel_ex/hccl task defs by the direct node of sub_graph whose op id they name.
+Status HybridModelBuilder::IndexTaskDefs(const ComputeGraphPtr &sub_graph, const GeModelPtr &ge_model) {
+  GE_CHECK_NOTNULL(sub_graph);
+  GE_CHECK_NOTNULL(ge_model);  // callers may pass the result of a map lookup, which can be null
+  GELOGD("To index tasks for subgraph: %s", sub_graph->GetName().c_str());
+  std::unordered_map<int64_t, NodePtr> node_map;
+  for (const auto &node : sub_graph->GetDirectNode()) {
+    GE_CHECK_NOTNULL(node);
+    GE_CHECK_NOTNULL(node->GetOpDesc());
+    auto node_id = node->GetOpDesc()->GetId();
+    GELOGD("op_index = %ld, node_name = %s", node_id, node->GetName().c_str());
+    node_map.emplace(node_id, node);
+  }
+
+  const auto model_task_def = ge_model->GetModelTaskDefPtr();  // hold the owner while iterating its tasks
+  GE_CHECK_NOTNULL(model_task_def);
+  const auto &tasks = model_task_def->task();
+  for (int i = 0; i < tasks.size(); ++i) {
+    const domi::TaskDef &task_def = tasks[i];
+    GELOGI("Task id = %d, task type = %d", i, task_def.type());
+    auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
+    uint32_t op_index = -1;
+    if (task_type == RT_MODEL_TASK_KERNEL) {
+      op_index = task_def.kernel().context().op_index();
+    } else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
+      op_index = task_def.kernel_ex().op_index();
+    } else if (task_type == RT_MODEL_TASK_HCCL) {
+      op_index = task_def.kernel_hccl().op_index();
+    } else {
+      GELOGD("Skip task type: %d", static_cast<int>(task_type));
+      continue;
+    }
+    auto iter = node_map.find(op_index);
+    if (iter == node_map.end()) {
+      GELOGE(INTERNAL_ERROR, "Failed to get node by index = %u", op_index);
+      return INTERNAL_ERROR;
+    }
+    auto &node = iter->second;
+    if (task_type == RT_MODEL_TASK_KERNEL) {
+      ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc());
+    }
+    GELOGD("Task loaded for node: %s, task type = %d, op_index = %u", node->GetName().c_str(), task_type, op_index);
+    hybrid_model_.task_defs_[node].emplace_back(task_def);
+  }
+  return SUCCESS;
+}
+
 Status HybridModelBuilder::IndexTaskDefs() {
   const auto &root_graph = ge_root_model_->GetRootGraph();
   if (SetOutputNameAttr(*root_graph) != SUCCESS) {
diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h
index 045bf3ef..71663a6e 100644
--- a/ge/hybrid/model/hybrid_model_builder.h
+++ b/ge/hybrid/model/hybrid_model_builder.h
@@ -35,6 +35,7 @@ class HybridModelBuilder {
   explicit HybridModelBuilder(HybridModel &hybrid_model);
   ~HybridModelBuilder() = default;
   Status Build();
+  Status BuildForSingleOp();
 
  private:
   static Status UpdateAnchorStatus(const NodePtr &node);
@@ -64,6 +65,7 @@ class HybridModelBuilder {
   Status ParseDependentInputNodes(NodeItem &node_item, const std::vector<string> &dependencies);
   Status ParseDependentForFusedSubgraph(NodeItem &node_item);
   Status IndexTaskDefs();
+  Status IndexTaskDefs(const ComputeGraphPtr &sub_graph, const GeModelPtr &ge_model);
   Status IndexSpecialNodes();
   Status InitRuntimeParams();
   Status InitModelMem();
diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
index cb5a7d4c..3174df80 100755
--- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
@@ -49,6 +49,7 @@ Status AiCoreNodeExecutor::Initialize() {
 Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const {
   GE_CHECK_NOTNULL(node);
   GELOGI("AiCoreNodeExecutor(%s) LoadTask Start.", node->GetName().c_str());
+  bool is_single_op = model.IsSingleOp();
 
   auto *task_defs = model.GetTaskDefs(node);
   if (task_defs == nullptr || task_defs->empty()) {
@@ -66,7 +67,8 @@ Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &nod
 
   AiCoreTaskBuilder builder(node->GetOpDesc(), *task_defs);
   std::unique_ptr<NodeTask> node_task;
-  GE_CHK_STATUS_RET(builder.BuildTask(node_task, true), "[%s] Failed to build op tasks.", node->GetName().c_str());
+  GE_CHK_STATUS_RET(builder.BuildTask(node_task, true, is_single_op),
+                    "[%s] Failed to build op tasks.", node->GetName().c_str());
   task = std::move(node_task);
   GELOGI("AiCoreNodeExecutor(%s) LoadTask End.", node->GetName().c_str());
   return SUCCESS;
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
index f1bd6466..a34bba22 100644
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
@@ -65,7 +65,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
   }
   TBEHandleStore &kernel_store = TBEHandleStore::GetInstance();
   rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str());
-  if (rt_ret != RT_ERROR_NONE) {
+  if (rt_ret != RT_ERROR_NONE || is_single_op_) {
     void *bin_handle = nullptr;
     if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) {
       GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str());
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h
index 3f350531..69a74ea9 100755
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.h
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h
@@ -50,6 +50,8 @@ class AiCoreOpTask {
 
   uint32_t GetBlockDim() const {return block_dim_;}
 
+  void SetSingleOp(bool is_single_op) {is_single_op_ = is_single_op;}  // flag is read by RegisterTbeHandle()
+
  protected:
   Status UpdateTilingInfo(TaskContext &context);
   virtual std::string GetKeyForOpParamSize() const;
@@ -72,6 +74,7 @@ class AiCoreOpTask {
   uint32_t args_size_ = 0;
   uint32_t block_dim_ = 1;
   bool clear_atomic_ = true;
+  bool is_single_op_ = false;
   std::vector<int> output_indices_to_skip_;
 };
 
diff --git a/ge/hybrid/node_executor/aicore/aicore_task_builder.cc b/ge/hybrid/node_executor/aicore/aicore_task_builder.cc
index b2996435..2bf2cb36 100755
--- a/ge/hybrid/node_executor/aicore/aicore_task_builder.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_task_builder.cc
@@ -37,7 +37,9 @@ AiCoreTaskBuilder::AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector
     : op_desc_(op_desc), task_defs_(task_defs) {
 }
 
-Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic) {
+Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task,
+                                    bool ignore_failure_on_atomic,
+                                    bool is_single_op) {
   GE_CHECK_NOTNULL(op_desc_);
   if (task_defs_.size() > kNumTaskWithAtomicAddrCleanTask) {
     GELOGE(INTERNAL_ERROR,
@@ -68,6 +70,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool i
     auto atomic_task =
         std::unique_ptr<AtomicAddrCleanOpTask>(new(std::nothrow)AtomicAddrCleanOpTask());
     GE_CHECK_NOTNULL(atomic_task);
+    atomic_task->SetSingleOp(is_single_op);
     GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()),
                       "[%s] Failed to init task for AtomicAddrClean",
                       op_desc_->GetName().c_str());
@@ -77,6 +80,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool i
   // build aicore task
   auto aicore_task = std::unique_ptr<AiCoreOpTask>(new(std::nothrow)AiCoreOpTask());
   GE_CHECK_NOTNULL(aicore_task);
+  aicore_task->SetSingleOp(is_single_op);
   GE_CHK_STATUS_RET(aicore_task->Init(*op_desc_, task_defs_.back()),
                     "[%s] Failed to init task for AtomicAddrClean",
                     op_desc_->GetName().c_str());
diff --git a/ge/hybrid/node_executor/aicore/aicore_task_builder.h b/ge/hybrid/node_executor/aicore/aicore_task_builder.h
index 92db809d..8f95df15 100755
--- a/ge/hybrid/node_executor/aicore/aicore_task_builder.h
+++ b/ge/hybrid/node_executor/aicore/aicore_task_builder.h
@@ -47,7 +47,7 @@ class AiCoreTaskBuilder {
   AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector<domi::TaskDef> &task_defs);
   ~AiCoreTaskBuilder() = default;
 
-  Status BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic);
+  Status BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic, bool is_single_op = false);
 
  private:
   bool ExpectAtomicAddrCleanTask();
diff --git a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc
index 50890d6a..d7d0f547 100755
--- a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc
+++ b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc
@@ -27,7 +27,7 @@ namespace ge {
 namespace hybrid {
 REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::GE_LOCAL, GeLocalNodeExecutor);
 
-const std::unordered_map<std::string, std::vector<uint32_t>>
+const std::map<std::string, std::vector<uint32_t>>
     RefInputTask::out_ref_input_index_ = {{DATA, {}},
                                           {AIPPDATA, {}},
                                           {RESHAPE, {}},
@@ -36,7 +36,7 @@ const std::unordered_map<std::string, std::vector<uint32_t>>
                                           {BROADCASTGRADIENTARGS, {}}
                                          };
 
-const std::unordered_set<std::string> DependInputShapeTask::depend_input_shape_ops_ = {SHAPE, SHAPEN, RANK, SIZE};
+const std::set<std::string> DependInputShapeTask::depend_input_shape_ops_ = {SHAPE, SHAPEN, RANK, SIZE};
 
 Status RefInputTask::UpdateArgs(TaskContext &) {
   // no need update args
diff --git a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.h b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.h
index 9de8d0f9..c8d64d09 100644
--- a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.h
+++ b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.h
@@ -46,7 +46,7 @@ class RefInputTask : public NodeTask {
 
   // key is op type, value is output ref input index,
   // e.g. {1,0} means out[0] ref input[1], out[1] ref input[0], if vector is empty, it means ref input one by one
-  static const std::unordered_map<std::string, std::vector<uint32_t>> out_ref_input_index_;
+  static const std::map<std::string, std::vector<uint32_t>> out_ref_input_index_;
 };
 
 class DependInputShapeTask : public NodeTask {
@@ -65,7 +65,7 @@ class DependInputShapeTask : public NodeTask {
   const NodePtr node_;
 
   // ops depend input shape
-  static const std::unordered_set<std::string> depend_input_shape_ops_;
+  static const std::set<std::string> depend_input_shape_ops_;
 };
 
 class ConstantNodeTask : public NodeTask {
diff --git a/ge/init/gelib.h b/ge/init/gelib.h
index e52b8dd6..885ae867 100644
--- a/ge/init/gelib.h
+++ b/ge/init/gelib.h
@@ -31,7 +31,7 @@ using std::map;
 using std::vector;
 
 namespace ge {
-class GELib {
+class GE_FUNC_VISIBILITY GELib {
  public:
   GELib() = default;
   ~GELib() = default;
diff --git a/ge/ir_build/atc_ir_common.cc b/ge/ir_build/atc_ir_common.cc
index 5b82f8f2..42a78dde 100755
--- a/ge/ir_build/atc_ir_common.cc
+++ b/ge/ir_build/atc_ir_common.cc
@@ -77,7 +77,7 @@ Status CheckInputFormat(const string &input_format) {
   return ge::SUCCESS;
 }
 
-bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map,
+bool CheckDynamicBatchSizeInputShapeValid(map<string, vector<int64_t>> shape_map,
                                           std::string &dynamic_batch_size) {
   int32_t size = 0;
   for (auto iter = shape_map.begin(); iter != shape_map.end(); ++iter) {
@@ -119,7 +119,7 @@ bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>>
   return true;
 }
 
-bool CheckDynamicImagesizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map,
+bool CheckDynamicImagesizeInputShapeValid(map<string, vector<int64_t>> shape_map,
                                           const std::string input_format, std::string &dynamic_image_size) {
   if (!input_format.empty() && !ge::TypeUtils::IsFormatValid(input_format.c_str())) {
     GELOGE(ge::PARAM_INVALID, "user input format [%s] is not found!", input_format.c_str());
@@ -177,7 +177,7 @@ bool CheckDynamicImagesizeInputShapeValid(unordered_map<string, vector<int64_t>>
   return true;
 }
 
-bool CheckDynamicDimsInputShapeValid(const unordered_map<string, vector<int64_t>> &shape_map,
+bool CheckDynamicDimsInputShapeValid(const map<string, vector<int64_t>> &shape_map,
                                      string input_format, string &dynamic_dims) {
   if (input_format != "ND") {
     ErrorManager::GetInstance().ATCReportErrMessage(
@@ -272,7 +272,7 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i
     return ge::SUCCESS;
   }
 
-  unordered_map<string, vector<int64_t>> shape_map;
+  map<string, vector<int64_t>> shape_map;
   vector<pair<string, vector<int64_t>>> user_shape_map;
   is_dynamic_input = true;
   if (input_shape.empty()) {
@@ -310,7 +310,7 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i
   return ge::SUCCESS;
 }
 
-bool ParseInputShape(const string &input_shape, unordered_map<string, vector<int64_t>> &shape_map,
+bool ParseInputShape(const string &input_shape, map<string, vector<int64_t>> &shape_map,
                      vector<pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input) {
   vector<string> shape_vec = StringUtils::Split(input_shape, ';');
   const int DEFAULT_SHAPE_PAIR_SIZE = 2;
diff --git a/ge/ir_build/atc_ir_common.h b/ge/ir_build/atc_ir_common.h
index 2580a206..2ad4efa8 100644
--- a/ge/ir_build/atc_ir_common.h
+++ b/ge/ir_build/atc_ir_common.h
@@ -46,13 +46,13 @@ static std::map<std::string, domiTensorFormat_t> input_format_str_to_geformat =
 static const std::string kEnableCompressWeightTrue = "1";
 static const std::string kEnableCompressWeightFalse = "0";
 
-bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map,
+bool CheckDynamicBatchSizeInputShapeValid(map<string, vector<int64_t>> shape_map,
                                           std::string &dynamic_batch_size);
 
-bool CheckDynamicImagesizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map,
+bool CheckDynamicImagesizeInputShapeValid(map<string, vector<int64_t>> shape_map,
                                           const std::string input_format, std::string &dynamic_image_size);
 
-bool CheckDynamicDimsInputShapeValid(const std::unordered_map<std::string, std::vector<int64_t>> &shape_map,
+bool CheckDynamicDimsInputShapeValid(const std::map<std::string, std::vector<int64_t>> &shape_map,
                                      std::string input_format, std::string &dynamic_dims);
 
 bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims);
@@ -61,7 +61,7 @@ Status CheckDynamicInputParamValid(std::string &dynamic_batch_size, std::string
                                    std::string &dynamic_dims, const std::string input_shape,
                                    const std::string input_format, bool &is_dynamic_input);
 
-bool ParseInputShape(const std::string &input_shape, std::unordered_map<string, std::vector<int64_t>> &shape_map,
+bool ParseInputShape(const std::string &input_shape, std::map<string, std::vector<int64_t>> &shape_map,
                      std::vector<std::pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input = false);
 
 Status CheckOutputTypeParamValid(const std::string output_type);
diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc
index 3d00ff7f..9197d52f 100644
--- a/ge/ir_build/ge_ir_build.cc
+++ b/ge/ir_build/ge_ir_build.cc
@@ -268,7 +268,7 @@ graphStatus Impl::UpdateDataOpAttr(const Graph &graph) {
   if (options_.find(kInputShape) == options_.end()) {
     return GRAPH_SUCCESS;
   }
-  unordered_map<string, vector<int64_t>> shape_map;
+  map<string, vector<int64_t>> shape_map;
   vector<pair<string, vector<int64_t>>> user_shape_map;
   GE_CHK_BOOL_EXEC(ParseInputShape(options_[kInputShape], shape_map, user_shape_map, true),
     return GRAPH_PARAM_INVALID, "parse input shape failed!");
diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt
index 3f8d43dc..0079576a 100644
--- a/ge/offline/CMakeLists.txt
+++ b/ge/offline/CMakeLists.txt
@@ -23,6 +23,7 @@ target_compile_options(atc_atc.bin PRIVATE
     -O2
     -Wno-deprecated-declarations
     -fno-common
+    -fvisibility=hidden
 )
 
 target_compile_definitions(atc_atc.bin PRIVATE
@@ -30,6 +31,7 @@ target_compile_definitions(atc_atc.bin PRIVATE
     COMPILE_OMG_PACKAGE
     google=ascend_private
     LOG_CPP
+    FUNC_VISIBILITY
 )
 
 target_include_directories(atc_atc.bin PRIVATE
@@ -58,6 +60,10 @@ target_include_directories(atc_atc.bin PRIVATE
     ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
 )
 
+target_link_options(atc_atc.bin PRIVATE
+    -Wl,-Bsymbolic
+)
+
 target_link_libraries(atc_atc.bin PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     ascend_protobuf
@@ -90,6 +96,7 @@ target_compile_options(fwk_atc.bin PRIVATE
     -O2
     -Wno-deprecated-declarations
     -fno-common
+    -fvisibility=hidden
 )
 
 target_compile_definitions(fwk_atc.bin PRIVATE
@@ -97,6 +104,7 @@ target_compile_definitions(fwk_atc.bin PRIVATE
     COMPILE_OMG_PACKAGE
     google=ascend_private
     LOG_CPP
+    FUNC_VISIBILITY
 )
 
 target_include_directories(fwk_atc.bin PRIVATE
@@ -125,6 +133,10 @@ target_include_directories(fwk_atc.bin PRIVATE
     ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
 )
 
+target_link_options(fwk_atc.bin PRIVATE
+    -Wl,-Bsymbolic
+)
+
 target_link_libraries(fwk_atc.bin PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     ascend_protobuf
diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.h b/ge/opskernel_manager/ops_kernel_builder_manager.h
index 7a95ddfa..8e1dec28 100644
--- a/ge/opskernel_manager/ops_kernel_builder_manager.h
+++ b/ge/opskernel_manager/ops_kernel_builder_manager.h
@@ -23,7 +23,7 @@
 
 namespace ge {
 using OpsKernelBuilderPtr = std::shared_ptr<OpsKernelBuilder>;
-class OpsKernelBuilderManager {
+class GE_FUNC_VISIBILITY OpsKernelBuilderManager {
  public:
   ~OpsKernelBuilderManager();
 
diff --git a/ge/opskernel_manager/ops_kernel_manager.h b/ge/opskernel_manager/ops_kernel_manager.h
index b34c483e..19d703e3 100644
--- a/ge/opskernel_manager/ops_kernel_manager.h
+++ b/ge/opskernel_manager/ops_kernel_manager.h
@@ -41,7 +41,7 @@ using std::vector;
 namespace ge {
 using OpsKernelInfoStorePtr = std::shared_ptr<OpsKernelInfoStore>;
 
-class OpsKernelManager {
+class GE_FUNC_VISIBILITY OpsKernelManager {
  public:
   friend class GELib;
 
diff --git a/ge/plugin/engine/CMakeLists.txt b/ge/plugin/engine/CMakeLists.txt
index f6353231..e5736b51 100644
--- a/ge/plugin/engine/CMakeLists.txt
+++ b/ge/plugin/engine/CMakeLists.txt
@@ -9,11 +9,13 @@ add_library(engine SHARED ${SRC_LIST})
 target_compile_options(engine PRIVATE
     -Werror
     -fno-common
+    -fvisibility=hidden
 )
 
 target_compile_definitions(engine PRIVATE
     REUSE_MEMORY=1
     PROTOBUF_INLINE_NOT_IN_HEADERS=0
+    FUNC_VISIBILITY
 )
 
 target_include_directories(engine PRIVATE
@@ -32,6 +34,10 @@ target_include_directories(engine PRIVATE
     ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )
 
+target_link_options(engine PRIVATE
+    -Wl,-Bsymbolic
+)
+
 target_link_libraries(engine PRIVATE
     $<BUILD_INTERFACE:intf_pub>
     -Wl,--no-as-needed
diff --git a/ge/plugin/engine/dnnengines.h b/ge/plugin/engine/dnnengines.h
index 4a2a9df5..0633c104 100644
--- a/ge/plugin/engine/dnnengines.h
+++ b/ge/plugin/engine/dnnengines.h
@@ -25,7 +25,7 @@
 #include "plugin/engine/engine_manage.h"
 
 namespace ge {
-class AICoreDNNEngine : public DNNEngine {
+class GE_FUNC_VISIBILITY AICoreDNNEngine : public DNNEngine {
  public:
   AICoreDNNEngine() = default;
   explicit AICoreDNNEngine(const std::string &engine_name);
@@ -40,7 +40,7 @@ class AICoreDNNEngine : public DNNEngine {
   DNNEngineAttribute engine_attribute_;
 };
 
-class VectorCoreDNNEngine : public DNNEngine {
+class GE_FUNC_VISIBILITY VectorCoreDNNEngine : public DNNEngine {
  public:
   VectorCoreDNNEngine() = default;
   explicit VectorCoreDNNEngine(const std::string &engine_name);
@@ -56,7 +56,7 @@ class VectorCoreDNNEngine : public DNNEngine {
 };
 
 
-class AICpuDNNEngine : public DNNEngine {
+class GE_FUNC_VISIBILITY AICpuDNNEngine : public DNNEngine {
  public:
   AICpuDNNEngine() = default;
   explicit AICpuDNNEngine(const std::string &engine_name);
@@ -71,7 +71,7 @@ class AICpuDNNEngine : public DNNEngine {
   DNNEngineAttribute engine_attribute_;
 };
 
-class AICpuTFDNNEngine : public DNNEngine {
+class GE_FUNC_VISIBILITY AICpuTFDNNEngine : public DNNEngine {
  public:
   AICpuTFDNNEngine() = default;
   explicit AICpuTFDNNEngine(const std::string &engine_name);
@@ -86,7 +86,7 @@ class AICpuTFDNNEngine : public DNNEngine {
   DNNEngineAttribute engine_attribute_;
 };
 
-class GeLocalDNNEngine : public DNNEngine {
+class GE_FUNC_VISIBILITY GeLocalDNNEngine : public DNNEngine {
  public:
   GeLocalDNNEngine() = default;
   explicit GeLocalDNNEngine(const std::string &engine_name);
@@ -101,7 +101,7 @@ class GeLocalDNNEngine : public DNNEngine {
   DNNEngineAttribute engine_attribute_;
 };
 
-class HostCpuDNNEngine : public DNNEngine {
+class GE_FUNC_VISIBILITY HostCpuDNNEngine : public DNNEngine {
 public:
   HostCpuDNNEngine() = default;
   explicit HostCpuDNNEngine(const std::string &engine_name);
@@ -116,7 +116,7 @@ private:
   DNNEngineAttribute engine_attribute_;
 };
 
-class RtsDNNEngine : public DNNEngine {
+class GE_FUNC_VISIBILITY RtsDNNEngine : public DNNEngine {
  public:
   RtsDNNEngine() = default;
   explicit RtsDNNEngine(const std::string &engine_name);
@@ -131,7 +131,7 @@ class RtsDNNEngine : public DNNEngine {
   DNNEngineAttribute engine_attribute_;
 };
 
-class HcclDNNEngine : public DNNEngine {
+class GE_FUNC_VISIBILITY HcclDNNEngine : public DNNEngine {
  public:
   HcclDNNEngine() = default;
   explicit HcclDNNEngine(const std::string &engine_name);
diff --git a/ge/plugin/engine/engine_manage.h b/ge/plugin/engine/engine_manage.h
index 5203ad3a..7eb88805 100644
--- a/ge/plugin/engine/engine_manage.h
+++ b/ge/plugin/engine/engine_manage.h
@@ -17,6 +17,20 @@
 #ifndef GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_
 #define GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_
 
+#if defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __declspec(dllexport)
+#else
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#else  // !defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#endif  // defined(_MSC_VER)
+
 #include <map>
 #include <memory>
 #include <string>
@@ -26,7 +40,7 @@
 
 namespace ge {
 using DNNEnginePtr = std::shared_ptr<DNNEngine>;
-class EngineManager {
+class GE_FUNC_VISIBILITY EngineManager {
  public:
   static Status RegisterEngine(const std::string &engine_name, DNNEnginePtr engine_ptr);
   static DNNEnginePtr GetEngine(const std::string &engine_name);
@@ -34,7 +48,7 @@ class EngineManager {
 };
 
 extern "C" {
-void GetDNNEngineObjs(std::map<std::string, DNNEnginePtr> &engines);
+GE_FUNC_VISIBILITY void GetDNNEngineObjs(std::map<std::string, DNNEnginePtr> &engines);
 }
 }  // namespace ge
 #endif  // GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_
diff --git a/ge/session/inner_session.cc b/ge/session/inner_session.cc
index 5a67f7cd..6a56fc05 100755
--- a/ge/session/inner_session.cc
+++ b/ge/session/inner_session.cc
@@ -77,6 +77,26 @@ Status InnerSession::Initialize() {
 
   UpdateThreadContext(std::map<std::string, std::string>{});
 
+  // A session-level device id, when configured and valid, takes priority over the context device id.
+  std::string str_session_device_id;
+  if (GetContext().GetOption("ge.session_device_id", str_session_device_id) == SUCCESS) {
+    GELOGI("Option session device id has set, value is %s.", str_session_device_id.c_str());
+
+    try {
+      const int32_t session_device_id = std::stoi(str_session_device_id);
+      if (session_device_id >= 0) {
+        // session device id has priority
+        GetContext().SetCtxDeviceId(static_cast<uint32_t>(session_device_id));
+      } else {
+        // A negative id would wrap to a huge value in the unsigned cast, so it is rejected instead.
+        GELOGW("session device id %s is negative, ignore it.", str_session_device_id.c_str());
+      }
+    } catch (const std::logic_error &) {
+      // std::stoi throws std::invalid_argument or std::out_of_range; both derive from std::logic_error.
+      GELOGW("session device id %s transform to int failed.", str_session_device_id.c_str());
+    }
+  }
+
   GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId()));
 
   DumpProperties dump_properties;
diff --git a/ge/session/omg.cc b/ge/session/omg.cc
index 47073fc0..368b4bec 100755
--- a/ge/session/omg.cc
+++ b/ge/session/omg.cc
@@ -606,7 +606,7 @@ Status InitDomiOmgContext(const string &input_shape, const string &input_format,
   }
 
   // Analyze the input shape paramete
-  unordered_map<string, vector<int64_t>> &shape_map = domi::GetContext().input_dims;
+  map<string, vector<int64_t>> &shape_map = domi::GetContext().input_dims;
 
   if (!ge::ParseInputShape(input_shape, domi::GetContext().input_dims, domi::GetContext().user_input_dims,
                            is_dynamic_input) ||
@@ -689,7 +689,7 @@ Status ParseOutNodes(const string &out_nodes) {
 ///
 static Status CheckOpNameMap(const ComputeGraphPtr &graph, const std::string &op_conf) {
   GE_CHECK_NOTNULL(graph);
-  unordered_map<string, string> graphNodeTypes;
+  map<string, string> graphNodeTypes;
   for (const NodePtr &node : graph->GetAllNodes()) {
     auto op_desc = node->GetOpDesc();
     if (op_desc == nullptr) {
diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc
index 4f32bd6b..168ca2c5 100755
--- a/ge/single_op/single_op.cc
+++ b/ge/single_op/single_op.cc
@@ -256,9 +256,27 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
                                      const vector<DataBuffer> &input_buffers,
                                      vector<GeTensorDesc> &output_desc,
                                      vector<DataBuffer> &output_buffers) {
-  GE_CHECK_NOTNULL(op_task_);
   GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers));
+  if (hybrid_model_executor_ != nullptr) {
+    GELOGD("Execute multi-task dynamic single op by hybrid model executor");
+    hybrid::HybridModelExecutor::ExecuteArgs args;
+    for (auto &input : input_buffers) {
+      args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length));
+    }
+    for (auto &output : output_buffers) {
+      args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length));
+    }
+    for (auto &tensor_desc : input_desc) {
+      auto desc = MakeShared<GeTensorDesc>(tensor_desc);
+      GE_CHECK_NOTNULL(desc);
+      args.input_desc.emplace_back(desc);
+    }
+
+    return hybrid_model_executor_->Execute(args);
+  }
+
   std::lock_guard<std::mutex> lk(*stream_mutex_);
+  GE_CHECK_NOTNULL(op_task_);
 
   GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_));
   GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic));
diff --git a/ge/single_op/single_op.h b/ge/single_op/single_op.h
index d677f94a..b350b684 100755
--- a/ge/single_op/single_op.h
+++ b/ge/single_op/single_op.h
@@ -28,6 +28,7 @@
 #include "runtime/stream.h"
 #include "task/op_task.h"
 #include "cce/aicpu_engine_struct.h"
+#include "hybrid/executor/hybrid_model_executor.h"
 
 namespace ge {
 class StreamResource;
@@ -46,7 +47,7 @@ class SingleOp {
   Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);
 
   friend class SingleOpModel;
-  StreamResource *stream_resource_;
+  StreamResource *stream_resource_ = nullptr;
   std::mutex *stream_mutex_;
   rtStream_t stream_ = nullptr;
   std::vector<void *> input_addr_list_;
@@ -77,6 +78,8 @@ class DynamicSingleOp {
                         std::vector<DataBuffer> &outputs) const;
 
   std::unique_ptr<OpTask> op_task_;
+  std::unique_ptr<hybrid::HybridModel> hybrid_model_;
+  std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_;
   uintptr_t resource_id_ = 0;
   std::mutex *stream_mutex_;
   rtStream_t stream_ = nullptr;
diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc
index 7d092091..1b776cc8 100755
--- a/ge/single_op/single_op_model.cc
+++ b/ge/single_op/single_op_model.cc
@@ -31,6 +31,8 @@
 #include "task/aicpu_task_builder.h"
 #include "task/aicpu_kernel_task_builder.h"
 #include "task/tbe_task_builder.h"
+#include "hybrid/executor/hybrid_model_executor.h"
+#include "hybrid/node_executor/node_executor.h"
 
 static std::atomic<std::uint64_t> aicpu_kernel_id(0);
 
@@ -42,6 +44,24 @@ namespace ge {
 namespace {
 const size_t kDataOutputNum = 1;
 }  // namespace
+// Sets `flag` to true when any node of the model's compute graph has a non-empty GetOpInferDepends() list;
+// `flag` is left untouched otherwise. Fails if the model, its compute graph or a node's op desc is null.
+static Status IfInferDepend(GeModelPtr &ge_model, bool &flag) {
+  GE_CHECK_NOTNULL(ge_model);
+  auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());
+  GE_CHECK_NOTNULL(comp_graph);
+  for (const auto &node : comp_graph->GetAllNodes()) {
+    auto op_desc = node->GetOpDesc();
+    GE_CHECK_NOTNULL(op_desc);
+    const auto &depends = op_desc->GetOpInferDepends();
+    if (!depends.empty()) {
+      flag = true;
+      return SUCCESS;
+    }
+  }
+  return SUCCESS;
+}
+
 SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size)
     : model_name_(model_name), ori_model_data_(model_data), ori_model_size_(model_size) {}
 
@@ -478,6 +498,33 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp &
   single_op.num_outputs_ = netoutput_op_->GetAllInputsSize();
   GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource));
   model_params_.memory_size = UINT_MAX;
+
+  auto ge_model = model_helper_.GetGeModel();
+  GE_CHECK_NOTNULL(ge_model);
+  bool infer_depend_flag = false;
+  GE_CHK_STATUS_RET_NOLOG(IfInferDepend(ge_model, infer_depend_flag));
+  auto model_task_def = ge_model->GetModelTaskDefPtr();
+  GE_CHECK_NOTNULL(model_task_def);
+  // Ops with more than one task, or with infer depends, are built and run through a hybrid model.
+  if (model_task_def->task_size() > 1 || infer_depend_flag) {
+    GELOGD("Build single op HybridModel.");
+    GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized());
+    auto root_model = model_helper_.GetGeRootModel();
+    GE_CHECK_NOTNULL(root_model);
+    root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph()));
+    root_model->SetSubgraphInstanceNameToModel(root_model->GetRootGraph()->GetName(), ge_model);
+    single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model));
+    GE_CHECK_NOTNULL(single_op.hybrid_model_);
+    GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "Failed to init hybrid model");
+    int32_t device_id = 0;
+    GE_CHK_RT_RET(rtGetDevice(&device_id));
+    single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(),
+                                                                                         device_id,
+                                                                                         resource.GetStream()));
+    GE_CHECK_NOTNULL(single_op.hybrid_model_executor_);
+    GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "Failed to init hybrid model executor");
+    return SUCCESS;
+  }
   return BuildTaskListForDynamicOp(single_op);
 }
 }  // namespace ge
diff --git a/ge/single_op/stream_resource.cc b/ge/single_op/stream_resource.cc
index db6b7c47..a3acf6b7 100755
--- a/ge/single_op/stream_resource.cc
+++ b/ge/single_op/stream_resource.cc
@@ -61,6 +61,10 @@ DynamicSingleOp *StreamResource::GetDynamicOperator(const void *key) {
   return it->second.get();
 }
 
+rtStream_t StreamResource::GetStream() const {
+  return stream_;
+}
+
 void StreamResource::SetStream(rtStream_t stream) {
   stream_ = stream;
 }
diff --git a/ge/single_op/stream_resource.h b/ge/single_op/stream_resource.h
index d5bc941a..d2c1ca36 100755
--- a/ge/single_op/stream_resource.h
+++ b/ge/single_op/stream_resource.h
@@ -37,6 +37,7 @@ class StreamResource {
   StreamResource(StreamResource &&) = delete;
   StreamResource &operator=(const StreamResource &) = delete;
   StreamResource &operator=(StreamResource &&) = delete;
+  rtStream_t GetStream() const;
   void SetStream(rtStream_t stream);
 
   SingleOp *GetOperator(const void *key);
diff --git a/ge/stub/gen_stubapi.py b/ge/stub/gen_stubapi.py
index 1476d505..f20d23a8 100644
--- a/ge/stub/gen_stubapi.py
+++ b/ge/stub/gen_stubapi.py
@@ -16,7 +16,7 @@ logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(leve
 """
     this attr is used for symbol table visible
 """
-GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY'
+GE_ATTR = 'GE_FUNC_VISIBILITY'
 
 """
     generate stub func body by return type
diff --git a/inc/external/ge/ge_api.h b/inc/external/ge/ge_api.h
index 9c26ebf8..cd4ca323 100644
--- a/inc/external/ge/ge_api.h
+++ b/inc/external/ge/ge_api.h
@@ -34,15 +34,15 @@ typedef uint32_t (*pCallBackFunc)(uint32_t graph_id, const std::map<AscendString
 }
 
 // Initialize GE
-ATTRIBUTED_DEPRECATED(Status GEInitialize(const std::map<AscendString, AscendString> &))
-Status GEInitialize(const std::map<std::string, std::string> &options);
+ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY Status GEInitialize(const std::map<AscendString, AscendString> &))
+GE_FUNC_VISIBILITY Status GEInitialize(const std::map<std::string, std::string> &options);
 
-Status GEInitialize(const std::map<AscendString, AscendString> &options);
+GE_FUNC_VISIBILITY Status GEInitialize(const std::map<AscendString, AscendString> &options);
 
 // Finalize GE, release all resources
-Status GEFinalize();
+GE_FUNC_VISIBILITY Status GEFinalize();
 
-class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Session {
+class GE_FUNC_VISIBILITY Session {
  public:
   ATTRIBUTED_DEPRECATED(Session(const std::map<AscendString, AscendString> &))
   explicit Session(const std::map<std::string, std::string> &options);
diff --git a/inc/external/ge/ge_api_error_codes.h b/inc/external/ge/ge_api_error_codes.h
index e77f817c..274a9784 100644
--- a/inc/external/ge/ge_api_error_codes.h
+++ b/inc/external/ge/ge_api_error_codes.h
@@ -28,7 +28,7 @@ namespace ge {
 #define ATTRIBUTED_DEPRECATED(replacement) __declspec(deprecated("Please use " #replacement " instead."))
 #endif
 
-class StatusFactory {
+class GE_FUNC_VISIBILITY StatusFactory {
  public:
   static StatusFactory *Instance() {
     static StatusFactory instance;
@@ -70,7 +70,7 @@ class StatusFactory {
   std::map<uint32_t, std::string> err_desc_;
 };
 
-class ErrorNoRegisterar {
+class GE_FUNC_VISIBILITY ErrorNoRegisterar {
  public:
   ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); }
   ErrorNoRegisterar(uint32_t err, const char *desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); }
diff --git a/inc/external/ge/ge_error_codes.h b/inc/external/ge/ge_error_codes.h
index 041fc7ae..b477a18c 100644
--- a/inc/external/ge/ge_error_codes.h
+++ b/inc/external/ge/ge_error_codes.h
@@ -17,6 +17,20 @@
 #ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_
 #define INC_EXTERNAL_GE_GE_ERROR_CODES_H_
 
+#if defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __declspec(dllexport)
+#else
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#else  // !defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#endif  // defined(_MSC_VER)
+
 #include <stddef.h>
 
 #ifdef __cplusplus
diff --git a/inc/external/ge/ge_ir_build.h b/inc/external/ge/ge_ir_build.h
index afaf42ac..889e2bea 100644
--- a/inc/external/ge/ge_ir_build.h
+++ b/inc/external/ge/ge_ir_build.h
@@ -17,6 +17,20 @@
 #ifndef INC_EXTERNAL_GE_IR_BUILD_H_
 #define INC_EXTERNAL_GE_IR_BUILD_H_
 
+#if defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __declspec(dllexport)
+#else
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#else  // !defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#endif  // defined(_MSC_VER)
+
 #include <string>
 #include <map>
 #include <memory>
@@ -44,17 +58,17 @@ struct ModelBufferData {
  * @retval GRAPH_SUCCESS The function is successfully executed.
  * @retval OtherValues Failure
  */
-ATTRIBUTED_DEPRECATED(graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &))
-graphStatus aclgrphBuildInitialize(std::map<std::string, std::string> global_options);
+ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &))
+GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map<std::string, std::string> global_options);
 
-graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &global_options);
+GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &global_options);
 
 /**
  * @ingroup AscendCL
  * @brief build model.Notice the model is stored in buffer
  *
  */
-void aclgrphBuildFinalize();
+GE_FUNC_VISIBILITY void aclgrphBuildFinalize();
 
 /**
  * @ingroup AscendCL
@@ -66,12 +80,12 @@ void aclgrphBuildFinalize();
  * @retval GRAPH_SUCCESS The function is successfully executed.
  * @retval OtherValues Failure
  */
-ATTRIBUTED_DEPRECATED(graphStatus aclgrphBuildModel(const ge::Graph &, const std::map<AscendString, AscendString> &,
+ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &, const std::map<AscendString, AscendString> &,
                                                     ModelBufferData &))
-graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<std::string, std::string> &build_options,
+GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<std::string, std::string> &build_options,
                               ModelBufferData &model);
 
-graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<AscendString, AscendString> &build_options,
+GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<AscendString, AscendString> &build_options,
                               ModelBufferData &model);
 
 /**
@@ -83,10 +97,10 @@ graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<AscendStrin
  * @retval GRAPH_SUCCESS The function is successfully executed.
  * @retval OtherValues Failure
  */
-ATTRIBUTED_DEPRECATED(graphStatus aclgrphSaveModel(const char *, const ModelBufferData &))
-graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &model);
+ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *, const ModelBufferData &))
+GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &model);
 
-graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &model);
+GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &model);
 
 /**
  * @ingroup AscendCL
@@ -98,7 +112,7 @@ graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &mod
  * @retval GRAPH_SUCCESS The function is successfully executed.
  * @retval OtherValues Failure
  */
-graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *patch_version);
+GE_FUNC_VISIBILITY graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *patch_version);
 
 /**
  * @ingroup AscendCL
@@ -110,7 +124,7 @@ graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *pat
  * @retval GRAPH_SUCCESS The function is successfully executed.
  * @retval OtherValues Failure
  */
-graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len);
+GE_FUNC_VISIBILITY graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len);
 
 /**
  * @ingroup AscendCL
@@ -123,7 +137,7 @@ graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const siz
  * @retval GRAPH_SUCCESS The function is successfully executed.
  * @retval OtherValues Failure
  */
-graphStatus aclgrphGenerateForOp(const AscendString &op_type, const std::vector<TensorDesc> &inputs,
+GE_FUNC_VISIBILITY graphStatus aclgrphGenerateForOp(const AscendString &op_type, const std::vector<TensorDesc> &inputs,
                                  const std::vector<TensorDesc> &outputs, Graph &graph);
 
 };      // namespace ge
diff --git a/inc/framework/common/debug/ge_log.h b/inc/framework/common/debug/ge_log.h
index 07cd1664..c1359a20 100644
--- a/inc/framework/common/debug/ge_log.h
+++ b/inc/framework/common/debug/ge_log.h
@@ -37,7 +37,7 @@ extern "C" {
 // trace status of log
 enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP };
 
-class GeLog {
+class GE_FUNC_VISIBILITY GeLog {
  public:
   static uint64_t GetTid() {
 #ifdef __GNUC__
diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h
index 31281cd6..58cb3693 100644
--- a/inc/framework/common/debug/log.h
+++ b/inc/framework/common/debug/log.h
@@ -278,7 +278,7 @@
   } while (0)
 
 template <typename T>
-std::string FmtToStr(const T &t) {
+GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) {
   std::string fmt;
   std::stringstream st;
   st << "[" << t << "]";
diff --git a/inc/framework/common/fmk_error_codes.h b/inc/framework/common/fmk_error_codes.h
index 358fca04..e910e346 100644
--- a/inc/framework/common/fmk_error_codes.h
+++ b/inc/framework/common/fmk_error_codes.h
@@ -17,6 +17,20 @@
 #ifndef INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_
 #define INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_
 
+#if defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __declspec(dllexport)
+#else
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#else  // !defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#endif  // defined(_MSC_VER)
+
 #include <map>
 #include <string>
 
@@ -38,7 +52,7 @@ const int MODID_OME = 2;          // OME module ID
 const int MODID_CALIBRATION = 3;  // Calibration module ID
 
 namespace domi {
-class StatusFactory {
+class GE_FUNC_VISIBILITY StatusFactory {
  public:
   static StatusFactory *Instance();
 
@@ -54,7 +68,7 @@ class StatusFactory {
   std::map<uint32_t, std::string> err_desc_;
 };
 
-class ErrorNoRegisterar {
+class GE_FUNC_VISIBILITY ErrorNoRegisterar {
  public:
   ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); }
   ~ErrorNoRegisterar() {}
diff --git a/inc/framework/common/ge_format_util.h b/inc/framework/common/ge_format_util.h
index 9b1d7786..dfceefb8 100644
--- a/inc/framework/common/ge_format_util.h
+++ b/inc/framework/common/ge_format_util.h
@@ -23,7 +23,7 @@
 #include "graph/tensor.h"
 
 namespace ge {
-class GeFormatUtil {
+class GE_FUNC_VISIBILITY GeFormatUtil {
  public:
   ///
   /// @name   TransShape
diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h
index 9ca77f1c..ec5adcba 100644
--- a/inc/framework/common/ge_types.h
+++ b/inc/framework/common/ge_types.h
@@ -215,7 +215,7 @@ struct ModelInfo {
 };
 
 // Asynchronous callback interface, implemented by the caller
-class ModelListener {
+class GE_FUNC_VISIBILITY ModelListener {
  public:
   virtual ~ModelListener() {}
   ///
diff --git a/inc/framework/common/gflags_util.h b/inc/framework/common/gflags_util.h
index 94d66ffb..6e9ea41b 100644
--- a/inc/framework/common/gflags_util.h
+++ b/inc/framework/common/gflags_util.h
@@ -17,11 +17,25 @@
 #ifndef INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_
 #define INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_
 
+#if defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __declspec(dllexport)
+#else
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#else  // !defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#endif  // defined(_MSC_VER)
+
 #include <gflags/gflags.h>
 #include <string>
 
 namespace ge {
-class GflagsUtils {
+class GE_FUNC_VISIBILITY GflagsUtils {
  public:
   static bool IsSetCommandTrue(const char *name) {
     std::string out;
diff --git a/inc/framework/common/helper/model_helper.h b/inc/framework/common/helper/model_helper.h
index 4a169dda..e25d5d6f 100644
--- a/inc/framework/common/helper/model_helper.h
+++ b/inc/framework/common/helper/model_helper.h
@@ -28,7 +28,7 @@
 #include "model/ge_root_model.h"
 
 namespace ge {
-class ModelHelper {
+class GE_FUNC_VISIBILITY ModelHelper {
  public:
   ModelHelper() = default;
   ~ModelHelper();
diff --git a/inc/framework/common/helper/om_file_helper.h b/inc/framework/common/helper/om_file_helper.h
index 98ad55d7..34509b39 100644
--- a/inc/framework/common/helper/om_file_helper.h
+++ b/inc/framework/common/helper/om_file_helper.h
@@ -51,7 +51,7 @@ struct SaveParam {
   std::string model_name;
 };
 
-class OmFileLoadHelper {
+class GE_FUNC_VISIBILITY OmFileLoadHelper {
  public:
   Status Init(const ge::ModelData &model);
 
@@ -77,7 +77,7 @@ class OmFileLoadHelper {
   bool is_inited_{false};
 };
 
-class OmFileSaveHelper {
+class GE_FUNC_VISIBILITY OmFileSaveHelper {
  public:
   ModelFileHeader &GetModelFileHeader() { return model_header_; }
 
diff --git a/inc/framework/common/l2_cache_optimize.h b/inc/framework/common/l2_cache_optimize.h
index c65f67b3..fdb1c8b5 100644
--- a/inc/framework/common/l2_cache_optimize.h
+++ b/inc/framework/common/l2_cache_optimize.h
@@ -69,7 +69,7 @@ struct RCMemoryBlock {
 };
 
 // L2Cache optimizer
-class L2CacheOptimize {
+class GE_FUNC_VISIBILITY L2CacheOptimize {
  public:
   explicit L2CacheOptimize(ge::ComputeGraphPtr &graph);
   ~L2CacheOptimize();
diff --git a/inc/framework/common/op/attr_value_util.h b/inc/framework/common/op/attr_value_util.h
index e3803b78..28d48c1d 100644
--- a/inc/framework/common/op/attr_value_util.h
+++ b/inc/framework/common/op/attr_value_util.h
@@ -17,6 +17,20 @@
 #ifndef INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_
 #define INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_
 
+#if defined(_MSC_VER)  // MSVC: dllexport; other compilers: visibility("default") attribute
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY _declspec(dllexport)
+#else  // !FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#else  // !_MSC_VER
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else  // !FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#endif  // _MSC_VER
+
 #include <google/protobuf/map.h>
 #include <unordered_map>
 #include <string>
@@ -34,127 +48,127 @@ namespace ge {
 using AttrDefMap = ::google::protobuf::Map<::std::string, ::domi::AttrDef>;
 using AttrDefPair = ::google::protobuf::MapPair<std::string, domi::AttrDef>;
 
-void AddOpAttr(const std::string &key, AttrDef &attr, OpDef *opdef);
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, AttrDef &attr, OpDef *opdef);
 // DEFINE_ADD_ATTR_VALUE
-void AddOpAttr(const std::string &key, const std::string &value, AttrDefMap *attrs);
-void AddOpAttr(const std::string &key, const char *value, AttrDefMap *attrs);
-void AddOpAttr(const char *key, const char *value, AttrDefMap *attrs);
-void AddOpAttr(const std::string &key, const uint32_t value, AttrDefMap *attrs);
-void AddOpAttr(const std::string &key, const int32_t value, AttrDefMap *attrs);
-void AddOpAttr(const std::string &key, const int64_t value, AttrDefMap *attrs);
-void AddOpAttr(const std::string &key, const float value, AttrDefMap *attrs);
-void AddOpAttr(const std::string &key, const double value, AttrDefMap *attrs);
-void AddOpAttr(const std::string &key, const bool value, AttrDefMap *attrs);
-
-void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, AttrDefMap *attrs);
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const std::string &value, AttrDefMap *attrs);
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const char *value, AttrDefMap *attrs);
+GE_FUNC_VISIBILITY void AddOpAttr(const char *key, const char *value, AttrDefMap *attrs);
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const uint32_t value, AttrDefMap *attrs);
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int32_t value, AttrDefMap *attrs);
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int64_t value, AttrDefMap *attrs);
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const float value, AttrDefMap *attrs);
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const double value, AttrDefMap *attrs);
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const bool value, AttrDefMap *attrs);
+
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, AttrDefMap *attrs);
 
 // DEFINE_ADD_ATTR_VALUE
-void AddOpAttr(const std::string &key, const std::string &value, OpDef *opdef);
-void AddOpAttr(const std::string &key, const char *value, OpDef *opdef);
-void AddOpAttr(const char *key, const char *value, OpDef *opdef);
-void AddOpAttr(const std::string &key, const uint32_t value, OpDef *opdef);
-void AddOpAttr(const std::string &key, const int32_t value, OpDef *opdef);
-void AddOpAttr(const std::string &key, const int64_t value, OpDef *opdef);
-void AddOpAttr(const std::string &key, const float value, OpDef *opdef);
-void AddOpAttr(const std::string &key, const double value, OpDef *opdef);
-void AddOpAttr(const std::string &key, const bool value, OpDef *opdef);
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const std::string &value, OpDef *opdef);
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const char *value, OpDef *opdef);
+GE_FUNC_VISIBILITY void AddOpAttr(const char *key, const char *value, OpDef *opdef);
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const uint32_t value, OpDef *opdef);
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int32_t value, OpDef *opdef);
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int64_t value, OpDef *opdef);
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const float value, OpDef *opdef);
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const double value, OpDef *opdef);
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const bool value, OpDef *opdef);
 
-void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, OpDef *opdef);
+GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, OpDef *opdef);
 
-void AddOpBytesAttr(const std::string &key, const void *value, size_t size, OpDef *opdef);
+GE_FUNC_VISIBILITY void AddOpBytesAttr(const std::string &key, const void *value, size_t size, OpDef *opdef);
 
 // DEFINE_ADD_ATTR_VALUE_LIST
-void AddOpAttrList(const std::string &key, const double value, AttrDefMap *attrs);
-void AddOpAttrList(const std::string &key, const float value, AttrDefMap *attrs);
-void AddOpAttrList(const std::string &key, const uint32_t value, AttrDefMap *attrs);
-void AddOpAttrList(const std::string &key, const int32_t value, AttrDefMap *attrs);
-void AddOpAttrList(const std::string &key, const std::string value, AttrDefMap *attrs);
-void AddOpAttrList(const std::string &key, const double value, OpDef *opdef);
-void AddOpAttrList(const std::string &key, const float value, OpDef *opdef);
-void AddOpAttrList(const std::string &key, const uint32_t value, OpDef *opdef);
-void AddOpAttrList(const std::string &key, const int32_t value, OpDef *opdef);
-void AddOpAttrList(const std::string &key, const bool value, OpDef *opdef);
-void AddOpAttrList(const std::string &key, const int64_t value, OpDef *opdef);
-
-void AddOpAttrList(const std::string &key, const std::string &value, OpDef *opdef);
-
-bool GetOpAttr(const std::string &key, std::string *value, const OpDef *opdef);
-bool GetOpAttr(const std::string &key, int32_t *value, const OpDef *opdef);
-bool GetOpAttr(const std::string &key, int64_t *value, const OpDef *opdef);
-bool GetOpAttr(const std::string &key, uint32_t *value, const OpDef *opdef);
-bool GetOpAttr(const std::string &key, float *value, const OpDef *opdef);
-bool GetOpAttr(const std::string &key, double *value, const OpDef *opdef);
-bool GetOpAttr(const std::string &key, bool *value, const OpDef *opdef);
-bool GetOpAttr(const std::string &key, AttrDef_ListValue *value, const OpDef *opdef);
-
-uint32_t GetOpAttrListSize(const std::string &key, std::string value, const OpDef *opdef);
-uint32_t GetOpAttrListSize(const std::string &key, int32_t value, const OpDef *opdef);
-uint32_t GetOpAttrListSize(const std::string &key, int64_t value, const OpDef *opdef);
-uint32_t GetOpAttrListSize(const std::string &key, uint32_t value, const OpDef *opdef);
-uint32_t GetOpAttrListSize(const std::string &key, float value, const OpDef *opdef);
-uint32_t GetOpAttrListSize(const std::string &key, double value, const OpDef *opdef);
-uint32_t GetOpAttrListSize(const std::string &key, bool value, const OpDef *opdef);
-
-bool GetBytesAttr(const std::string &key, std::string *value, const OpDef *opdef);
-bool GetBytesAttr(const std::string &key, std::string *value, const ModelDef *model_def);
-
-void AddModelAttr(const std::string &key, const std::string &value, ModelDef *model_def);
-void AddModelAttr(const std::string &key, const char *value, ModelDef *model_def);
-void AddModelAttr(const char *key, const char *value, ModelDef *model_def);
-void AddModelAttr(const std::string &key, const uint32_t value, ModelDef *model_def);
-void AddModelAttr(const std::string &key, const int32_t value, ModelDef *model_def);
-void AddModelAttr(const std::string &key, const int64_t value, ModelDef *model_def);
-void AddModelAttr(const std::string &key, const float value, ModelDef *model_def);
-void AddModelAttr(const std::string &key, const double value, ModelDef *model_def);
-void AddModelAttr(const std::string &key, const bool value, ModelDef *model_def);
-void AddModelAttr(const std::string &key, const void *value, size_t size, ModelDef *model_def);
-void AddModelAttr(const std::string &key, const AttrDef_ListValue &value, ModelDef *model_def);
-
-void AddModelAttrList(const std::string &key, const double value, ModelDef *model_def);
-void AddModelAttrList(const std::string &key, const float value, ModelDef *model_def);
-void AddModelAttrList(const std::string &key, const uint32_t value, ModelDef *model_def);
-void AddModelAttrList(const std::string &key, const int32_t value, ModelDef *model_def);
-void AddModelAttrList(const std::string &key, const std::string &value, ModelDef *model_def);
-
-bool GetModelAttr(const std::string &key, std::string *value, const ModelDef *model_def);
-bool GetModelAttr(const std::string &key, int32_t *value, const ModelDef *model_def);
-bool GetModelAttr(const std::string &key, int64_t *value, const ModelDef *model_def);
-bool GetModelAttr(const std::string &key, uint32_t *value, const ModelDef *model_def);
-bool GetModelAttr(const std::string &key, float *value, const ModelDef *model_def);
-bool GetModelAttr(const std::string &key, double *value, const ModelDef *model_def);
-bool GetModelAttr(const std::string &key, bool *value, const ModelDef *model_def);
-bool GetModelAttr(const std::string &key, AttrDef_ListValue *value, const ModelDef *model_def);
-
-bool HasOpAttr(const OpDef *opdef, const std::string &attr_name);
-
-void SetAttrDef(const std::string &value, AttrDef *out);
-void SetAttrDef(const char *value, AttrDef *out);
-void SetAttrDef(const uint32_t value, AttrDef *out);
-void SetAttrDef(const int32_t value, AttrDef *out);
-void SetAttrDef(const float value, AttrDef *out);
-void SetAttrDef(const double value, AttrDef *out);
-void SetAttrDef(const bool value, AttrDef *out);
-void SetAttrList(const std::string &value, AttrDef *out);
-void SetAttrList(const bool value, AttrDef *out);
-void SetAttrList(const float value, AttrDef *out);
-void SetAttrList(const double value, AttrDef *out);
-void SetAttrList(const uint32_t value, AttrDef *out);
-
-bool GetAttrDefValue(const std::string &key, std::string *value, const AttrDefMap &attr);
-bool GetAttrDefValue(const std::string &key, int32_t *value, const AttrDefMap &attr);
-bool GetAttrDefValue(const std::string &key, int64_t *value, const AttrDefMap &attr);
-bool GetAttrDefValue(const std::string &key, uint32_t *value, const AttrDefMap &attr);
-bool GetAttrDefValue(const std::string &key, float *value, const AttrDefMap &attr);
-bool GetAttrDefValue(const std::string &key, double *value, const AttrDefMap &attr);
-bool GetAttrDefValue(const std::string &key, bool *value, const AttrDefMap &attr);
-bool GetAttrDefValue(const std::string &key, AttrDef_ListValue *value, const AttrDefMap &attr);
-bool GetAttrDefValue(const std::string &key, NamedAttrs *&value, AttrDefMap *attr);
-bool GetAttrDefValue(const std::string &key, const NamedAttrs *&value, const AttrDefMap &attr);
-
-bool GetAttrDefListValue(const std::string &key, int idx, int32_t *value, const AttrDefMap &attr);
-bool GetAttrDefListValue(const std::string &key, int idx, uint32_t *value, const AttrDefMap &attr);
-bool GetAttrDefListValue(const std::string &key, int idx, float *value, const AttrDefMap &attr);
-bool GetAttrDefListValue(const std::string &key, int idx, double *value, const AttrDefMap &attr);
+GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const double value, AttrDefMap *attrs);
+GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const float value, AttrDefMap *attrs);
+GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const uint32_t value, AttrDefMap *attrs);
+GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int32_t value, AttrDefMap *attrs);
+GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const std::string value, AttrDefMap *attrs);
+GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const double value, OpDef *opdef);
+GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const float value, OpDef *opdef);
+GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const uint32_t value, OpDef *opdef);
+GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int32_t value, OpDef *opdef);
+GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const bool value, OpDef *opdef);
+GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int64_t value, OpDef *opdef);
+
+GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const std::string &value, OpDef *opdef);
+
+GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, std::string *value, const OpDef *opdef);
+GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, int32_t *value, const OpDef *opdef);
+GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, int64_t *value, const OpDef *opdef);
+GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, uint32_t *value, const OpDef *opdef);
+GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, float *value, const OpDef *opdef);
+GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, double *value, const OpDef *opdef);
+GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, bool *value, const OpDef *opdef);
+GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, AttrDef_ListValue *value, const OpDef *opdef);
+
+GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, std::string value, const OpDef *opdef);
+GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, int32_t value, const OpDef *opdef);
+GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, int64_t value, const OpDef *opdef);
+GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, uint32_t value, const OpDef *opdef);
+GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, float value, const OpDef *opdef);
+GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, double value, const OpDef *opdef);
+GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, bool value, const OpDef *opdef);
+
+GE_FUNC_VISIBILITY bool GetBytesAttr(const std::string &key, std::string *value, const OpDef *opdef);
+GE_FUNC_VISIBILITY bool GetBytesAttr(const std::string &key, std::string *value, const ModelDef *model_def);
+
+GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const std::string &value, ModelDef *model_def);
+GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const char *value, ModelDef *model_def);
+GE_FUNC_VISIBILITY void AddModelAttr(const char *key, const char *value, ModelDef *model_def);
+GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const uint32_t value, ModelDef *model_def);
+GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const int32_t value, ModelDef *model_def);
+GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const int64_t value, ModelDef *model_def);
+GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const float value, ModelDef *model_def);
+GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const double value, ModelDef *model_def);
+GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const bool value, ModelDef *model_def);
+GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const void *value, size_t size, ModelDef *model_def);
+GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const AttrDef_ListValue &value, ModelDef *model_def);
+
+GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const double value, ModelDef *model_def);
+GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const float value, ModelDef *model_def);
+GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const uint32_t value, ModelDef *model_def);
+GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const int32_t value, ModelDef *model_def);
+GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const std::string &value, ModelDef *model_def);
+
+GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, std::string *value, const ModelDef *model_def);
+GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, int32_t *value, const ModelDef *model_def);
+GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, int64_t *value, const ModelDef *model_def);
+GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, uint32_t *value, const ModelDef *model_def);
+GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, float *value, const ModelDef *model_def);
+GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, double *value, const ModelDef *model_def);
+GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, bool *value, const ModelDef *model_def);
+GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, AttrDef_ListValue *value, const ModelDef *model_def);
+
+GE_FUNC_VISIBILITY bool HasOpAttr(const OpDef *opdef, const std::string &attr_name);
+
+GE_FUNC_VISIBILITY void SetAttrDef(const std::string &value, AttrDef *out);
+GE_FUNC_VISIBILITY void SetAttrDef(const char *value, AttrDef *out);
+GE_FUNC_VISIBILITY void SetAttrDef(const uint32_t value, AttrDef *out);
+GE_FUNC_VISIBILITY void SetAttrDef(const int32_t value, AttrDef *out);
+GE_FUNC_VISIBILITY void SetAttrDef(const float value, AttrDef *out);
+GE_FUNC_VISIBILITY void SetAttrDef(const double value, AttrDef *out);
+GE_FUNC_VISIBILITY void SetAttrDef(const bool value, AttrDef *out);
+GE_FUNC_VISIBILITY void SetAttrList(const std::string &value, AttrDef *out);
+GE_FUNC_VISIBILITY void SetAttrList(const bool value, AttrDef *out);
+GE_FUNC_VISIBILITY void SetAttrList(const float value, AttrDef *out);
+GE_FUNC_VISIBILITY void SetAttrList(const double value, AttrDef *out);
+GE_FUNC_VISIBILITY void SetAttrList(const uint32_t value, AttrDef *out);
+
+GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, std::string *value, const AttrDefMap &attr);
+GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, int32_t *value, const AttrDefMap &attr);
+GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, int64_t *value, const AttrDefMap &attr);
+GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, uint32_t *value, const AttrDefMap &attr);
+GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, float *value, const AttrDefMap &attr);
+GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, double *value, const AttrDefMap &attr);
+GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, bool *value, const AttrDefMap &attr);
+GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, AttrDef_ListValue *value, const AttrDefMap &attr);
+GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, NamedAttrs *&value, AttrDefMap *attr);
+GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, const NamedAttrs *&value, const AttrDefMap &attr);
+
+GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, int32_t *value, const AttrDefMap &attr);
+GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, uint32_t *value, const AttrDefMap &attr);
+GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, float *value, const AttrDefMap &attr);
+GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, double *value, const AttrDefMap &attr);
 }
 
 #endif  // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_
diff --git a/inc/framework/common/op/ge_op_utils.h b/inc/framework/common/op/ge_op_utils.h
index aa50c8a1..89529520 100644
--- a/inc/framework/common/op/ge_op_utils.h
+++ b/inc/framework/common/op/ge_op_utils.h
@@ -34,36 +34,36 @@ namespace ge {
 using domi::Status;
 
 // Add Sub Mul
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t ADD_INPUT_NUM;
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SUB_INPUT_NUM;
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MUL_INPUT_NUM;
+GE_FUNC_VISIBILITY extern const uint32_t ADD_INPUT_NUM;
+GE_FUNC_VISIBILITY extern const uint32_t SUB_INPUT_NUM;
+GE_FUNC_VISIBILITY extern const uint32_t MUL_INPUT_NUM;
 
 // Permute
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int32_t PERMUTE_ORDER_NUM;
+GE_FUNC_VISIBILITY extern const int32_t PERMUTE_ORDER_NUM;
 
 // Ssd PriroBox
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const double SSD_PRIORBOX_ASPECT_RATIO_VALUE;
+GE_FUNC_VISIBILITY extern const double SSD_PRIORBOX_ASPECT_RATIO_VALUE;
 
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t STRIDEDSLICE_INPUT_NUM;
+GE_FUNC_VISIBILITY extern const uint32_t STRIDEDSLICE_INPUT_NUM;
 
 // Switch
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SWITCH_INPUT_NUM;
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SWITCH_OUTPUT_NUM;
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SWITCH_FALSE_OUTPUT;
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SWITCH_TRUE_OUTPUT;
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SWITCH_DATA_INPUT;
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SWITCH_PRED_INPUT;
+GE_FUNC_VISIBILITY extern const uint32_t SWITCH_INPUT_NUM;
+GE_FUNC_VISIBILITY extern const uint32_t SWITCH_OUTPUT_NUM;
+GE_FUNC_VISIBILITY extern const uint32_t SWITCH_FALSE_OUTPUT;
+GE_FUNC_VISIBILITY extern const uint32_t SWITCH_TRUE_OUTPUT;
+GE_FUNC_VISIBILITY extern const uint32_t SWITCH_DATA_INPUT;
+GE_FUNC_VISIBILITY extern const uint32_t SWITCH_PRED_INPUT;
 
 // FunctionOp
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t IF_COND_INPUT;
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t FOR_START_INPUT;
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t FOR_LIMIT_INPUT;
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t FOR_DELTA_INPUT;
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t FOR_DATA_INPUT;
+GE_FUNC_VISIBILITY extern const uint32_t IF_COND_INPUT;
+GE_FUNC_VISIBILITY extern const uint32_t FOR_START_INPUT;
+GE_FUNC_VISIBILITY extern const uint32_t FOR_LIMIT_INPUT;
+GE_FUNC_VISIBILITY extern const uint32_t FOR_DELTA_INPUT;
+GE_FUNC_VISIBILITY extern const uint32_t FOR_DATA_INPUT;
 
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int NORMAL_TENSOR_SIZE;
+GE_FUNC_VISIBILITY extern const int NORMAL_TENSOR_SIZE;
 
-class OpUtils {
+class GE_FUNC_VISIBILITY OpUtils {
  public:
   ///
   /// @ingroup domi_ome
diff --git a/inc/framework/common/op_types.h b/inc/framework/common/op_types.h
index 4555d5c3..fa41c1b6 100644
--- a/inc/framework/common/op_types.h
+++ b/inc/framework/common/op_types.h
@@ -21,7 +21,7 @@
 #include <string>
 
 namespace ge {
-class OpTypeContainer {
+class GE_FUNC_VISIBILITY OpTypeContainer {
  public:
   static OpTypeContainer *Instance() {
     static OpTypeContainer instance;
@@ -43,7 +43,7 @@ class OpTypeContainer {
   std::set<std::string> op_type_list_;
 };
 
-class OpTypeRegistrar {
+class GE_FUNC_VISIBILITY OpTypeRegistrar {
  public:
   explicit OpTypeRegistrar(const std::string &op_type) { OpTypeContainer::Instance()->Register(op_type); }
   ~OpTypeRegistrar() {}
diff --git a/inc/framework/common/profiling/ge_profiling.h b/inc/framework/common/profiling/ge_profiling.h
index 83699754..7017aca3 100644
--- a/inc/framework/common/profiling/ge_profiling.h
+++ b/inc/framework/common/profiling/ge_profiling.h
@@ -38,9 +38,9 @@ struct ProfCommandHandleData {
   uint32_t modelId;
 };
 
-ge::Status RegProfCtrlCallback(MsprofCtrlCallback func);
-ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func);
-ge::Status RegProfReporterCallback(MsprofReporterCallback func);
-ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len);
+GE_FUNC_VISIBILITY ge::Status RegProfCtrlCallback(MsprofCtrlCallback func);
+GE_FUNC_VISIBILITY ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func);
+GE_FUNC_VISIBILITY ge::Status RegProfReporterCallback(MsprofReporterCallback func);
+GE_FUNC_VISIBILITY ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len);
 
 #endif  // INC_FRAMEWORK_COMMON_GE_PROFILING_H_
diff --git a/inc/framework/common/profiling/ge_runner_profiling.h b/inc/framework/common/profiling/ge_runner_profiling.h
index d2eff767..011797a3 100644
--- a/inc/framework/common/profiling/ge_runner_profiling.h
+++ b/inc/framework/common/profiling/ge_runner_profiling.h
@@ -19,6 +19,6 @@
 
 #include "profiling/ge_profiling.h"
 
-bool IsInitialize();
+GE_FUNC_VISIBILITY bool IsInitialize();
 
 #endif  // INC_FRAMEWORK_COMMON_GE_RUNNER_PROFILING_H_
diff --git a/inc/framework/common/scope_guard.h b/inc/framework/common/scope_guard.h
index 001a0e75..62ae4b6d 100644
--- a/inc/framework/common/scope_guard.h
+++ b/inc/framework/common/scope_guard.h
@@ -29,7 +29,7 @@
 #define GE_DISMISS_GUARD(var) make_guard_##var.Dismiss()
 
 namespace ge {
-class ScopeGuard {
+class GE_FUNC_VISIBILITY ScopeGuard {
  public:
   // Noncopyable
   ScopeGuard(ScopeGuard const &) = delete;
diff --git a/inc/framework/common/string_util.h b/inc/framework/common/string_util.h
index de19807c..f0368363 100644
--- a/inc/framework/common/string_util.h
+++ b/inc/framework/common/string_util.h
@@ -17,6 +17,20 @@
 #ifndef INC_FRAMEWORK_COMMON_STRING_UTIL_H_
 #define INC_FRAMEWORK_COMMON_STRING_UTIL_H_
 
+#if defined(_MSC_VER)  // MSVC: dllexport; other compilers: visibility("default") attribute
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY _declspec(dllexport)
+#else  // !FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#else  // !_MSC_VER
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else  // !FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY
+#endif  // FUNC_VISIBILITY
+#endif  // _MSC_VER
+
 #include <cctype>
 #include <securec.h>
 
@@ -27,7 +41,7 @@
 #include <vector>
 
 namespace ge {
-class StringUtils {
+class GE_FUNC_VISIBILITY StringUtils {
  public:
   static std::string &Ltrim(std::string &s) {
 #if __cplusplus >= 201103L
diff --git a/inc/framework/common/util.h b/inc/framework/common/util.h
index 42ab3868..525cf3ea 100644
--- a/inc/framework/common/util.h
+++ b/inc/framework/common/util.h
@@ -237,7 +237,7 @@ const int32_t DOMI_MAX_PATH_LEN = 256;
 /// @return true success
 /// @return false fail
 ///
-bool ReadProtoFromBinaryFile(const char *file, Message *proto);
+GE_FUNC_VISIBILITY bool ReadProtoFromBinaryFile(const char *file, Message *proto);
 
 ///
 /// @ingroup domi_common
@@ -248,7 +248,7 @@ bool ReadProtoFromBinaryFile(const char *file, Message *proto);
 /// @return true success
 /// @return false fail
 ///
-bool ReadProtoFromArray(const void *data, int size, Message *proto);
+GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *data, int size, Message *proto);
 
 ///
 /// @ingroup domi_proto
@@ -258,9 +258,9 @@ bool ReadProtoFromArray(const void *data, int size, Message *proto);
 /// @return true success
 /// @return false fail
 ///
-bool ReadProtoFromText(const char *file, google::protobuf::Message *message);
+GE_FUNC_VISIBILITY bool ReadProtoFromText(const char *file, google::protobuf::Message *message);
 
-bool ReadProtoFromMem(const char *data, int size, google::protobuf::Message *message);
+GE_FUNC_VISIBILITY bool ReadProtoFromMem(const char *data, int size, google::protobuf::Message *message);
 
 ///
 /// @ingroup: domi_common
@@ -268,7 +268,7 @@ bool ReadProtoFromMem(const char *data, int size, google::protobuf::Message *mes
 /// @param [in] input_file: path of file
 /// @return long： File length. If the file length fails to be obtained, the value -1 is returned.
 ///
-extern long GetFileLength(const std::string &input_file);
+GE_FUNC_VISIBILITY extern long GetFileLength(const std::string &input_file);
 
 ///
 /// @ingroup domi_common
@@ -279,9 +279,9 @@ extern long GetFileLength(const std::string &input_file);
 /// @return false fail
 /// @return true success
 ///
-bool ReadBytesFromBinaryFile(const char *file_name, char **buffer, int &length);
+GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *file_name, char **buffer, int &length);
 
-bool ReadBytesFromBinaryFile(const char *file_name, std::vector<char> &buffer);
+GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *file_name, std::vector<char> &buffer);
 
 ///
 /// @ingroup domi_common
@@ -290,14 +290,14 @@ bool ReadBytesFromBinaryFile(const char *file_name, std::vector<char> &buffer);
 /// @return 0 success
 /// @return -1 fail
 ///
-extern int CreateDirectory(const std::string &directory_path);
+GE_FUNC_VISIBILITY extern int CreateDirectory(const std::string &directory_path);
 
 ///
 /// @ingroup domi_common
 /// @brief Obtains the current time string.
 /// @return Time character string in the format ： %Y%m%d%H%M%S, eg: 20171011083555
 ///
-std::string CurrentTimeInStr();
+GE_FUNC_VISIBILITY std::string CurrentTimeInStr();
 
 ///
 /// @ingroup domi_common
@@ -306,7 +306,7 @@ std::string CurrentTimeInStr();
 /// @return string
 ///
 template <typename T>
-std::string ToString(std::vector<T> &v) {
+GE_FUNC_VISIBILITY std::string ToString(std::vector<T> &v) {
   std::stringstream ss;
   ss << "[";
   for (T x : v) {
@@ -326,7 +326,7 @@ std::string ToString(std::vector<T> &v) {
 /// @return string
 ///
 template <typename T>
-std::string ToString(const google::protobuf::RepeatedField<T> &rpd_field) {
+GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedField<T> &rpd_field) {
   std::stringstream ss;
   ss << "[";
   for (T x : rpd_field) {
@@ -345,7 +345,7 @@ std::string ToString(const google::protobuf::RepeatedField<T> &rpd_field) {
 /// @return Timestamp, in microseconds (US)
 ///
 ///
-uint64_t GetCurrentTimestamp();
+GE_FUNC_VISIBILITY uint64_t GetCurrentTimestamp();
 
 ///
 /// @ingroup domi_common
@@ -353,7 +353,7 @@ uint64_t GetCurrentTimestamp();
 /// @return Timestamp, in seconds (US)
 ///
 ///
-uint32_t GetCurrentSecondTimestap();
+GE_FUNC_VISIBILITY uint32_t GetCurrentSecondTimestap();
 
 ///
 /// @ingroup domi_common
@@ -362,7 +362,7 @@ uint32_t GetCurrentSecondTimestap();
 /// @param [in] b
 /// @return false: true: The result is within the normal int64 range.
 ///
-bool CheckInt64MulOverflow(int64_t a, int64_t b);
+GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(int64_t a, int64_t b);
 
 ///
 /// @ingroup domi_common
@@ -370,7 +370,7 @@ bool CheckInt64MulOverflow(int64_t a, int64_t b);
 /// @param [in] path of input file
 /// @param [out] Absolute path of a file. If the absolute path cannot be obtained, an empty string is returned
 ///
-std::string RealPath(const char *path);
+GE_FUNC_VISIBILITY std::string RealPath(const char *path);
 
 ///
 /// @ingroup domi_common
@@ -381,7 +381,7 @@ std::string RealPath(const char *path);
 /// @param [in] file_path path of input file
 /// @param [out] result
 ///
-bool CheckInputPathValid(const std::string &file_path, const std::string &atc_param = "");
+GE_FUNC_VISIBILITY bool CheckInputPathValid(const std::string &file_path, const std::string &atc_param = "");
 
 ///
 /// @ingroup domi_common
@@ -389,7 +389,7 @@ bool CheckInputPathValid(const std::string &file_path, const std::string &atc_pa
 /// @param [in] file_path path of output file
 /// @param [out] result
 ///
-bool CheckOutputPathValid(const std::string &file_path, const std::string &atc_param = "");
+GE_FUNC_VISIBILITY bool CheckOutputPathValid(const std::string &file_path, const std::string &atc_param = "");
 
 ///
 /// @ingroup domi_common
@@ -397,7 +397,7 @@ bool CheckOutputPathValid(const std::string &file_path, const std::string &atc_p
 /// @param [in] filePath file path
 /// @param [out] result
 ///
-bool ValidateStr(const std::string &filePath, const std::string &mode);
+GE_FUNC_VISIBILITY bool ValidateStr(const std::string &filePath, const std::string &mode);
 
 ///
 /// @ingroup domi_common
@@ -405,7 +405,7 @@ bool ValidateStr(const std::string &filePath, const std::string &mode);
 /// @param [in] file_path file path
 /// @param [out] result
 ///
-bool IsValidFile(const char *file_path);
+GE_FUNC_VISIBILITY bool IsValidFile(const char *file_path);
 
 ///
 /// @ingroup domi_common
@@ -415,7 +415,7 @@ bool IsValidFile(const char *file_path);
 /// @return 0 success
 /// @return -1 fail
 ///
-Status CheckPath(const char *path, size_t length);
+GE_FUNC_VISIBILITY Status CheckPath(const char *path, size_t length);
 }  // namespace ge
 
 #endif  // INC_FRAMEWORK_COMMON_UTIL_H_
diff --git a/inc/framework/engine/dnnengine.h b/inc/framework/engine/dnnengine.h
index 1bcf5e07..8a0f3b65 100644
--- a/inc/framework/engine/dnnengine.h
+++ b/inc/framework/engine/dnnengine.h
@@ -45,7 +45,7 @@ struct DNNEngineAttribute {
   Format engine_output_format;
 };
 
-class DNNEngine {
+class GE_FUNC_VISIBILITY DNNEngine {
  public:
   virtual ~DNNEngine() = default;
   virtual Status Initialize(const std::map<std::string, std::string> &options) = 0;
diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h
index 3136e172..c546f63d 100644
--- a/inc/framework/executor/ge_executor.h
+++ b/inc/framework/executor/ge_executor.h
@@ -46,7 +46,7 @@ struct RunModelData {
   std::vector<uint64_t> dynamic_dims;  // Dynamic dims scene, set dynamic dims, not supported by default:empty
 };
 
-class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
+class GE_FUNC_VISIBILITY GeExecutor {
  public:
   GeExecutor();
   ~GeExecutor() = default;
diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h
index e0904965..2d7d007b 100644
--- a/inc/framework/generator/ge_generator.h
+++ b/inc/framework/generator/ge_generator.h
@@ -31,7 +31,7 @@
 #include "omg/omg_inner_types.h"
 
 namespace ge {
-class GeGenerator {
+class GE_FUNC_VISIBILITY GeGenerator {
  public:
   static GeGenerator &GetInstance() {
     static GeGenerator Instance;
diff --git a/inc/framework/generator/generator_api.h b/inc/framework/generator/generator_api.h
index 71c6832e..d44edd0c 100644
--- a/inc/framework/generator/generator_api.h
+++ b/inc/framework/generator/generator_api.h
@@ -17,6 +17,20 @@
 #ifndef INC_FRAMEWORK_GENERATOR_GENERATOR_API_H_
 #define INC_FRAMEWORK_GENERATOR_GENERATOR_API_H_
 
+#if defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY _declspec(dllexport)
+#else
+#define GE_FUNC_VISIBILITY
+#endif
+#else
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define GE_FUNC_VISIBILITY
+#endif
+#endif
+
 #include <stdint.h>
 
 #ifdef __cplusplus
@@ -40,7 +54,7 @@ typedef void *OpTensor_t;
 /// @param [in] om_file: file name for the om to save.
 /// @return 0 for success / others for fail
 ///
-extern Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int in_num,
+GE_FUNC_VISIBILITY extern Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int in_num,
                                  const OpTensor_t *out_tensor, int out_num, const OpAttr_t attr, const char *om_file);
 
 ///
@@ -52,7 +66,7 @@ extern Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tenso
 /// @param [in] num: number of shape.
 /// @return OpTensor_t for success / nullptr for failure
 ///
-extern OpTensor_t OpTensorCreate(int format, int datatype, const int64_t *shape, int num);
+GE_FUNC_VISIBILITY extern OpTensor_t OpTensorCreate(int format, int datatype, const int64_t *shape, int num);
 
 ///
 /// @ingroup ge
@@ -61,7 +75,7 @@ extern OpTensor_t OpTensorCreate(int format, int datatype, const int64_t *shape,
 /// @param [out] none
 /// @return 0 for success / others for failure.
 ///
-extern Status_t OpTensorDestroy(OpTensor_t tensor);
+GE_FUNC_VISIBILITY extern Status_t OpTensorDestroy(OpTensor_t tensor);
 
 ///
 /// @ingroup ge
@@ -70,7 +84,7 @@ extern Status_t OpTensorDestroy(OpTensor_t tensor);
 /// @param [out] none
 /// @return OpAttr_t for success / nullptr for failure.
 ///
-extern OpAttr_t OpAttrCreate();
+GE_FUNC_VISIBILITY extern OpAttr_t OpAttrCreate();
 
 ///
 /// @ingroup ge
@@ -79,7 +93,7 @@ extern OpAttr_t OpAttrCreate();
 /// @param [out] none
 /// @return 0 for success / others for failure.
 ///
-extern Status_t OpAttrDestroy(OpAttr_t attr);
+GE_FUNC_VISIBILITY extern Status_t OpAttrDestroy(OpAttr_t attr);
 
 ///
 /// @ingroup ge
@@ -89,7 +103,7 @@ extern Status_t OpAttrDestroy(OpAttr_t attr);
 /// @param [in] value: attributed value.
 /// @return 0 for success / others for failure.
 ///
-extern Status_t SetAttrBool(OpAttr_t attr, const char *name, bool value);
+GE_FUNC_VISIBILITY extern Status_t SetAttrBool(OpAttr_t attr, const char *name, bool value);
 
 ///
 /// @ingroup ge
@@ -99,7 +113,7 @@ extern Status_t SetAttrBool(OpAttr_t attr, const char *name, bool value);
 /// @param [in] value: attribute value.
 /// @return 0 for success / others for failure.
 ///
-extern Status_t SetAttrInt(OpAttr_t attr, const char *name, int64_t value);
+GE_FUNC_VISIBILITY extern Status_t SetAttrInt(OpAttr_t attr, const char *name, int64_t value);
 
 ///
 /// @ingroup ge
@@ -109,7 +123,7 @@ extern Status_t SetAttrInt(OpAttr_t attr, const char *name, int64_t value);
 /// @param [in] value: attribute value.
 /// @return 0 for success / others for failure.
 ///
-extern Status_t SetAttrFloat(OpAttr_t attr, const char *name, float value);
+GE_FUNC_VISIBILITY extern Status_t SetAttrFloat(OpAttr_t attr, const char *name, float value);
 
 ///
 /// @ingroup ge
@@ -119,7 +133,7 @@ extern Status_t SetAttrFloat(OpAttr_t attr, const char *name, float value);
 /// @param [in] value: attribute value (can`t be nullptr, end with '\0').
 /// @return 0 for success / others for failure.
 ///
-extern Status_t SetAttrString(OpAttr_t attr, const char *name, const char *value);
+GE_FUNC_VISIBILITY extern Status_t SetAttrString(OpAttr_t attr, const char *name, const char *value);
 
 ///
 /// @ingroup ge
@@ -130,7 +144,7 @@ extern Status_t SetAttrString(OpAttr_t attr, const char *name, const char *value
 /// @param [in] num: number of value array.
 /// @return 0 for success / others for failure.
 ///
-extern Status_t SetAttrBoolList(OpAttr_t attr, const char *name, const bool *value, int num);
+GE_FUNC_VISIBILITY extern Status_t SetAttrBoolList(OpAttr_t attr, const char *name, const bool *value, int num);
 
 ///
 /// @ingroup ge
@@ -141,7 +155,7 @@ extern Status_t SetAttrBoolList(OpAttr_t attr, const char *name, const bool *val
 /// @param [in] num: number of value array.
 /// @return 0 for success / others for failure.
 ///
-extern Status_t SetAttrIntList(OpAttr_t attr, const char *name, const int64_t *value, int num);
+GE_FUNC_VISIBILITY extern Status_t SetAttrIntList(OpAttr_t attr, const char *name, const int64_t *value, int num);
 
 ///
 /// @ingroup ge
@@ -152,7 +166,7 @@ extern Status_t SetAttrIntList(OpAttr_t attr, const char *name, const int64_t *v
 /// @param [in] num: number of value array.
 /// @return 0 for success / others for failure.
 ///
-extern Status_t SetAttrFloatList(OpAttr_t attr, const char *name, const float *value, int num);
+GE_FUNC_VISIBILITY extern Status_t SetAttrFloatList(OpAttr_t attr, const char *name, const float *value, int num);
 
 ///
 /// @ingroup ge
@@ -163,7 +177,7 @@ extern Status_t SetAttrFloatList(OpAttr_t attr, const char *name, const float *v
 /// @param [in] num: number of value array.
 /// @return 0 for success / others for failure.
 ///
-extern Status_t SetAttrStringList(OpAttr_t attr, const char *name, const char **value, int num);
+GE_FUNC_VISIBILITY extern Status_t SetAttrStringList(OpAttr_t attr, const char *name, const char **value, int num);
 
 #ifdef __cplusplus
 }
diff --git a/inc/framework/memory/memory_api.h b/inc/framework/memory/memory_api.h
index d8b06125..34e596a2 100644
--- a/inc/framework/memory/memory_api.h
+++ b/inc/framework/memory/memory_api.h
@@ -46,26 +46,26 @@ struct TensorInfo {
 /// \param size [in] rdma pool memory size to be allocated.
 /// \param mem_type [in] memory type for rdma pool.
 /// \return Status result of function
-Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_MEMORY_HBM);
+GE_FUNC_VISIBILITY Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_MEMORY_HBM);
 
 ///
 /// \param var_info [in] host variable addr infos.
 /// \param mem_type [in] memory type for rdma pool.
 /// \return Status result of function
-Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info, rtMemType_t mem_type = RT_MEMORY_HBM);
+GE_FUNC_VISIBILITY Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info, rtMemType_t mem_type = RT_MEMORY_HBM);
 
 ///
 /// \param tensor_info [in] description for tensor stored shared memory.
 /// \param dev_addr [out] malloced shared memory addr.
 /// \param memory_size [out] malloced shared memory size.
 /// \return Status result of function
-Status MallocSharedMemory(const TensorInfo &tensor_info, uint64_t &dev_addr, uint64_t &memory_size);
+GE_FUNC_VISIBILITY Status MallocSharedMemory(const TensorInfo &tensor_info, uint64_t &dev_addr, uint64_t &memory_size);
 
 ///
 /// \param var_name [in] var_name name of host variable.
 /// \param base_addr [out] base_addr vase addr of host variable.
 /// \param var_size [out] var_size memory_size of host variable.
 /// \return Status result of function
-Status GetVarBaseAddrAndSize(const std::string &var_name, uint64_t &base_addr, uint64_t &var_size);
+GE_FUNC_VISIBILITY Status GetVarBaseAddrAndSize(const std::string &var_name, uint64_t &base_addr, uint64_t &var_size);
 }  // namespace ge
 #endif  // INC_FRAMEWORK_MEMORY_MEMORY_API_H_
diff --git a/inc/framework/memory/memory_assigner.h b/inc/framework/memory/memory_assigner.h
index 4552fa7c..f5837b3a 100644
--- a/inc/framework/memory/memory_assigner.h
+++ b/inc/framework/memory/memory_assigner.h
@@ -24,7 +24,7 @@
 
 namespace ge {
 const int64_t MEM_ALIGN_SIZE = 512;
-class MemoryAssigner {
+class GE_FUNC_VISIBILITY MemoryAssigner {
  public:
   explicit MemoryAssigner(ge::ComputeGraphPtr compute_graph) : compute_graph_(std::move(compute_graph)) {}
   virtual ~MemoryAssigner() = default;
diff --git a/inc/framework/omg/omg.h b/inc/framework/omg/omg.h
index 62332b8d..eb25f919 100644
--- a/inc/framework/omg/omg.h
+++ b/inc/framework/omg/omg.h
@@ -43,7 +43,7 @@ namespace ge {
  * @brief init omg context
  * @return void
  */
-Status InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format,
+GE_FUNC_VISIBILITY Status InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format,
                           bool is_dynamic_input);
 
 /**
@@ -61,7 +61,7 @@ Status InitDomiOmgContext(const string &input_shape, const string &input_format,
  * @param [in] atc_params multiply atc params
  * @return Status result code
  */
-Status ParseGraph(ge::Graph &graph, const std::map<string, string> &atc_params, const char *model_file,
+GE_FUNC_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<string, string> &atc_params, const char *model_file,
                   const char *weights_file, domi::FrameworkType type, const char *op_conf = nullptr,
                   const char *target = nullptr, RunMode run_mode = GEN_OM_MODEL, bool is_dynamic_input = false);
 
@@ -73,9 +73,9 @@ Status ParseGraph(ge::Graph &graph, const std::map<string, string> &atc_params,
  * @param [key] encrypted key
  * @return Status result code
  */
-Status ConvertOm(const char *model_file, const char *json_file, bool is_covert_to_json);
+GE_FUNC_VISIBILITY Status ConvertOm(const char *model_file, const char *json_file, bool is_covert_to_json);
 
-Status ConvertPbtxtToJson(const char *model_file, const char *json_file);
+GE_FUNC_VISIBILITY Status ConvertPbtxtToJson(const char *model_file, const char *json_file);
 /**
  * @ingroup domi_omg
  * @brief convert the model file in protobuf format into a JSON file.
@@ -85,26 +85,26 @@ Status ConvertPbtxtToJson(const char *model_file, const char *json_file);
  * @param [key] encrypted key
  * @return Status result code
  */
-Status ConvertFwkModelToJson(domi::FrameworkType framework, const char *model_file, const char *json_file);
+GE_FUNC_VISIBILITY Status ConvertFwkModelToJson(domi::FrameworkType framework, const char *model_file, const char *json_file);
 
-void GetGroupName(ge::proto::ModelDef &model);
+GE_FUNC_VISIBILITY void GetGroupName(ge::proto::ModelDef &model);
 
-void FindParserSo(const string &path, vector<string> &fileList, string &caffe_parser_path);
+GE_FUNC_VISIBILITY void FindParserSo(const string &path, vector<string> &fileList, string &caffe_parser_path);
 
-Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file);
+GE_FUNC_VISIBILITY Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file);
 
-Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format);
+GE_FUNC_VISIBILITY Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format);
 
-Status GetOutputLeaf(ge::NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info);
+GE_FUNC_VISIBILITY Status GetOutputLeaf(ge::NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info);
 
-void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info,
+GE_FUNC_VISIBILITY void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info,
                                 std::vector<std::string> &output_nodes_name);
 
-void UpdateOmgCtxWithParserCtx();
+GE_FUNC_VISIBILITY void UpdateOmgCtxWithParserCtx();
 
-void UpdateParserCtxWithOmgCtx();
+GE_FUNC_VISIBILITY void UpdateParserCtxWithOmgCtx();
 
-void PrintModelInfo(ge::proto::ModelDef *model_def);
+GE_FUNC_VISIBILITY void PrintModelInfo(ge::proto::ModelDef *model_def);
 }  // namespace ge
 
 namespace domi {
@@ -113,7 +113,7 @@ namespace domi {
  * @brief get omg context
  * @return reference of OmgContext
  */
-ge::OmgContext &GetContext();
+GE_FUNC_VISIBILITY ge::OmgContext &GetContext();
 }  // namespace domi
 
 #endif  // INC_FRAMEWORK_OMG_OMG_H_
diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h
index 1049b6b5..54c9ab4a 100644
--- a/inc/framework/omg/omg_inner_types.h
+++ b/inc/framework/omg/omg_inner_types.h
@@ -83,7 +83,7 @@ struct OmgContext {
   // user-designate input dims
   std::vector<std::pair<std::string, std::vector<int64_t>>> user_input_dims;
   // global input dims
-  std::unordered_map<std::string, std::vector<int64_t>> input_dims;
+  std::map<std::string, std::vector<int64_t>> input_dims;
 
   // resolve the mapping between operators with the same name and corresponding network. format e.g.
   // Detectionoutput:SsdDetectiontOutput
@@ -132,7 +132,7 @@ namespace domi {
  * @brief get OMG context
  * @return OmgContext context
  */
-ge::OmgContext &GetContext();
+GE_FUNC_VISIBILITY ge::OmgContext &GetContext();
 
 struct TEBinInfo {
   // It is obsolete. It will be automatically obtained from the binfilename field of the JSON file later.
diff --git a/inc/framework/omg/parser/model_parser.h b/inc/framework/omg/parser/model_parser.h
index 9eda685d..8fae5556 100644
--- a/inc/framework/omg/parser/model_parser.h
+++ b/inc/framework/omg/parser/model_parser.h
@@ -37,7 +37,7 @@ using Status = domi::Status;
 namespace domi {
 using GetGraphCallback = std::function<std::unique_ptr<google::protobuf::Message>(
   const google::protobuf::Message *root_proto, const std::string &graph)>;
-class ModelParser {
+class GE_FUNC_VISIBILITY ModelParser {
  public:
   ModelParser() {}
 
diff --git a/inc/framework/omg/parser/op_parser.h b/inc/framework/omg/parser/op_parser.h
index 087bad32..70bec218 100644
--- a/inc/framework/omg/parser/op_parser.h
+++ b/inc/framework/omg/parser/op_parser.h
@@ -34,7 +34,7 @@ namespace ge {
  * @brief Used to analyze operator information
  *
  */
-class OpParser {
+class GE_FUNC_VISIBILITY OpParser {
  public:
   /**
    * @ingroup domi_omg
diff --git a/inc/framework/omg/parser/parser_api.h b/inc/framework/omg/parser/parser_api.h
index 382bdfde..6c223665 100644
--- a/inc/framework/omg/parser/parser_api.h
+++ b/inc/framework/omg/parser/parser_api.h
@@ -24,8 +24,8 @@
 
 namespace ge {
 // Initialize parser
-Status ParserInitialize(const std::map<std::string, std::string>& options);
+GE_FUNC_VISIBILITY Status ParserInitialize(const std::map<std::string, std::string>& options);
 // Finalize parser, release all resources
-Status ParserFinalize();
+GE_FUNC_VISIBILITY Status ParserFinalize();
 }  // namespace ge
 #endif // INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_
diff --git a/inc/framework/omg/parser/parser_factory.h b/inc/framework/omg/parser/parser_factory.h
index 4845606f..9d6590c0 100644
--- a/inc/framework/omg/parser/parser_factory.h
+++ b/inc/framework/omg/parser/parser_factory.h
@@ -33,7 +33,7 @@ class ModelParser;
 typedef std::shared_ptr<ModelParser> (*MODEL_PARSER_CREATOR_FUN)(void);
 
 // Create modelparser for different frameworks
-class ModelParserFactory {
+class GE_FUNC_VISIBILITY ModelParserFactory {
  public:
   static ModelParserFactory *Instance();
 
@@ -61,7 +61,7 @@ class ModelParserFactory {
   std::map<domi::FrameworkType, MODEL_PARSER_CREATOR_FUN> creator_map_;
 };  // end class ModelParserFactory
 
-class ModelParserRegisterar {
+class GE_FUNC_VISIBILITY ModelParserRegisterar {
  public:
   ModelParserRegisterar(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN fun) {
     ModelParserFactory::Instance()->RegisterCreator(type, fun);
@@ -85,7 +85,7 @@ class ModelParserRegisterar {
 typedef std::shared_ptr<WeightsParser> (*WEIGHTS_PARSER_CREATOR_FUN)(void);
 
 // Create weightsparser for different frameworks
-class WeightsParserFactory {
+class GE_FUNC_VISIBILITY WeightsParserFactory {
  public:
   static WeightsParserFactory *Instance();
 
@@ -113,7 +113,7 @@ class WeightsParserFactory {
   std::map<domi::FrameworkType, WEIGHTS_PARSER_CREATOR_FUN> creator_map_;
 };  // end class WeightsParserFactory
 
-class WeightsParserRegisterar {
+class GE_FUNC_VISIBILITY WeightsParserRegisterar {
  public:
   WeightsParserRegisterar(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN fun) {
     WeightsParserFactory::Instance()->RegisterCreator(type, fun);
diff --git a/inc/framework/omg/parser/parser_inner_ctx.h b/inc/framework/omg/parser/parser_inner_ctx.h
index 5d91bd46..b23da53f 100644
--- a/inc/framework/omg/parser/parser_inner_ctx.h
+++ b/inc/framework/omg/parser/parser_inner_ctx.h
@@ -34,7 +34,7 @@ struct ParserContext {
   std::vector<domiTensorFormat_t> output_formats;
   // user-designate input dims
   std::vector<std::pair<std::string, std::vector<int64_t>>> user_input_dims;
-  std::unordered_map<std::string, std::vector<int64_t>> input_dims;
+  std::map<std::string, std::vector<int64_t>> input_dims;
   // resolve the mapping between operators with the same name and corresponding network. format e.g.
   // Detectionoutput:SsdDetectiontOutput
   std::map<std::string, std::string> op_conf_map;
@@ -68,7 +68,7 @@ struct ParserContext {
   std::string enable_scope_fusion_passes;
 };
 
-ParserContext &GetParserContext();
+GE_FUNC_VISIBILITY ParserContext &GetParserContext();
 }  // namespace ge
 
 #endif  // INC_FRAMEWORK_OMG_PARSER_PARSER_INNER_CONTEXT_H_
diff --git a/inc/framework/omg/parser/weights_parser.h b/inc/framework/omg/parser/weights_parser.h
index 1b5216b3..e4436044 100644
--- a/inc/framework/omg/parser/weights_parser.h
+++ b/inc/framework/omg/parser/weights_parser.h
@@ -34,7 +34,7 @@ namespace domi {
  * @brief Weight information resolver
  *
  */
-class WeightsParser {
+class GE_FUNC_VISIBILITY WeightsParser {
  public:
   /**
    * @ingroup domi_omg
diff --git a/inc/framework/omg/version.h b/inc/framework/omg/version.h
index ac649d83..4facba0d 100644
--- a/inc/framework/omg/version.h
+++ b/inc/framework/omg/version.h
@@ -27,7 +27,7 @@
 #include "framework/common/debug/ge_log.h"
 
 namespace ge {
-class PlatformVersionManager {
+class GE_FUNC_VISIBILITY PlatformVersionManager {
  public:
   PlatformVersionManager() = delete;
   ~PlatformVersionManager() = delete;
diff --git a/metadef b/metadef
index c86433f1..25967258 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit c86433f19f6df542adaa5d444ea9bc52d96e6371
+Subproject commit 2596725889c19c60a03440ab9e4e313070326ec0
diff --git a/parser b/parser
index 98f17f4a..6516132e 160000
--- a/parser
+++ b/parser
@@ -1 +1 @@
-Subproject commit 98f17f4a2a37f283797858eabefa9dba1d06a66b
+Subproject commit 6516132e2eaeea2bf51cc790d52c83709588f5d8
diff --git a/tests/depends/omg/src/omg_stub.cc b/tests/depends/omg/src/omg_stub.cc
index 13ddf8bb..811db2d2 100644
--- a/tests/depends/omg/src/omg_stub.cc
+++ b/tests/depends/omg/src/omg_stub.cc
@@ -401,7 +401,7 @@ struct OmgContext {
   // user-designate input dims
   std::vector<std::pair<std::string, std::vector<int64_t>>> user_input_dims;
   // global input dims
-  std::unordered_map<std::string, std::vector<int64_t>> input_dims;
+  std::map<std::string, std::vector<int64_t>> input_dims;
 
   // solve rename op e.g: Detectionoutput:SsdDetectiontOutput
   std::map<std::string, std::string> op_conf_map;
diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc
index 9b45e7e2..1a170167 100644
--- a/tests/depends/runtime/src/runtime_stub.cc
+++ b/tests/depends/runtime/src/runtime_stub.cc
@@ -245,9 +245,35 @@ rtError_t rtProfilerInit(const char *prof_dir, const char *address, const char *
 
 rtError_t rtProfilerStart(void) { return RT_ERROR_NONE; }
 
-rtError_t rtLabelCreate(rtLabel_t *label) { return RT_ERROR_NONE; }
+rtError_t rtLabelCreate(rtLabel_t *label) {
+  *label = new uint64_t;
+  return RT_ERROR_NONE;
+}
+
+rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream) {
+  *label = new uint64_t;
+  return RT_ERROR_NONE;
+}
+
+rtError_t rtLabelCreateV2(rtLabel_t *label, rtModel_t model) {
+  *label = new uint64_t;
+  return RT_ERROR_NONE;
+}
 
-rtError_t rtLabelDestroy(rtLabel_t label) { return RT_ERROR_NONE; }
+rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_t stream) {
+  *label = new uint64_t;
+  return RT_ERROR_NONE;
+}
+
+rtError_t rtLabelListCpy(rtLabel_t *label, uint32_t labelNumber, void *dst, uint32_t dstMax) {
+  return RT_ERROR_NONE;
+}
+
+rtError_t rtLabelDestroy(rtLabel_t label) {
+  uint64_t *stub = static_cast<uint64_t *>(label);
+  delete stub;
+  return RT_ERROR_NONE;
+}
 
 rtError_t rtLabelSet(rtLabel_t label, rtStream_t stream) { return RT_ERROR_NONE; }
 
@@ -255,8 +281,17 @@ rtError_t rtLabelSwitch(void *ptr, rtCondition_t condition, uint32_t value, rtLa
   return RT_ERROR_NONE;
 }
 
+rtError_t rtLabelSwitchByIndex(void *ptr, uint32_t max, void *labelInfoPtr, rtStream_t stream) {
+  return RT_ERROR_NONE;
+}
+
 rtError_t rtLabelGoto(rtLabel_t label, rtStream_t stream) { return RT_ERROR_NONE; }
 
+rtError_t rtLabelGotoEx(rtLabel_t label, rtStream_t stream) {
+  return RT_ERROR_NONE;
+}
+
+
 rtError_t rtInvalidCache(uint64_t base, uint32_t len) { return RT_ERROR_NONE; }
 
 rtError_t rtModelLoadComplete(rtModel_t model) { return RT_ERROR_NONE; }
@@ -364,12 +399,6 @@ rtError_t rtSetCtxINFMode(bool mode)
   return RT_ERROR_NONE;
 }
 
-rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream)
-{
-  *label = new uint32_t;
-  return RT_ERROR_NONE;
-}
-
 rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value)
 {
   return RT_ERROR_NONE;
diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt
index b98c8546..697725c6 100755
--- a/tests/ut/ge/CMakeLists.txt
+++ b/tests/ut/ge/CMakeLists.txt
@@ -404,6 +404,8 @@ set(DISTINCT_GRAPH_LOAD_SRC_FILES
     "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc"
     "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/kernel_task_info.cc"
     "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/label_set_task_info.cc"
+    "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc"
+    "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc"
     "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc"
     "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc"
     "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc"
@@ -562,6 +564,46 @@ set(SINGLE_OP_SRC_FILES
     "${GE_CODE_DIR}/ge/single_op/single_op_manager.cc"
     "${GE_CODE_DIR}/ge/single_op/task/aicpu_task_builder.cc"
     "${GE_CODE_DIR}/ge/single_op/task/aicpu_kernel_task_builder.cc"
+    "${GE_CODE_DIR}/ge/hybrid/common/tensor_value.cc"
+    "${GE_CODE_DIR}/ge/hybrid/common/npu_memory_allocator.cc"
+    "${GE_CODE_DIR}/ge/hybrid/executor/rt_callback_manager.cc"
+    "${GE_CODE_DIR}/ge/hybrid/executor/node_state.cc"
+    "${GE_CODE_DIR}/ge/hybrid/executor/node_done_manager.cc"
+    "${GE_CODE_DIR}/ge/hybrid/executor/hybrid_profiler.cc"
+    "${GE_CODE_DIR}/ge/hybrid/executor/hybrid_model_executor.cc"
+    "${GE_CODE_DIR}/ge/hybrid/executor/hybrid_model_async_executor.cc"
+    "${GE_CODE_DIR}/ge/hybrid/executor/hybrid_execution_context.cc"
+    "${GE_CODE_DIR}/ge/hybrid/executor/subgraph_context.cc"
+    "${GE_CODE_DIR}/ge/hybrid/executor/subgraph_executor.cc"
+    "${GE_CODE_DIR}/ge/hybrid/executor/worker/task_compile_engine.cc"
+    "${GE_CODE_DIR}/ge/hybrid/executor/worker/shape_inference_engine.cc"
+    "${GE_CODE_DIR}/ge/hybrid/executor/worker/execution_engine.cc"
+    "${GE_CODE_DIR}/ge/hybrid/model/hybrid_model.cc"
+    "${GE_CODE_DIR}/ge/hybrid/model/hybrid_model_builder.cc"
+    "${GE_CODE_DIR}/ge/hybrid/model/node_item.cc"
+    "${GE_CODE_DIR}/ge/hybrid/model/graph_item.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/aicore/aicore_node_executor.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/aicore/aicore_op_task.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/aicore/aicore_task_builder.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel_factory.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/controlop/control_op_executor.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/hccl/hccl_node_executor.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/rts/rts_node_executor.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/node_executor.cc"
+    "${GE_CODE_DIR}/ge/hybrid/node_executor/task_context.cc"
+    "${GE_CODE_DIR}/ge/hybrid/hybrid_davinci_model.cc"
 )
 
 # test files
@@ -587,6 +629,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES
     "graph/load/kernel_task_info_unittest.cc"
     "graph/load/memcpy_addr_async_task_info_unittest.cc"
     "graph/load/memcpy_async_task_info_unittest.cc"
+    "graph/load/cpu_queue_schedule_unittest.cc"
     #"graph/graph_load_unittest.cc"
     "graph/ge_executor_unittest.cc"
     "graph/load/model_helper_unittest.cc"
@@ -635,7 +678,7 @@ set(PASS_TEST_FILES
     "graph/passes/net_output_pass_unittest.cc"
     "graph/passes/no_use_reshape_remove_pass_unittest.cc"
     "graph/passes/infershape_pass_unittest.cc"
-	"graph/passes/multi_batch_clone_pass_unittest.cc"
+    "graph/passes/multi_batch_clone_pass_unittest.cc"
 )
 
 set(KERNEL_TEST_FILES
diff --git a/tests/ut/ge/graph/load/cpu_queue_schedule_unittest.cc b/tests/ut/ge/graph/load/cpu_queue_schedule_unittest.cc
new file mode 100644
index 00000000..a36754b8
--- /dev/null
+++ b/tests/ut/ge/graph/load/cpu_queue_schedule_unittest.cc
@@ -0,0 +1,70 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#define private public
+#define protected public
+#include "graph/load/model_manager/cpu_queue_schedule.h"
+#undef private
+#undef protected
+
+using namespace std;
+
+namespace ge {
+class UtestCpuQueueSchedule : public testing::Test {
+ protected:
+  void SetUp() {}
+
+  void TearDown() {}
+};
+
+// Init is expected to succeed for a single mbuf address backed by a single outside-address mapping.
+TEST_F(UtestCpuQueueSchedule, CpuTaskZeroCopy_Init_Success) {
+  CpuTaskZeroCopy cpu_task_zero_copy(nullptr);
+  std::vector<uintptr_t> mbuf_list;
+  std::map<uint32_t, ZeroCopyOffset> outside_addrs;
+  ZeroCopyOffset addr_mapping;
+  addr_mapping.addr_count_ = 1;
+  std::vector<void *> addr_offset;
+  addr_offset.push_back(reinterpret_cast<void *>(0x11110000));  // fake address, never dereferenced in this test
+  uintptr_t addr = 0x12340000;  // fake address, used both as the mbuf entry and as the mapping key
+  std::map<const void *, std::vector<void *>> outside_addr;
+  outside_addr[reinterpret_cast<void *>(addr)] = addr_offset;
+  addr_mapping.outside_addrs_.emplace_back(outside_addr);
+  mbuf_list.emplace_back(addr);
+  uint32_t index = 0;
+  outside_addrs[index] = addr_mapping;
+  EXPECT_EQ(cpu_task_zero_copy.Init(mbuf_list, outside_addrs), SUCCESS);
+}
+
+TEST_F(UtestCpuQueueSchedule, CpuTaskInfo_Init_args_valid) {
+  CpuTaskZeroCopy cpu_task_zero_copy(nullptr);
+  CpuTaskActiveEntry cpu_task_active_entry(nullptr);
+  CpuTaskModelDequeue cpu_task_model_dequeue(nullptr);
+  CpuTaskModelRepeat cpu_task_model_repeat(nullptr);
+  CpuTaskWaitEndGraph cpu_task_wait_end_graph(nullptr);
+  CpuTaskModelEnqueue cpu_task_model_enqueue(nullptr);
+  CpuTaskPrepareOutput cpu_task_prepare_output(nullptr);
+  EXPECT_EQ(cpu_task_zero_copy.Distribute(), FAILED);
+  EXPECT_EQ(cpu_task_active_entry.Distribute(), FAILED);
+  EXPECT_EQ(cpu_task_model_dequeue.Distribute(), FAILED);
+  EXPECT_EQ(cpu_task_model_repeat.Distribute(), FAILED);
+  EXPECT_EQ(cpu_task_wait_end_graph.Distribute(), FAILED);
+  EXPECT_EQ(cpu_task_model_enqueue.Distribute(), FAILED);
+  EXPECT_EQ(cpu_task_prepare_output.Distribute(), FAILED);
+}
+}  // namespace ge
diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc
index 47968345..2b9bb4ed 100644
--- a/tests/ut/ge/graph/load/davinci_model_unittest.cc
+++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc
@@ -32,21 +32,10 @@ class UtestDavinciModel : public testing::Test {
   void SetUp() {}
 
   void TearDown() {}
-  public:
-    NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) {
-      GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT);
-      auto op_desc = std::make_shared<OpDesc>(name, type);
-      for (auto i = 0; i < in_num; ++i) {
-        op_desc->AddInputDesc(test_desc);
-      }
-      for (auto i = 0; i < out_num; ++i) {
-        op_desc->AddOutputDesc(test_desc);
-      }
-      return graph->AddNode(op_desc);
-    }
 };
 
-/*TEST_F(UtestDavinciModel, init_success) {
+/*
+TEST_F(UtestDavinciModel, init_success) {
   DavinciModel model(0, nullptr);
   ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
   ProfilingManager::Instance().is_load_profiling_ = true;
@@ -130,7 +119,8 @@ class UtestDavinciModel : public testing::Test {
   EXPECT_EQ(outputs.size(), 1);
 
   ProfilingManager::Instance().is_load_profiling_ = false;
-}*/
+}
+*/
 
 TEST_F(UtestDavinciModel, init_data_op) {
   DavinciModel model(0, nullptr);
@@ -181,7 +171,8 @@ TEST_F(UtestDavinciModel, init_data_op_subgraph) {
 
   uint32_t data_op_index = 0;
   map<uint32_t, OpDescPtr> data_by_index;
-  EXPECT_EQ(model.InitDataOp(nullptr, node, data_op_index, data_by_index), SUCCESS);
+  set<const void *> input_outside_addrs;
+  EXPECT_EQ(model.InitDataOp(nullptr, node, data_op_index, data_by_index, input_outside_addrs), SUCCESS);
 
   EXPECT_EQ(model.input_addrs_list_.size(), 0);
   EXPECT_EQ(model.output_addrs_list_.size(), 0);
@@ -206,7 +197,8 @@ TEST_F(UtestDavinciModel, init_netoutput_op_subgraph) {
   NodePtr node = graph->AddNode(op_output);
 
   std::vector<OpDescPtr> output_op_list;
-  EXPECT_EQ(model.InitNetOutput(nullptr, node, output_op_list), SUCCESS);
+  set<const void *> output_outside_addrs;
+  EXPECT_EQ(model.InitNetOutput(nullptr, node, output_op_list, output_outside_addrs), SUCCESS);
 
   EXPECT_EQ(model.input_addrs_list_.size(), 0);
   EXPECT_EQ(model.output_addrs_list_.size(), 0);
@@ -755,4 +747,110 @@ TEST_F(UtestDavinciModel, init_data_aipp_input_dims_normal) {
   EXPECT_EQ(model.output_addrs_list_.size(), 0);
   EXPECT_EQ(model.op_list_.size(), 1);
 }
+
+/*
+// Disabled: test DavinciModel::Init with label switch-by-index, label set and label goto tasks.
+TEST_F(UtestDavinciModel, label_task_success) {
+  DavinciModel model(0, nullptr);
+  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
+
+  GeModelPtr ge_model = make_shared<GeModel>();
+  ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph));
+  AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 5120000);
+  AttrUtils::SetInt(ge_model, ATTR_MODEL_STREAM_NUM, 1);
+
+  shared_ptr<domi::ModelTaskDef> model_task_def = make_shared<domi::ModelTaskDef>();
+  ge_model->SetModelTaskDef(model_task_def);
+
+  GeTensorDesc tensor(GeShape(), FORMAT_ND, DT_INT32);
+  TensorUtils::SetSize(tensor, 64);
+
+  uint32_t op_index = 0;
+  {
+    OpDescPtr op_desc = CreateOpDesc("label_switch", LABELSWITCHBYINDEX);
+    op_desc->AddInputDesc(tensor);
+    op_desc->SetInputOffset({1024});
+    NodePtr node = graph->AddNode(op_desc);  // op_index = 0
+    EXPECT_TRUE(AttrUtils::SetListInt(op_desc, ATTR_NAME_LABEL_SWITCH_LIST, {0, 1}));
+
+    domi::TaskDef *task_def1 = model_task_def->add_task();
+    task_def1->set_stream_id(0);
+    task_def1->set_type(RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX);
+    domi::LabelSwitchByIndexDef *label_task_def = task_def1->mutable_label_switch_by_index();
+    label_task_def->set_op_index(op_index++);
+    label_task_def->set_label_max(2);
+  }
+
+  {
+    OpDescPtr op_desc = CreateOpDesc("label_then", LABELSET);
+    NodePtr node = graph->AddNode(op_desc);  // op_index = 1
+    EXPECT_TRUE(AttrUtils::SetInt(op_desc, ATTR_NAME_LABEL_SWITCH_INDEX, 1));
+
+    domi::TaskDef *task_def1 = model_task_def->add_task();
+    task_def1->set_stream_id(0);
+    task_def1->set_type(RT_MODEL_TASK_LABEL_SET);
+    domi::LabelSetDef *label_task_def = task_def1->mutable_label_set();
+    label_task_def->set_op_index(op_index++);
+  }
+
+  {
+    OpDescPtr op_desc = CreateOpDesc("label_goto", LABELGOTOEX);
+    NodePtr node = graph->AddNode(op_desc);      // op_index = 2
+    EXPECT_TRUE(AttrUtils::SetInt(op_desc, ATTR_NAME_LABEL_SWITCH_INDEX, 2));
+
+    domi::TaskDef *task_def2 = model_task_def->add_task();
+    task_def2->set_stream_id(0);
+    task_def2->set_type(RT_MODEL_TASK_STREAM_LABEL_GOTO);
+    domi::LabelGotoExDef *label_task_def = task_def2->mutable_label_goto_ex();
+    label_task_def->set_op_index(op_index++);
+  }
+
+  {
+    OpDescPtr op_desc = CreateOpDesc("label_else", LABELSET);
+    NodePtr node = graph->AddNode(op_desc);  // op_index = 3
+    EXPECT_TRUE(AttrUtils::SetInt(op_desc, ATTR_NAME_LABEL_SWITCH_INDEX, 0));
+
+    domi::TaskDef *task_def1 = model_task_def->add_task();
+    task_def1->set_stream_id(0);
+    task_def1->set_type(RT_MODEL_TASK_LABEL_SET);
+    domi::LabelSetDef *label_task_def = task_def1->mutable_label_set();
+    label_task_def->set_op_index(op_index++);
+  }
+
+  {
+    OpDescPtr op_desc = CreateOpDesc("label_leave", LABELSET);
+    NodePtr node = graph->AddNode(op_desc);  // op_index = 4
+    EXPECT_TRUE(AttrUtils::SetInt(op_desc, ATTR_NAME_LABEL_SWITCH_INDEX, 2));
+
+    domi::TaskDef *task_def1 = model_task_def->add_task();
+    task_def1->set_stream_id(0);
+    task_def1->set_type(RT_MODEL_TASK_LABEL_SET);
+    domi::LabelSetDef *label_task_def = task_def1->mutable_label_set();
+    label_task_def->set_op_index(op_index++);
+  }
+
+  EXPECT_TRUE(AttrUtils::SetInt(ge_model, ATTR_MODEL_LABEL_NUM, 3));
+  EXPECT_EQ(model.Assign(ge_model), SUCCESS);
+  EXPECT_EQ(model.Init(), SUCCESS);
+  EXPECT_EQ(model.input_addrs_list_.size(), 0);
+  EXPECT_EQ(model.output_addrs_list_.size(), 0);
+  EXPECT_EQ(model.task_list_.size(), 5);
+}
+*/
+
+TEST_F(UtestDavinciModel, LoadWithQueue_fail_with_diff_args) {  // LoadWithQueue() is expected to fail in 3 setups.
+  DavinciModel model(0, nullptr);
+  model.ge_model_ = make_shared<GeModel>();
+  model.input_queue_ids_.emplace_back(0);  // Case 1: one input queue id while input_data_info_ has no entries.
+  EXPECT_EQ(model.LoadWithQueue(), ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID);
+  EXPECT_EQ(model.input_data_info_.size(), 0);
+  ZeroCopyOffset zero_copy_offset;  // Default-constructed entry, copied into both data-info containers below.
+  model.input_data_info_[0] = zero_copy_offset;
+  model.output_queue_ids_.emplace_back(0);  // Case 2: one output queue id while output_data_info_ has no entries.
+  EXPECT_EQ(model.LoadWithQueue(), ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID);
+  EXPECT_EQ(model.output_data_info_.size(), 0);
+  model.output_data_info_[0] = zero_copy_offset;  // Case 3: each side now has one queue id and one info entry.
+  EXPECT_EQ(model.LoadWithQueue(), INTERNAL_ERROR);
+  EXPECT_EQ(model.active_stream_list_.size(), 0);
+}
 }  // namespace ge
diff --git a/tests/ut/ge/single_op/single_op_manager_unittest.cc b/tests/ut/ge/single_op/single_op_manager_unittest.cc
index a70d2984..05da8683 100644
--- a/tests/ut/ge/single_op/single_op_manager_unittest.cc
+++ b/tests/ut/ge/single_op/single_op_manager_unittest.cc
@@ -17,7 +17,6 @@
 #include <gtest/gtest.h>
 #include <vector>
 
-#include "cce/taskdown_common.hpp"
 #include "runtime/rt.h"
 
 #define protected public
diff --git a/third_party/fwkacllib/inc/register/op_kernel_registry.h b/third_party/fwkacllib/inc/register/op_kernel_registry.h
index 5fed8960..35fcc857 100644
--- a/third_party/fwkacllib/inc/register/op_kernel_registry.h
+++ b/third_party/fwkacllib/inc/register/op_kernel_registry.h
@@ -27,10 +27,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpKernelRegistry {
   using CreateFn = HostCpuOp* (*)();
   ~OpKernelRegistry();
 
-  static OpKernelRegistry& GetInstance() {
-    static OpKernelRegistry instance;
-    return instance;
-  }
+  static OpKernelRegistry& GetInstance();
 
   bool IsRegistered(const std::string &op_type);
 
diff --git a/third_party/fwkacllib/inc/register/op_registry.h b/third_party/fwkacllib/inc/register/op_registry.h
index 318eb3ba..f7e37390 100644
--- a/third_party/fwkacllib/inc/register/op_registry.h
+++ b/third_party/fwkacllib/inc/register/op_registry.h
@@ -21,6 +21,7 @@
 #include <set>
 #include <string>
 #include <unordered_map>
+#include <map>
 #include <vector>
 
 #include "register/register.h"
@@ -88,7 +89,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry {
   std::unordered_map<std::string, FusionParseParamByOpFunc> fusion_parse_params_by_op_fn_map_;
   std::unordered_map<std::string, ParseSubgraphFunc> op_types_to_parse_subgraph_post_func_;
   std::unordered_map<std::string, std::vector<RemoveInputConfigure>> remove_input_configure_map_;
-  std::unordered_map<std::string, std::string> origin_type_to_om_type_;
+  std::map<std::string, std::string> origin_type_to_om_type_;
   std::unordered_map<std::string, ParseOpToGraphFunc> parse_op_to_graph_fn_map_;
   std::unordered_map<std::string, ParseSubgraphFuncV2> op_types_to_parse_subgraph_post_func_v2_;
 };