
!963 code sync 0116

From: @changzherui
Reviewed-by: @liujunzhu,@guoqi1024
Signed-off-by: @guoqi1024
tags/v1.2.0
mindspore-ci-bot committed 3 years ago
parent
commit
50f17e37d9
100 changed files with 4956 additions and 3933 deletions
  1. +19 -27  CMakeLists.txt
  2. +73 -0  Third_Party_Open_Source_Software_Notice
  3. +41 -19  build.sh
  4. +5 -0  classify_rule.txt
  5. +1 -1  cmake/FindModule.cmake
  6. +1 -0  cmake/external_libs/gflags.cmake
  7. +6 -2  cmake/external_libs/gtest.cmake
  8. +1 -0  cmake/external_libs/json.cmake
  9. +5 -1  cmake/external_libs/onnx.cmake
  10. +1 -0  cmake/external_libs/protobuf_shared.cmake
  11. +1 -0  cmake/external_libs/protobuf_static.cmake
  12. +116 -115  cmake/external_libs/protoc.cmake
  13. +11 -2  cmake/external_libs/securec.cmake
  14. +1 -0  cmake/intf_pub_linux.cmake
  15. +124 -23  ge/CMakeLists.txt
  16. +8 -3  ge/analyzer/analyzer.cc
  17. +12 -1  ge/client/ge_api.cc
  18. +0 -369  ge/client/ge_prof.cc
  19. +6 -5  ge/client/module.mk
  20. +1 -0  ge/client/proto/ge_ir.proto
  21. +7 -1  ge/common/CMakeLists.txt
  22. +61 -2  ge/common/auth/file_saver.cc
  23. +7 -0  ge/common/auth/file_saver.h
  24. +30 -21  ge/common/base64.h
  25. +2 -1  ge/common/debug/memory_dumper.cc
  26. +14 -1  ge/common/dump/dump_op.cc
  27. +43 -25  ge/common/formats/format_transfers/format_transfer_fractal_nz.cc
  28. +52 -35  ge/common/formats/format_transfers/format_transfer_fractal_zz.cc
  29. +0 -1  ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc
  30. +13 -12  ge/common/formats/format_transfers/format_transfer_transpose.cc
  31. +9 -0  ge/common/formats/utils/formats_definitions.h
  32. +1 -0  ge/common/ge/datatype_util.cc
  33. +7 -1  ge/common/ge/plugin_manager.cc
  34. +13 -4  ge/common/ge/tbe_plugin_manager.cc
  35. +1 -0  ge/common/ge_common.mk
  36. +442 -37  ge/common/helper/model_helper.cc
  37. +195 -1  ge/common/helper/om_file_helper.cc
  38. +1 -1  ge/common/op/ge_op_utils.cc
  39. +198 -0  ge/common/profiling/ge_profiling.cc
  40. +26 -0  ge/common/profiling/ge_runner_profiling.cc
  41. +261 -406  ge/common/profiling/profiling_manager.cc
  42. +52 -69  ge/common/profiling/profiling_manager.h
  43. +1 -0  ge/common/proto/ge_ir.proto
  44. +2 -0  ge/common/proto/op_mapping_info.proto
  45. +8 -0  ge/common/proto/tensorflow/attr_value.proto
  46. +8 -0  ge/common/proto/tensorflow/function.proto
  47. +8 -0  ge/common/proto/tensorflow/graph.proto
  48. +8 -0  ge/common/proto/tensorflow/graph_library.proto
  49. +8 -0  ge/common/proto/tensorflow/node_def.proto
  50. +8 -0  ge/common/proto/tensorflow/op_def.proto
  51. +8 -0  ge/common/proto/tensorflow/resource_handle.proto
  52. +8 -0  ge/common/proto/tensorflow/tensor.proto
  53. +8 -0  ge/common/proto/tensorflow/tensor_shape.proto
  54. +8 -0  ge/common/proto/tensorflow/types.proto
  55. +8 -0  ge/common/proto/tensorflow/versions.proto
  56. +4 -1  ge/common/types.cc
  57. +43 -40  ge/common/util.cc
  58. +4 -2  ge/executor/CMakeLists.txt
  59. +8 -164  ge/executor/ge_executor.cc
  60. +2 -4  ge/executor/module.mk
  61. +2 -0  ge/executor/proto/dump_task.proto
  62. +1 -0  ge/executor/proto/ge_ir.proto
  63. +2 -0  ge/executor/proto/op_mapping_info.proto
  64. +8 -1  ge/ge_inference.mk
  65. +6 -4  ge/ge_local_engine/CMakeLists.txt
  66. +27 -28  ge/ge_local_engine/engine/host_cpu_engine.cc
  67. +11 -7  ge/ge_runner.mk
  68. +3 -0  ge/ge_runtime/CMakeLists.txt
  69. +5 -4  ge/ge_runtime/runtime_model.cc
  70. +207 -39  ge/generator/ge_generator.cc
  71. +130 -10  ge/graph/build/graph_builder.cc
  72. +2 -0  ge/graph/build/graph_builder.h
  73. +0 -42  ge/graph/build/memory/CMakeLists.txt
  74. +9 -7  ge/graph/build/memory/binary_block_mem_assigner.cc
  75. +483 -264  ge/graph/build/memory/block_mem_assigner.cc
  76. +67 -16  ge/graph/build/memory/block_mem_assigner.h
  77. +336 -606  ge/graph/build/memory/graph_mem_assigner.cc
  78. +7 -23  ge/graph/build/memory/graph_mem_assigner.h
  79. +59 -12  ge/graph/build/model_builder.cc
  80. +8 -2  ge/graph/build/model_builder.h
  81. +2 -2  ge/graph/build/stream_allocator.cc
  82. +28 -11  ge/graph/build/stream_graph_optimizer.cc
  83. +1 -1  ge/graph/build/stream_graph_optimizer.h
  84. +92 -45  ge/graph/build/task_generator.cc
  85. +5 -2  ge/graph/build/task_generator.h
  86. +4 -1  ge/graph/common/transop_util.cc
  87. +2 -26  ge/graph/execute/graph_execute.cc
  88. +1 -5  ge/graph/execute/graph_execute.h
  89. +0 -1  ge/graph/label/case_label_maker.h
  90. +0 -1  ge/graph/label/if_label_maker.h
  91. +0 -1  ge/graph/label/partitioned_call_label_maker.h
  92. +0 -1  ge/graph/label/while_label_maker.h
  93. +10 -66  ge/graph/load/graph_loader.cc
  94. +1 -7  ge/graph/load/graph_loader.h
  95. +17 -2  ge/graph/load/new_model_manager/data_dumper.cc
  96. +6 -4  ge/graph/load/new_model_manager/data_dumper.h
  97. +872 -918  ge/graph/load/new_model_manager/davinci_model.cc
  98. +219 -180  ge/graph/load/new_model_manager/davinci_model.h
  99. +0 -75  ge/graph/load/new_model_manager/davinci_model_parser.cc
  100. +303 -122  ge/graph/load/new_model_manager/model_manager.cc

+19 -27  CMakeLists.txt

@@ -16,8 +16,11 @@ endif()

if(DEFINED ENV{D_PKG_SERVER})
set(GE_PB_PKG $ENV{D_PKG_SERVER})
message("Download packages from PKG server")
endif()
message("Download packages from DPKG server")
elseif(DEFINED ENV{MSLIBS_SERVER})
set(GE_PB_PKG "http://$ENV{MSLIBS_SERVER}:8081")
message("Download packages from MSPKG server")
endif ()

set(ASCEND_DRIVER_DIR ${ASCEND_DIR}/driver/lib64)
set(ASCEND_DRIVER_COMMON_DIR ${ASCEND_DIR}/driver/lib64/common)
@@ -37,7 +40,7 @@ set(ATLAS_MS_RUNTIME_PATH ${ATLAS_RUNTIME_DIR} ${ATLAS_ACL_DIR} ${ATLAS_ATC_DIR}
option(ENABLE_OPEN_SRC "Enable graphengine compile in opensource." FALSE)

if (ENABLE_OPEN_SRC)
set(HI_PYTHON python3.7)
set(HI_PYTHON python3)

include(cmake/external_libs/protobuf_shared.cmake)
include(cmake/external_libs/protobuf_static.cmake)
@@ -49,10 +52,6 @@ if (ENABLE_OPEN_SRC)
include(cmake/FindModule.cmake)
include(cmake/intf_pub_linux.cmake)

# for CPU/GPU mode, find c_sec and slog from local prebuild
#if(NOT ENABLE_D AND NOT GE_ONLY)
# set(GE_PREBUILD_PATH ${GE_CODE_DIR}/third_party/prebuild/${CMAKE_HOST_SYSTEM_PROCESSOR})
# find_module(slog libslog.so ${GE_PREBUILD_PATH})
# if D_LINK_PATH is set in environment variables, search libraries in given path
if(DEFINED ENV{D_LINK_PATH})
# D_LINK_PATH is set
@@ -69,9 +68,9 @@ if (ENABLE_OPEN_SRC)
endif()
set(GE_LIB_PATH ${GE_LIB_PATH}/${GE_SYS_ARCH})
set(STATIC_ACL_LIB ${GE_LIB_PATH})
find_module(slog libslog.so ${GE_LIB_PATH})
find_module(slog libalog.so ${GE_LIB_PATH})
find_module(static_mmpa libmmpa.a ${GE_LIB_PATH})
find_module(msprof libmsprof.so ${GE_LIB_PATH})
find_module(msprofiler_ext libmsprofiler.a ${GE_LIB_PATH})
find_module(hccl libhccl.so ${GE_LIB_PATH})
find_module(adump_server libadump_server.a ${GE_LIB_PATH})
find_module(runtime libruntime.so ${GE_LIB_PATH})
@@ -80,20 +79,21 @@ if (ENABLE_OPEN_SRC)
find_module(error_manager liberror_manager.so ${GE_LIB_PATH})
find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH})
find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH})
find_module(msprofiler libmsprofiler.a ${GE_LIB_PATH})
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${GE_LIB_PATH})
#find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH})
elseif(ENABLE_GE_COV OR ENABLE_GE_UT)
add_subdirectory(tests)
else()
find_module(slog libslog.so ${ASCEND_ATC_DIR} ${ASCEND_DRIVER_COMMON_DIR})
find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR})
if(PLATFORM STREQUAL "train")
find_module(msprof libmsprof.so ${ASCEND_DRIVER_COMMON_DIR})
find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
find_module(resource libresource.so ${ASCEND_RUNTIME_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
find_module(msprofiler libmsprofiler.a ${ASCEND_RUNTIME_DIR})
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
if(PRODUCT STREQUAL "flr3")
message(FATAL_ERROR "This platform is not supported in train mode, build terminated")
@@ -105,21 +105,18 @@ if (ENABLE_OPEN_SRC)
find_module(resource libresource.so ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR})
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
if(PRODUCT STREQUAL "flr3")
find_module(msprof libmsprof.so ${ASCEND_DRIVER_SHARE_DIR})
elseif(PRODUCT STREQUAL "flr1")
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
find_module(msprof libmsprof.so ${ASCEND_DRIVER_COMMON_DIR})
elseif(PRODUCT STREQUAL "flr2")
# flr2 ascend_hal_stub limsprof ?
else()
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR})
find_module(msprof libmsprof.so ${ASCEND_DRIVER_DIR})
endif()
elseif(PLATFORM STREQUAL "all")
find_module(msprof libmsprof.so ${ASCEND_DRIVER_COMMON_DIR})
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
@@ -127,17 +124,12 @@ if (ENABLE_OPEN_SRC)
find_module(resource libresource.so ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR})
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
else()
message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!")
message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!")
endif()

if (ENABLE_GE_COV OR ENABLE_GE_UT)
add_subdirectory(tests)
endif()

endif()

set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef)
@@ -158,7 +150,7 @@ elseif (ENABLE_D OR ENABLE_ACL)
include(cmake/intf_pub_linux.cmake)

# common libraries
find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH})
find_module(slog libalog.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})

@@ -178,7 +170,7 @@ elseif(ENABLE_MS_TESTCASES)
include(cmake/intf_pub_linux.cmake)

# common libraries
find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH})
find_module(slog libalog.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
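
For reference, the D_PKG_SERVER / MSLIBS_SERVER hunk above lets the open-source build pull its third-party dependencies from an internal mirror instead of GitHub/Gitee. A minimal shell sketch of how that is exercised, assuming a reachable mirror host (pkg.example.com is a placeholder, not part of this commit):

# Option 1: D_PKG_SERVER is taken verbatim as the GE_PB_PKG base URL
export D_PKG_SERVER=https://pkg.example.com/graphengine
# Option 2: MSLIBS_SERVER is a bare host name; CMake expands it to http://<host>:8081
# export MSLIBS_SERVER=pkg.example.com
sh build.sh -j8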



+73 -0  Third_Party_Open_Source_Software_Notice

@@ -458,3 +458,76 @@ Copyright (c) Facebook Inc. and Microsoft Corporation.

License: MIT License
Please see above.



Software: caffe 1.0

License: BSD 2-Clause License

Open Source Software Licensed Under the BSD 2-Clause License

GraphEngine uses source code files from caffe so as to support model format conversion from caffe model to GraphEngine model.
Please see below for the full list of source code files from caffe that are used by GraphEngine.
The below software in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
----------------------------------------------------------------------------------------
1. caffe.proto master
All contributions by the University of California:
Copyright (c) 2014-2017 The Regents of the University of California (Regents)
All rights reserved.


Terms of the BSD 2-Clause License:
--------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.



Software: tensorflow 1.15.0

License: Apache-2.0 License

Open Source Software Licensed Under the Apache-2.0 License


GraphEngine uses source code files from tensorflow so as to support model format conversion from tensorflow model to GraphEngine model.
Please see below for the full list of source code files from tensorflow that are used by GraphEngine.
The below software in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
----------------------------------------------------------------------------------------
1. attr_value.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

2. function.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

3. graph.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

4. node_def.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

5. op_def.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

6. resource_handle.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

7. tensor.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

8. tensor_shape.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

9. types.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

10. versions.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Terms of the Apache-2.0 License:
Please see above.

+41 -19  build.sh

@@ -23,7 +23,7 @@ export BUILD_PATH="${BASEPATH}/build/"
usage()
{
echo "Usage:"
echo "sh build.sh [-j[n]] [-h] [-v] [-s] [-t] [-u] [-c] [-S on|off]"
echo "sh build.sh [-j[n]] [-h] [-v] [-s] [-t] [-u] [-c] [-S on|off] [-M]"
echo ""
echo "Options:"
echo " -h Print usage"
@@ -35,6 +35,7 @@ usage()
echo " -p Build inference or train"
echo " -v Display build command"
echo " -S Enable enable download cmake compile dependency from gitee , default off"
echo " -M build MindSpore mode"
echo "to be continued ..."
}

@@ -58,30 +59,27 @@ checkopts()
ENABLE_GE_UT="off"
ENABLE_GE_ST="off"
ENABLE_GE_COV="off"
GE_ONLY="on"
PLATFORM=""
PRODUCT="normal"
ENABLE_GITEE="off"
MINDSPORE_MODE="off"
# Process the options
while getopts 'ustchj:p:g:vS:' opt
while getopts 'ustchj:p:g:vS:M' opt
do
OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
case "${opt}" in
u)
# ENABLE_GE_UT_ONLY_COMPILE="on"
ENABLE_GE_UT="on"
GE_ONLY="off"
;;
s)
ENABLE_GE_ST="on"
;;
t)
ENABLE_GE_UT="on"
GE_ONLY="off"
;;
c)
ENABLE_GE_COV="on"
GE_ONLY="off"
;;
h)
usage
@@ -104,6 +102,9 @@ checkopts()
ENABLE_GITEE="$OPTARG"
echo "enable download from gitee"
;;
M)
MINDSPORE_MODE="on"
;;
*)
echo "Undefined option: ${opt}"
usage
@@ -132,7 +133,8 @@ build_graphengine()
echo "create build directory and build GraphEngine";
mk_dir "${BUILD_PATH}"
cd "${BUILD_PATH}"
CMAKE_ARGS="-DBUILD_PATH=$BUILD_PATH -DGE_ONLY=$GE_ONLY"

CMAKE_ARGS="-DBUILD_PATH=$BUILD_PATH"

if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_GE_COV=ON"
@@ -150,7 +152,13 @@ build_graphengine()
if [[ "X$ENABLE_GITEE" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_GITEE=ON"
fi
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_OPEN_SRC=True -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH} -DPLATFORM=${PLATFORM} -DPRODUCT=${PRODUCT}"

if [[ "X$MINDSPORE_MODE" = "Xoff" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_OPEN_SRC=True -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH} -DPLATFORM=${PLATFORM} -DPRODUCT=${PRODUCT}"
else
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_D=ON -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH}"
fi

echo "${CMAKE_ARGS}"
cmake ${CMAKE_ARGS} ..
if [ $? -ne 0 ]
@@ -162,13 +170,16 @@ build_graphengine()
TARGET=${COMMON_TARGET}
if [ "x${PLATFORM}" = "xtrain" ]
then
TARGET="ge_runner ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder ${TARGET}"
TARGET="ge_runner ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder fwk_atc.bin ${TARGET}"
elif [ "x${PLATFORM}" = "xinference" ]
then
TARGET="ge_compiler atc_ge_local_engine atc_ge_local_opskernel_builder atc_host_cpu_engine atc_host_cpu_opskernel_builder atc opensrc_ascendcl ${TARGET}"
TARGET="ge_compiler atc_ge_local_engine atc_ge_local_opskernel_builder atc_host_cpu_engine atc_host_cpu_opskernel_builder atc_atc.bin opensrc_ascendcl ${TARGET}"
elif [ "X$ENABLE_GE_UT" = "Xon" ]
then
TARGET="ut_libgraph ut_libge_multiparts_utest ut_libge_others_utest ut_libge_kernel_utest ut_libge_distinct_load_utest"
elif [ "X$MINDSPORE_MODE" = "Xon" ]
then
TARGET="ge_common graph"
elif [ "x${PLATFORM}" = "xall" ]
then
# build all the target
@@ -224,12 +235,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
# fi

# if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then
# echo "Generating coverage statistics, please wait..."
# cd ${BASEPATH}
# rm -rf ${BASEPATH}/cov
# mkdir ${BASEPATH}/cov
# gcovr -r ./ --exclude 'third_party' --exclude 'build' --exclude 'tests' --exclude 'prebuild' --exclude 'inc' --print-summary --html --html-details -d -o cov/index.html
# fi
echo "Generating coverage statistics, please wait..."
cd ${BASEPATH}
rm -rf ${BASEPATH}/cov
mkdir ${BASEPATH}/cov
lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
lcov --remove cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
cd ${BASEPATH}/cov
genhtml coverage.info
fi

# generate output package in tar form, including ut/st libraries/executables
@@ -242,6 +255,7 @@ generate_package()
FWK_PATH="fwkacllib/lib64"
ATC_PATH="atc/lib64"
ATC_BIN_PATH="atc/bin"
FWK_BIN_PATH="fwkacllib/bin"
NNENGINE_PATH="plugin/nnengine/ge_config"
OPSKERNEL_PATH="plugin/opskernel"

@@ -254,6 +268,7 @@ generate_package()
rm -rf ${OUTPUT_PATH:?}/${ACL_PATH}/
rm -rf ${OUTPUT_PATH:?}/${ATC_PATH}/
rm -rf ${OUTPUT_PATH:?}/${ATC_BIN_PATH}/
rm -rf ${OUTPUT_PATH:?}/${FWK_BIN_PATH}/

mk_dir "${OUTPUT_PATH}/${FWK_PATH}/${NNENGINE_PATH}"
mk_dir "${OUTPUT_PATH}/${FWK_PATH}/${OPSKERNEL_PATH}"
@@ -261,6 +276,7 @@ generate_package()
mk_dir "${OUTPUT_PATH}/${ATC_PATH}/${OPSKERNEL_PATH}"
mk_dir "${OUTPUT_PATH}/${ACL_PATH}"
mk_dir "${OUTPUT_PATH}/${ATC_BIN_PATH}"
mk_dir "${OUTPUT_PATH}/${FWK_BIN_PATH}"
cd "${OUTPUT_PATH}"

@@ -299,7 +315,8 @@ generate_package()
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name "$lib" -exec cp -f {} ${OUTPUT_PATH}/${ATC_PATH} \;
done

find ./bin -name atc -exec cp {} "${OUTPUT_PATH}/${ATC_BIN_PATH}" \;
find ./lib/atclib -name atc.bin -exec cp {} "${OUTPUT_PATH}/${ATC_BIN_PATH}" \;
find ./lib/fwkacl -name atc.bin -exec cp {} "${OUTPUT_PATH}/${FWK_BIN_PATH}" \;
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name "libascendcl.so" -exec cp -f {} ${OUTPUT_PATH}/${ACL_PATH} \;
if [ "x${PLATFORM}" = "xtrain" ]
@@ -314,7 +331,12 @@ generate_package()
fi
}

if [[ "X$ENABLE_GE_UT" = "Xoff" ]]; then
if [[ "X$ENABLE_GE_UT" = "Xoff" && "X$MINDSPORE_MODE" = "Xoff" ]]; then
generate_package
elif [ "X$MINDSPORE_MODE" = "Xon" ]
then
cd "${OUTPUT_PATH}"
find ./ -name graphengine_lib.tar -exec rm {} \;
tar -cf graphengine_lib.tar lib
fi
echo "---------------- GraphEngine package archive generated ----------------"
echo "---------------- GraphEngine package archive generated ----------------"

+5 -0  classify_rule.txt

@@ -0,0 +1,5 @@
[graphengine]
ge
inc
metadef
parser

+1 -1  cmake/FindModule.cmake

@@ -21,7 +21,7 @@ function(find_module module name)
if ("${${module}_LIBRARY_DIR}" STREQUAL "${module}_LIBRARY_DIR-NOTFOUND")
message(FATAL_ERROR "${name} not found in ${path}")
endif()
add_library(${module} SHARED IMPORTED)
set_target_properties(${module} PROPERTIES
IMPORTED_LOCATION ${${module}_LIBRARY_DIR}


+1 -0  cmake/external_libs/gflags.cmake

@@ -23,6 +23,7 @@ ExternalProject_Add(gflags_build
URL ${REQ_URL}
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../../third_party/gflags/src/gflags-2.2.2
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gflags_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gflags <SOURCE_DIR>
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install


+6 -2  cmake/external_libs/gtest.cmake

@@ -10,7 +10,10 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()

if (ENABLE_GITEE)
if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/ge_gtest/release-1.8.0.tar.gz")
set(MD5 "")
elseif (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/googletest/repository/archive/release-1.8.0.tar.gz")
set(MD5 "")
else()
@@ -22,8 +25,9 @@ set (gtest_CXXFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-
set (gtest_CFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack")
ExternalProject_Add(gtest_build
URL ${REQ_URL}
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gtest_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gtest <SOURCE_DIR>
-DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON
-DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install
EXCLUDE_FROM_ALL TRUE


+1 -0  cmake/external_libs/json.cmake

@@ -18,6 +18,7 @@ ExternalProject_Add(json_build
URL ${REQ_URL}
#URL /home/txd/workspace/cloud_code/pkg/include.zip
SOURCE_DIR ${JSON_SRC_DIR}
TLS_VERIFY OFF
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""


+5 -1  cmake/external_libs/onnx.cmake

@@ -6,7 +6,10 @@ set(ONNX_PROTO_DIR ${CMAKE_BINARY_DIR}/onnx)
set(ONNX_PROTO_FILE ${ONNX_PROTO_DIR}/onnx.proto)
file(MAKE_DIRECTORY ${ONNX_PROTO_DIR})

if (ENABLE_GITEE)
if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/onnx/onnx-1.6.0.tar.gz")
set(MD5 "512f2779d6215d4a36f366b6b9acdf1e")
elseif (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/ONNX/repository/archive/v1.6.0.tar.gz")
set(MD5 "1bdbcecdd68ea8392630467646776e02")
else()
@@ -19,6 +22,7 @@ ExternalProject_Add(onnx
#URL /home/txd/workspace/cloud_code/pkg/onnx-1.6.0.tar.gz
#URL_HASH SHA256=3b88c3fe521151651a0403c4d131cb2e0311bd28b753ef692020a432a81ce345
#SOURCE_DIR ${ONNX_SRC_DIR}
TLS_VERIFY OFF
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
#INSTALL_COMMAND ""


+1 -0  cmake/external_libs/protobuf_shared.cmake

@@ -26,6 +26,7 @@ set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fst
set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
ExternalProject_Add(protobuf_build
URL ${REQ_URL}
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND}
-Dprotobuf_WITH_ZLIB=OFF
-DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR}


+1 -0  cmake/external_libs/protobuf_static.cmake

@@ -27,6 +27,7 @@ ExternalProject_Add(protobuf_static_build
URL ${REQ_URL}
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${METADEF_DIR}/../../third_party/protobuf/src/protobuf-3.8.0
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}


+116 -115  cmake/external_libs/protoc.cmake

@@ -1,115 +1,116 @@
if (HAVE_PROTOC)
return()
endif()
include(ExternalProject)
include(GNUInstallDirs)
#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output)
if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
(${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend"))
set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE)
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()
if(GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz")
else()
if (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz")
set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236")
else()
set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz")
set(MD5 "3d9e32700639618a4d2d342c99d4507a")
endif ()
endif()
set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2")
set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
ExternalProject_Add(protoc_build
URL ${REQ_URL}
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0
CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc <SOURCE_DIR>/cmake
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install
EXCLUDE_FROM_ALL TRUE
)
set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc)
set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc)
function(protobuf_generate comp c_var h_var)
if(NOT ARGN)
message(SEND_ERROR "Error: protobuf_generate() called without any proto files")
return()
endif()
set(${c_var})
set(${h_var})
foreach(file ${ARGN})
get_filename_component(abs_file ${file} ABSOLUTE)
get_filename_component(file_name ${file} NAME_WE)
get_filename_component(file_dir ${abs_file} PATH)
get_filename_component(parent_subdir ${file_dir} NAME)
if("${parent_subdir}" STREQUAL "proto")
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto)
else()
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir})
endif()
list(APPEND ${c_var} "${proto_output_path}/${file_name}.pb.cc")
list(APPEND ${h_var} "${proto_output_path}/${file_name}.pb.h")
add_custom_command(
OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h"
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}"
COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file}
DEPENDS protoc_build ${abs_file}
COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM )
endforeach()
set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE)
set(${c_var} ${${c_var}} PARENT_SCOPE)
set(${h_var} ${${h_var}} PARENT_SCOPE)
endfunction()
function(protobuf_generate_py comp py_var)
if(NOT ARGN)
message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files")
return()
endif()
set(${py_var})
foreach(file ${ARGN})
get_filename_component(abs_file ${file} ABSOLUTE)
get_filename_component(file_name ${file} NAME_WE)
get_filename_component(file_dir ${abs_file} PATH)
get_filename_component(parent_subdir ${file_dir} NAME)
if("${parent_subdir}" STREQUAL "proto")
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto)
else()
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir})
endif()
list(APPEND ${py_var} "${proto_output_path}/${file_name}_pb2.py")
add_custom_command(
OUTPUT "${proto_output_path}/${file_name}_pb2.py"
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}"
COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file}
DEPENDS protoc_build ${abs_file}
COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM )
endforeach()
set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE)
set(${py_var} ${${py_var}} PARENT_SCOPE)
endfunction()
#set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add")
set(HAVE_PROTOC TRUE)
if (HAVE_PROTOC)
return()
endif()

include(ExternalProject)
include(GNUInstallDirs)
#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output)

if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
(${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend"))
set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE)
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()

if(GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz")
else()
if (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz")
set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236")
else()
set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz")
set(MD5 "3d9e32700639618a4d2d342c99d4507a")
endif ()
endif()

set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2")
set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
ExternalProject_Add(protoc_build
URL ${REQ_URL}
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc <SOURCE_DIR>/cmake
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install
EXCLUDE_FROM_ALL TRUE
)

set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc)

set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc)

function(protobuf_generate comp c_var h_var)
if(NOT ARGN)
message(SEND_ERROR "Error: protobuf_generate() called without any proto files")
return()
endif()
set(${c_var})
set(${h_var})

foreach(file ${ARGN})
get_filename_component(abs_file ${file} ABSOLUTE)
get_filename_component(file_name ${file} NAME_WE)
get_filename_component(file_dir ${abs_file} PATH)
get_filename_component(parent_subdir ${file_dir} NAME)

if("${parent_subdir}" STREQUAL "proto")
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto)
else()
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir})
endif()
list(APPEND ${c_var} "${proto_output_path}/${file_name}.pb.cc")
list(APPEND ${h_var} "${proto_output_path}/${file_name}.pb.h")

add_custom_command(
OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h"
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}"
COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file}
DEPENDS protoc_build ${abs_file}
COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM )
endforeach()

set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE)
set(${c_var} ${${c_var}} PARENT_SCOPE)
set(${h_var} ${${h_var}} PARENT_SCOPE)

endfunction()

function(protobuf_generate_py comp py_var)
if(NOT ARGN)
message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files")
return()
endif()
set(${py_var})

foreach(file ${ARGN})
get_filename_component(abs_file ${file} ABSOLUTE)
get_filename_component(file_name ${file} NAME_WE)
get_filename_component(file_dir ${abs_file} PATH)
get_filename_component(parent_subdir ${file_dir} NAME)

if("${parent_subdir}" STREQUAL "proto")
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto)
else()
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir})
endif()
list(APPEND ${py_var} "${proto_output_path}/${file_name}_pb2.py")

add_custom_command(
OUTPUT "${proto_output_path}/${file_name}_pb2.py"
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}"
COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file}
DEPENDS protoc_build ${abs_file}
COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM )
endforeach()

set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE)
set(${py_var} ${${py_var}} PARENT_SCOPE)

endfunction()

#set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add")
set(HAVE_PROTOC TRUE)

+11 -2  cmake/external_libs/securec.cmake

@@ -10,11 +10,20 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()

if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/securec/v1.1.10.tar.gz")
set(MD5 "")
else()
set(REQ_URL "https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz")
set(MD5 "")
endif ()

ExternalProject_Add(c_sec_build
URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
URL ${REQ_URL}
#URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../libc_sec
PATCH_COMMAND patch -p1 < ${GE_CODE_DIR}/metadef/third_party/patch/securec/0001-add-securec-cmake-script.patch
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
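
Taken together, the gtest, onnx, protoc and securec changes above mean a GE_PB_PKG mirror only needs to serve the expected archives under a libs/ prefix; the host itself is whatever D_PKG_SERVER or MSLIBS_SERVER resolve to. A hypothetical reachability check from the shell:

# archive paths requested by the cmake scripts, relative to $GE_PB_PKG
#   libs/ge_gtest/release-1.8.0.tar.gz
#   libs/onnx/onnx-1.6.0.tar.gz
#   libs/protobuf/v3.8.0.tar.gz
#   libs/securec/v1.1.10.tar.gz
curl -sI "$GE_PB_PKG/libs/protobuf/v3.8.0.tar.gz" | head -n 1   # expect an HTTP 200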


+1 -0  cmake/intf_pub_linux.cmake

@@ -16,6 +16,7 @@ target_compile_definitions(intf_pub INTERFACE
$<$<CONFIG:Debug>:CFG_BUILD_DEBUG>
WIN64=1
LINUX=0
LOG_CPP
)
target_link_options(intf_pub INTERFACE
-Wl,-z,relro


+124 -23  ge/CMakeLists.txt

@@ -1,7 +1,6 @@
if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
add_subdirectory(common)
add_subdirectory(plugin/engine)
add_subdirectory(graph/build/memory)
add_subdirectory(ge_local_engine)
add_subdirectory(host_cpu_engine)
add_subdirectory(executor)
@@ -33,6 +32,51 @@ protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
protobuf_generate(ge PROTO_CLIENT_SRCS PROTO_CLIENT_HDRS ${PROTO_CLIENT_LIST})
protobuf_generate(ge PROTO_HEADER_SRCS PROTO_HEADER_HDRS ${PROTO_HEADER_LIST})

if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
############ libge_proto_common.a ############
add_library(ge_proto_common STATIC
${PROTO_HEADER_HDRS}
${PROTO_SRCS}
)

target_compile_definitions(ge_proto_common PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
google=ascend_private
)

target_compile_options(ge_proto_common PRIVATE
-O2
-fno-common
)

target_link_libraries(ge_proto_common PRIVATE
$<BUILD_INTERFACE:intf_pub>
ascend_protobuf
)

############ libge_proto_client.a ############
add_library(ge_proto_client STATIC
${PROTO_HEADER_HDRS}
${PROTO_CLIENT_SRCS}
)

target_compile_definitions(ge_proto_client PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
google=ascend_private
)

target_compile_options(ge_proto_client PRIVATE
-O2
-fno-common
)

target_link_libraries(ge_proto_client PRIVATE
$<BUILD_INTERFACE:intf_pub>
ascend_protobuf
)
endif ()

##################################################################
set(TRAIN_SRC_LIST
"common/formats/format_transfers/datatype_transfer.cc"
"common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc"
@@ -60,6 +104,8 @@ set(TRAIN_SRC_LIST
"common/dump/dump_manager.cc"
"common/dump/dump_properties.cc"
"common/dump/dump_op.cc"
"common/profiling/ge_profiling.cc"
"common/profiling/ge_runner_profiling.cc"
"engine_manager/dnnengine_manager.cc"
"ge_local_engine/engine/host_cpu_engine.cc"
"generator/ge_generator.cc"
@@ -123,6 +169,7 @@ set(TRAIN_SRC_LIST
"graph/manager/graph_var_manager.cc"
"graph/manager/host_mem_manager.cc"
"graph/manager/rdma_pool_allocator.cc"
"graph/manager/host_mem_allocator.cc"
"graph/manager/memory_api.cc"
"graph/manager/model_manager/event_manager.cc"
"graph/manager/trans_var_data_utils.cc"
@@ -142,6 +189,7 @@ set(TRAIN_SRC_LIST
"graph/passes/atomic_addr_clean_pass.cc"
"graph/passes/mark_same_addr_pass.cc"
"graph/passes/mark_graph_unknown_status_pass.cc"
"graph/passes/dynamic_single_op_reset_shape_pass.cc"
"graph/passes/mark_agnostic_pass.cc"
"graph/partition/dynamic_shape_partition.cc"
"graph/partition/stage_partition.cc"
@@ -154,13 +202,17 @@ set(TRAIN_SRC_LIST
"graph/passes/compile_nodes_pass.cc"
"graph/passes/constant_folding_pass.cc"
"graph/passes/constant_fuse_same_pass.cc"
"graph/passes/fuse_data_nodes_with_common_input_pass.cc"
"graph/passes/remove_same_const_pass.cc"
"graph/passes/useless_control_out_remove_pass.cc"
"graph/passes/control_trigger_pass.cc"
"graph/passes/dimension_adjust_pass.cc"
"graph/passes/dimension_compute_pass.cc"
"graph/passes/dropout_pass.cc"
"graph/passes/hccl_group_pass.cc"
"graph/passes/enter_pass.cc"
"graph/passes/assign_pass.cc"
"graph/passes/assign_remove_pass.cc"
"graph/passes/inplace_support_check_pass.cc"
"graph/passes/flow_ctrl_pass.cc"
"graph/passes/global_step_insert_pass.cc"
"host_kernels/transpose_kernel.cc"
@@ -201,6 +253,7 @@ set(TRAIN_SRC_LIST
"host_kernels/sub_kernel.cc"
"host_kernels/transdata_kernel.cc"
"host_kernels/unpack_kernel.cc"
"host_kernels/reformat_kernel.cc"
"graph/passes/folding_pass.cc"
"graph/passes/get_original_format_pass.cc"
"graph/passes/guarantee_const_pass.cc"
@@ -331,10 +384,16 @@ set(TRAIN_SRC_LIST
"hybrid/hybrid_davinci_model.cc"
"executor/ge_executor.cc"
"client/ge_api.cc"
"client/ge_prof.cc"
"analyzer/analyzer.cc"
"ir_build/ge_ir_build.cc"
"ir_build/atc_ir_common.cc"
"graph/build/memory/memory_assigner.cc"
"graph/build/memory/graph_mem_assigner.cc"
"graph/build/memory/binary_block_mem_assigner.cc"
"graph/build/memory/block_mem_assigner.cc"
"graph/build/memory/hybrid_mem_assigner.cc"
"graph/build/memory/max_block_mem_assigner.cc"
"graph/build/memory/var_mem_assign_util.cc"
)

set(INFER_SRC_LIST
@@ -396,6 +455,7 @@ set(INFER_SRC_LIST
"graph/manager/graph_var_manager.cc"
"graph/manager/host_mem_manager.cc"
"graph/manager/rdma_pool_allocator.cc"
"graph/manager/host_mem_allocator.cc"
"graph/manager/graph_mem_allocator.cc"
"graph/manager/graph_caching_allocator.cc"
"model/ge_model.cc"
@@ -425,6 +485,7 @@ set(INFER_SRC_LIST
"graph/passes/net_output_pass.cc"
"graph/passes/replace_transshape_pass.cc"
"graph/passes/constant_fuse_same_pass.cc"
"graph/passes/fuse_data_nodes_with_common_input_pass.cc"
"graph/passes/print_op_pass.cc"
"graph/passes/no_use_reshape_remove_pass.cc"
"graph/passes/iterator_op_pass.cc"
@@ -432,6 +493,7 @@ set(INFER_SRC_LIST
"graph/passes/atomic_addr_clean_pass.cc"
"graph/passes/mark_same_addr_pass.cc"
"graph/passes/mark_graph_unknown_status_pass.cc"
"graph/passes/dynamic_single_op_reset_shape_pass.cc"
"graph/passes/mark_agnostic_pass.cc"
"graph/common/omg_util.cc"
"graph/common/bcast.cc"
@@ -487,6 +549,7 @@ set(INFER_SRC_LIST
"host_kernels/slice_d_kernel.cc"
"host_kernels/dynamic_stitch_kernel.cc"
"host_kernels/identity_kernel.cc"
"host_kernels/reformat_kernel.cc"
"graph/passes/stop_gradient_pass.cc"
"graph/passes/prevent_gradient_pass.cc"
"graph/passes/identity_pass.cc"
@@ -514,9 +577,12 @@ set(INFER_SRC_LIST
"graph/passes/cond_remove_pass.cc"
"graph/passes/for_pass.cc"
"graph/passes/enter_pass.cc"
"graph/passes/assign_pass.cc"
"graph/passes/assign_remove_pass.cc"
"graph/passes/inplace_support_check_pass.cc"
"graph/passes/addn_pass.cc"
"graph/passes/common_subexpression_elimination_pass.cc"
"graph/passes/remove_same_const_pass.cc"
"graph/passes/useless_control_out_remove_pass.cc"
"graph/passes/transop_symmetry_elimination_pass.cc"
"graph/passes/save_pass.cc"
"graph/passes/switch_dead_branch_elimination.cc"
@@ -598,11 +664,34 @@ set(INFER_SRC_LIST
"graph/label/while_label_maker.cc"
"graph/label/partitioned_call_label_maker.cc"
"analyzer/analyzer.cc"
"graph/build/memory/memory_assigner.cc"
"graph/build/memory/graph_mem_assigner.cc"
"graph/build/memory/binary_block_mem_assigner.cc"
"graph/build/memory/block_mem_assigner.cc"
"graph/build/memory/hybrid_mem_assigner.cc"
"graph/build/memory/max_block_mem_assigner.cc"
"graph/build/memory/var_mem_assign_util.cc"
)

if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
message("CMAKE_CXX_COMPILER_VERSION = ${CMAKE_CXX_COMPILER_VERSION}")
############ libge_runner.so ############
add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS})
add_library(ge_runner SHARED
${TRAIN_SRC_LIST}
$<TARGET_OBJECTS:$<IF:$<TARGET_EXISTS:msprofiler_fwk>,msprofiler_fwk,msprofiler_fwk_object>>
)

add_library(msprofiler_fwk_object OBJECT IMPORTED GLOBAL)

if (msprofiler_fwk_ext_LIBRARY_DIR)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object)
execute_process(
COMMAND ar x ${msprofiler_fwk_ext_LIBRARY_DIR}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object
)
file(GLOB MSPROFILER_FWK_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o)
set_property(TARGET msprofiler_fwk_object PROPERTY IMPORTED_OBJECTS ${MSPROFILER_FWK_OBJECT_LIST})
endif()

target_compile_definitions(ge_runner PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
@@ -615,9 +704,12 @@ target_compile_definitions(ge_runner PRIVATE

target_compile_options(ge_runner PRIVATE
-O2
-fno-common
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable>
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable>
)

target_include_directories(ge_runner PRIVATE
target_include_directories(ge_runner SYSTEM PRIVATE
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/ge/analyzer
${GE_CODE_DIR}/inc
@@ -643,12 +735,12 @@ target_include_directories(ge_runner PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_libraries(ge_runner
target_link_libraries(ge_runner PRIVATE
$<BUILD_INTERFACE:intf_pub>
ge_memory
adump_server
msprofiler
static_mmpa
ge_proto_common
ge_proto_client
-Wl,--no-as-needed
graph
ge_common
@@ -656,9 +748,7 @@ target_link_libraries(ge_runner
register
c_sec
slog
msprof
runtime
resource
error_manager
ascend_hal_stub
-Wl,--as-needed
@@ -668,7 +758,9 @@ target_link_libraries(ge_runner
)

############ libge_compiler.so ############
add_library(ge_compiler SHARED ${INFER_SRC_LIST} ${PROTO_SRCS})
add_library(ge_compiler SHARED
${INFER_SRC_LIST}
)

target_compile_definitions(ge_compiler PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
@@ -681,9 +773,12 @@ target_compile_definitions(ge_compiler PRIVATE

target_compile_options(ge_compiler PRIVATE
-O2
-fno-common
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable>
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable>
)

target_include_directories(ge_compiler PRIVATE
target_include_directories(ge_compiler SYSTEM PRIVATE
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/ge/analyzer
${GE_CODE_DIR}/inc
@@ -709,10 +804,10 @@ target_include_directories(ge_compiler PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_libraries(ge_compiler
target_link_libraries(ge_compiler PRIVATE
$<BUILD_INTERFACE:intf_pub>
ge_memory
static_mmpa
ge_proto_common
-Wl,--no-as-needed
graph
ge_common
@@ -722,7 +817,6 @@ target_link_libraries(ge_compiler
error_manager
slog
runtime_compile
resource
-Wl,--as-needed
json
-lrt
@@ -739,7 +833,7 @@ file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object)
if(EXISTS ${STATIC_ACL_LIB}/libascendcl.a)
execute_process(
COMMAND ar x ${STATIC_ACL_LIB}/libascendcl.a
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object
)
file(GLOB OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object/*.o)
else()
@@ -748,8 +842,21 @@ endif()

add_library(opensrc_ascendcl SHARED
${OBJECT_LIST}
$<TARGET_OBJECTS:$<IF:$<TARGET_EXISTS:msprofiler>,msprofiler,msprofiler_object>>
)

add_library(msprofiler_object OBJECT IMPORTED GLOBAL)

if (msprofiler_ext_LIBRARY_DIR)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object)
execute_process(
COMMAND ar x ${msprofiler_ext_LIBRARY_DIR}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object
)
file(GLOB MSPROFILER_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object/*.o)
set_property(TARGET msprofiler_object PROPERTY IMPORTED_OBJECTS ${MSPROFILER_OBJECT_LIST})
endif()

target_compile_definitions(opensrc_ascendcl PRIVATE
google=ascend_private
)
@@ -775,13 +882,11 @@ target_link_libraries(opensrc_ascendcl PRIVATE
register_static
error_manager_static
adump_server
msprofiler
-Wl,--no-whole-archive
-Wl,--no-as-needed
c_sec
runtime
slog
msprof
ascend_hal_stub
-Wl,--as-needed
-lrt
@@ -797,12 +902,10 @@ set_target_properties(opensrc_ascendcl PROPERTIES
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_ir_build.cc
${CMAKE_CURRENT_BINARY_DIR}/stub_ge_api.cc
${CMAKE_CURRENT_BINARY_DIR}/stub_ge_prof.cc
COMMAND echo "Generating stub files."
&& ${HI_PYTHON} ${CMAKE_CURRENT_LIST_DIR}/stub/gen_stubapi.py ${GE_CODE_DIR}/inc/external ${CMAKE_CURRENT_BINARY_DIR}
&& mv ge_ir_build.cc stub_ge_ir_build.cc
&& mv ge_api.cc stub_ge_api.cc
&& mv ge_prof.cc stub_ge_prof.cc
&& echo "Generating stub files end."
#WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
#DEPENDS stub/gen_stubapi.py ${TOP_DIR}/inc/external ${CMAKE_CURRENT_BINARY_DIR}
@@ -811,7 +914,6 @@ add_custom_command(
add_custom_target(ge_stub
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_ir_build.cc
${CMAKE_CURRENT_BINARY_DIR}/stub_ge_api.cc
${CMAKE_CURRENT_BINARY_DIR}/stub_ge_prof.cc
)

##################################################################
@@ -853,7 +955,6 @@ target_include_directories(atc_stub_ge_compiler PRIVATE
############ stub/libge_runner.so ############
add_library(fwk_stub_ge_runner SHARED
stub_ge_api.cc
stub_ge_prof.cc
stub_ge_ir_build.cc
)
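
For reference, the msprofiler_fwk_object and msprofiler_object blocks above import the prebuilt static archives (libmsprofiler_fwk.a / libmsprofiler.a) as raw object files so they can be linked straight into libge_runner.so and libopensrc_ascendcl.so. Conceptually this is the same as the manual sequence below (the archive path is illustrative):

mkdir -p msprofiler_fwk_object && cd msprofiler_fwk_object
ar x /path/to/libmsprofiler_fwk.a   # unpack the .a into its member .o files
ls *.o                              # what file(GLOB ...) picks up as IMPORTED_OBJECTS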



+8 -3  ge/analyzer/analyzer.cc

@@ -217,10 +217,15 @@ ge::Status Analyzer::SaveAnalyzerDataToFile(uint64_t session_id, uint64_t graph_

json jsn;
GraphInfoToJson(jsn, *graph_info);
json_file_ << jsn.dump(kJsonDumpLevel) << std::endl;
bool ret_failed = false;
try {
json_file_ << jsn.dump(kJsonDumpLevel) << std::endl;
} catch (nlohmann::detail::type_error &e) {
GELOGE(FAILED, "analyzer file [%s] failed because [%s]", json_file_name_.c_str(), e.what());
ret_failed = true;
}
json_file_.close();

return SUCCESS;
return ret_failed ? FAILED : SUCCESS;
}

ge::Status Analyzer::DoAnalyze(DataInfo &data_info) {


+12 -1  ge/client/ge_api.cc

@@ -32,6 +32,7 @@
#include "graph/common/ge_call_wrapper.h"
#include "register/op_registry.h"
#include "common/ge/tbe_plugin_manager.h"
#include "toolchain/plog.h"

using domi::OpRegistry;
using std::map;
@@ -129,12 +130,15 @@ Status GEInitializeImpl(const std::map<string, string> &options) {

// Initialize GE, prepare for execution, call GELib::Initialize
Status GEInitialize(const std::map<string, string> &options) {
if (DlogReportInitialize() != SUCCESS) {
GELOGW("Dlog report device log initialize failed.");
}
return GEInitializeImpl(options);
}

Status GEInitialize(const std::map<AscendString, AscendString> &options) {
std::map<std::string, std::string> str_options;
for (auto & option : options) {
for (auto &option : options) {
if (option.first.GetString() == nullptr || option.second.GetString() == nullptr) {
GELOGE(FAILED, "GEInitialize options is nullptr.");
return FAILED;
@@ -143,6 +147,9 @@ Status GEInitialize(const std::map<AscendString, AscendString> &options) {
std::string val = option.second.GetString();
str_options[key] = val;
}
if (DlogReportInitialize() != SUCCESS) {
GELOGW("Dlog report device log initialize failed.");
}
return GEInitializeImpl(str_options);
}

@@ -187,6 +194,10 @@ Status GEFinalize() {
// to avoid memory fragment, use malloc_trim to back free stack to system
malloc_trim(0);

if (DlogReportFinalize() != SUCCESS) {
GELOGW("Dlog report device log finalize failed.");
}

GELOGT(TRACE_STOP, "GEFinalize finished");
return ret;
}


+0 -369  ge/client/ge_prof.cc

@@ -1,369 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "ge/ge_prof.h"
#include "ge/ge_api.h"
#include "init/gelib.h"
#include "common/debug/log.h"
#include "framework/common/debug/ge_log.h"
#include "common/profiling/profiling_manager.h"
#include "graph/load/graph_loader.h"
#include "toolchain/prof_acl_api.h"

using std::map;
using std::string;
using std::vector;

namespace {
const uint32_t kMaxDeviceNum = 64;
const uint32_t kDeviceListIndex = 3;
const std::string kProfilingInit = "prof_init";
const std::string kProfilingFinalize = "prof_finalize";
const std::string kProfilingStart = "prof_start";
const std::string kProfilingStop = "prof_stop";
const std::string kDeviceNums = "devNums";
const std::string kDeviceIdList = "devIdList";
const std::string kAicoreMetrics = "aicoreMetrics";

const std::map<ge::ProfilingAicoreMetrics, std::string> kProfAicoreMetricsToString = {
{ge::kAicoreArithmaticThroughput, "AICORE_ARITHMATIC_THROUGHPUT"},
{ge::kAicorePipeline, "AICORE_PIPELINE"},
{ge::kAicoreSynchronization, "AICORE_SYNCHRONIZATION"},
{ge::kAicoreMemory, "AICORE_MEMORY"},
{ge::kAicoreInternalMemory, "AICORE_INTERNAL_MEMORY"},
{ge::kAicoreStall, "AICORE_STALL"}};
} // namespace

static bool g_graph_prof_init_ = false;
static std::mutex g_prof_mutex_;

namespace ge {
struct aclgrphProfConfig {
ProfConfig config;
};

Status aclgrphProfInit(const char *profiler_path, uint32_t length) {
GELOGT(TRACE_INIT, "Graph prof init start");

std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized.");
return FAILED;
}

std::lock_guard<std::mutex> lock(g_prof_mutex_);
if (g_graph_prof_init_) {
GELOGW("Multi graph profiling initializations.");
return GE_PROF_MULTI_INIT;
}

Status ret = CheckPath(profiler_path, length);
if (ret != SUCCESS) {
GELOGE(ret, "Profiling config path is invalid.");
return ret;
}
// if command mode is set, just return
if (ProfilingManager::Instance().ProfilingOn()) {
GELOGW("Graph prof init failed, cause profiling command pattern is running.");
return GE_PROF_MODE_CONFLICT;
}

ret = ProfInit(profiler_path);
if (ret != SUCCESS) {
GELOGE(ret, "ProfInit init fail");
return ret;
}

GraphLoader graph_loader;
Command command;
command.cmd_params.clear();
command.cmd_type = kProfilingInit;
command.module_index = PROF_MODEL_LOAD;
ret = graph_loader.CommandHandle(command);
if (ret != SUCCESS) {
GELOGE(ret, "Handle profiling command %s failed, config = %s", kProfilingInit.c_str(), profiler_path);
return ret;
}
if (!g_graph_prof_init_) {
g_graph_prof_init_ = true;
GELOGI("Profiling init successfully.");
}

GELOGI("Successfully execute GraphProfInit.");
return SUCCESS;
}

Status aclgrphProfFinalize() {
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized.");
return FAILED;
}
std::lock_guard<std::mutex> lock(g_prof_mutex_);
// if command mode is set, just return
if (ProfilingManager::Instance().ProfilingOn()) {
GELOGW("Graph prof finalize failed, cause profiling command pattern is running.");
return GE_PROF_MODE_CONFLICT;
}

if (!g_graph_prof_init_) {
GELOGE(GE_PROF_NOT_INIT, "Graph not profiling initialize.");
return GE_PROF_NOT_INIT;
}
GraphLoader graph_loader;
Command command;
command.cmd_params.clear();
command.cmd_type = kProfilingFinalize;
Status ret = graph_loader.CommandHandle(command);
if (ret != SUCCESS) {
GELOGE(ret, "Handle profiling command %s failed.", kProfilingFinalize.c_str());
return ret;
}

ret = ProfFinalize();
if (ret != SUCCESS) {
GELOGE(ret, "Finalize profiling failed, result = %d", ret);
}

if (ret == SUCCESS) {
g_graph_prof_init_ = false;
GELOGI("Successfully execute GraphProfFinalize.");
}
return ret;
}

bool TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector<string> &prof_config_params) {
prof_config_params.clear();
prof_config_params.emplace_back(kDeviceNums);
prof_config_params.emplace_back(std::to_string(profiler_config->config.devNums));
prof_config_params.emplace_back(kDeviceIdList);
std::string devID = "";
if (profiler_config->config.devNums == 0) {
GELOGW("The device num is invalid.");
return false;
}
for (uint32_t i = 0; i < profiler_config->config.devNums; i++) {
devID.append(std::to_string(profiler_config->config.devIdList[i]));
if (i != profiler_config->config.devNums - 1) {
devID.append(",");
}
}

prof_config_params.push_back(devID);
prof_config_params.push_back(kAicoreMetrics);
auto iter =
kProfAicoreMetricsToString.find(static_cast<ProfilingAicoreMetrics>(profiler_config->config.aicoreMetrics));
if (iter == kProfAicoreMetricsToString.end()) {
GELOGW("The prof aicore metrics is invalid.");
return false;
}
prof_config_params.push_back(iter->second);
return true;
}

bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) {
if (deviceid_list == nullptr) {
GELOGE(PARAM_INVALID, "deviceIdList is nullptr");
return false;
}
if (device_nums == 0 || device_nums > kMaxDeviceNum) {
GELOGE(PARAM_INVALID, "The device nums is invalid.");
return false;
}

// real device num
int32_t dev_count = 0;
rtError_t rt_err = rtGetDeviceCount(&dev_count);
if (rt_err != RT_ERROR_NONE) {
GELOGE(INTERNAL_ERROR, "Get the Device count fail.");
return false;
}

if (device_nums > static_cast<uint32_t>(dev_count)) {
GELOGE(PARAM_INVALID, "Device num(%u) is not in range 1 ~ %d.", device_nums, dev_count);
return false;
}

std::unordered_set<uint32_t> record;
for (size_t i = 0; i < device_nums; ++i) {
uint32_t dev_id = deviceid_list[i];
if (dev_id >= static_cast<uint32_t>(dev_count)) {
GELOGE(PARAM_INVALID, "Device id %u is not in range 0 ~ %d(exclude %d)", dev_id, dev_count, dev_count);
return false;
}
if (record.count(dev_id) > 0) {
GELOGE(PARAM_INVALID, "Device id %u is duplicatedly set", dev_id);
return false;
}
record.insert(dev_id);
}
return true;
}

aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t device_nums,
ProfilingAicoreMetrics aicore_metrics, ProfAicoreEvents *aicore_events,
uint64_t data_type_config) {
if (!isProfConfigValid(deviceid_list, device_nums)) {
return nullptr;
}
aclgrphProfConfig *config = new (std::nothrow) aclgrphProfConfig();
if (config == nullptr) {
GELOGE(INTERNAL_ERROR, "new aclgrphProfConfig fail");
return nullptr;
}
config->config.devNums = device_nums;
if (memcpy_s(config->config.devIdList, sizeof(config->config.devIdList), deviceid_list,
device_nums * sizeof(uint32_t)) != EOK) {
GELOGE(INTERNAL_ERROR, "copy devID failed. size = %u", device_nums);
delete config;
return nullptr;
}

config->config.aicoreMetrics = static_cast<ProfAicoreMetrics>(aicore_metrics);
config->config.dataTypeConfig = data_type_config;
GELOGI("Successfully create prof config.");
return config;
}

Status aclgrphProfDestroyConfig(aclgrphProfConfig *profiler_config) {
if (profiler_config == nullptr) {
GELOGE(PARAM_INVALID, "destroy profilerConfig failed, profilerConfig must not be nullptr");
return PARAM_INVALID;
}

delete profiler_config;
GELOGI("Successfully destroy prof config.");
return SUCCESS;
}

Status aclgrphProfStart(aclgrphProfConfig *profiler_config) {
if (profiler_config == nullptr) {
GELOGE(PARAM_INVALID, "aclgrphProfConfig is invalid.");
return FAILED;
}
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized.");
return FAILED;
}

std::lock_guard<std::mutex> lock(g_prof_mutex_);
// if command mode is set, just return
if (ProfilingManager::Instance().ProfilingOn()) {
GELOGW("Graph prof finalize failed, cause profiling command pattern is running.");
return GE_PROF_MODE_CONFLICT;
}
if (!g_graph_prof_init_) {
GELOGE(GE_PROF_NOT_INIT, "Graph not profiling initialize.");
return GE_PROF_NOT_INIT;
}

Status ret = ProfStartProfiling(&profiler_config->config);
if (ret != SUCCESS) {
GELOGE(ret, "Start profiling failed, prof result = %d", ret);
return FAILED;
}

std::vector<string> prof_params;
if (!TransProfConfigToParam(profiler_config, prof_params)) {
GELOGE(PARAM_INVALID, "Transfer profilerConfig to string vector failed");
return PARAM_INVALID;
}

GraphLoader graph_loader;
Command command;
command.cmd_params.clear();
command.cmd_type = kProfilingStart;
command.cmd_params = prof_params;
command.module_index = profiler_config->config.dataTypeConfig;
GELOGI("Profiling will start, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(),
prof_params[kDeviceListIndex].c_str(), command.module_index);
ret = graph_loader.CommandHandle(command);
if (ret != SUCCESS) {
GELOGE(ret, "Handle profiling command failed");
return FAILED;
}

GELOGI("Successfully execute GraphProfStartProfiling.");

return SUCCESS;
}

Status aclgrphProfStop(aclgrphProfConfig *profiler_config) {
if (profiler_config == nullptr) {
GELOGE(PARAM_INVALID, "aclgrphProfConfig is invalid.");
return FAILED;
}
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized.");
return FAILED;
}

std::lock_guard<std::mutex> lock(g_prof_mutex_);
// if command mode is set, just return
if (ProfilingManager::Instance().ProfilingOn()) {
GELOGW("Graph prof finalize failed, cause profiling command pattern is running.");
return GE_PROF_MODE_CONFLICT;
}
if (!g_graph_prof_init_) {
GELOGE(GE_PROF_NOT_INIT, "Graph not profiling initialize.");
return GE_PROF_NOT_INIT;
}

for (uint32_t i = 0; i < profiler_config->config.devNums; i++) {
uint64_t data_type_config;
Status status = ProfGetDataTypeConfig(profiler_config->config.devIdList[i], data_type_config);
if (status != SUCCESS) {
GELOGE(status, "Prof get data type config failed, prof result = %d", status);
return status;
}
if (data_type_config != profiler_config->config.dataTypeConfig) {
GELOGE(FAILED, "data type config verify failed");
return FAILED;
}
}

std::vector<string> prof_params;
if (!TransProfConfigToParam(profiler_config, prof_params)) {
GELOGE(PARAM_INVALID, "Transfer profilerConfig to string vector failed");
return PARAM_INVALID;
}

GraphLoader graph_loader;
Command command;
command.cmd_params.clear();
command.cmd_type = kProfilingStop;
command.cmd_params = prof_params;
command.module_index = profiler_config->config.dataTypeConfig;
GELOGI("Profiling will stop, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(),
prof_params[kDeviceListIndex].c_str(), command.module_index);
Status ret = graph_loader.CommandHandle(command);
if (ret != SUCCESS) {
GELOGE(ret, "Handle profiling command failed");
return FAILED;
}

ret = ProfStopProfiling(&profiler_config->config);
if (ret != SUCCESS) {
GELOGE(ret, "Stop profiling failed, prof result = %d", ret);
return ret;
}

GELOGI("Successfully execute GraphProfStopProfiling.");
return SUCCESS;
}
} // namespace ge
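
For orientation, a minimal usage sketch of the aclgrphProf* API above (illustrative only, not part of this change; RunWithGraphProfiling is a hypothetical caller, and it assumes graph profiling was already initialized by the matching init call):

Status RunWithGraphProfiling(ProfilingAicoreMetrics aicore_metrics, uint64_t data_type_config) {
  uint32_t dev_ids[2] = {0, 1};
  aclgrphProfConfig *cfg = aclgrphProfCreateConfig(dev_ids, 2, aicore_metrics, nullptr, data_type_config);
  if (cfg == nullptr) {
    return FAILED;  // invalid device list or allocation failure
  }
  if (aclgrphProfStart(cfg) != SUCCESS) {  // issues kProfilingStart via GraphLoader::CommandHandle
    (void)aclgrphProfDestroyConfig(cfg);
    return FAILED;
  }
  // ... build and run graphs here ...
  (void)aclgrphProfStop(cfg);              // checks dataTypeConfig per device, then issues kProfilingStop
  return aclgrphProfDestroyConfig(cfg);
}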

+ 6
- 5
ge/client/module.mk View File

@@ -4,7 +4,6 @@ LOCAL_PATH := $(call my-dir)
COMMON_LOCAL_SRC_FILES := \
proto/ge_api.proto \
ge_api.cc \
ge_prof.cc \


COMMON_LOCAL_C_INCLUDES := \
@@ -69,9 +68,9 @@ LOCAL_SHARED_LIBRARIES := \
libgraph \
libregister \
libge_compiler \
libge_common \
libmsprof
libge_common

LOCAL_STATIC_LIBRARIES += libmsprofiler_fwk \


LOCAL_LDFLAGS := -lrt -ldl
@@ -104,8 +103,10 @@ LOCAL_SHARED_LIBRARIES := \
libregister \
libruntime \
libge_compiler \
libge_common \
libmsprof
libge_common


LOCAL_STATIC_LIBRARIES += libmsprofiler_fwk \


LOCAL_LDFLAGS := -lrt -ldl


+ 1
- 0
ge/client/proto/ge_ir.proto View File

@@ -30,6 +30,7 @@ enum DataType
DT_RESOURCE = 23; // resource type
DT_STRING_REF = 24; // string_ref type
DT_DUAL = 25; /**< dual output type */
DT_VARIANT = 26; // variant type
}

message AttrDef


+ 7
- 1
ge/common/CMakeLists.txt View File

@@ -24,6 +24,7 @@ set(SRC_LIST
"helper/om_file_helper.cc"
"helper/model_helper.cc"
"../model/ge_model.cc"
"../model/ge_root_model.cc"
"auth/file_saver.cc"
"fp16_t.cc"
"math/fp16_math.cc"
@@ -79,6 +80,7 @@ target_compile_options(ge_common PRIVATE
-O2
-Werror
-Wno-deprecated-declarations
-fno-common
)

target_include_directories(ge_common PRIVATE
@@ -129,10 +131,11 @@ target_compile_definitions(ge_common_static PRIVATE
google=ascend_private
$<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0>
$<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX>
LOG_CPP
)

target_compile_options(ge_common_static PRIVATE
$<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations>
$<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common>
$<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Debug>>:/MTd>
$<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Release>>:/MT>
)
@@ -177,12 +180,15 @@ target_compile_definitions(ge_common PRIVATE
FMK_SUPPORT_DUMP
OS_CENTOS
google=ascend_private
LOG_CPP
)

target_compile_options(ge_common PRIVATE
-fvisibility=hidden
-O2
-Werror
-Wno-deprecated-declarations
-fno-common
)

target_include_directories(ge_common PRIVATE


+ 61
- 2
ge/common/auth/file_saver.cc View File

@@ -54,8 +54,8 @@ Status FileSaver::OpenFile(int32_t &fd, const std::string &file_path) {
Status FileSaver::WriteData(const void *data, uint32_t size, int32_t fd) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(size == 0 || data == nullptr, return PARAM_INVALID);
mmSsize_t write_count;
uint32_t size_2g = ((uint32_t) 0x1 << 31);
uint32_t size_1g = ((uint32_t) 0x1 << 30);
uint32_t size_2g = 2147483648; // 0x1 << 31
uint32_t size_1g = 1073741824; // 0x1 << 30
// Write data
if (size > size_2g) {
auto seek = reinterpret_cast<uint8_t *>(const_cast<void *>(data));
@@ -258,6 +258,65 @@ FileSaver::SaveToFile(const string &file_path, ModelFileHeader &file_header, Mod
return SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status
FileSaver::SaveToFile(const string &file_path, ModelFileHeader &file_header,
vector<ModelPartitionTable *> &model_partition_tables,
const vector<vector<ModelPartition>> &all_partition_datas) {
file_header.is_encrypt = ModelEncryptType::UNENCRYPTED;

const Status ret = SaveWithFileHeader(file_path, file_header, model_partition_tables, all_partition_datas);
GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, FAILED, "save file failed, file_path:%s, file header len:%u.",
file_path.c_str(), file_header.length);
return SUCCESS;
}

Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFileHeader &file_header,
vector<ModelPartitionTable *> &model_partition_tables,
const vector<vector<ModelPartition>> &all_partition_datas) {

GE_CHK_BOOL_EXEC(model_partition_tables.size() == all_partition_datas.size(),
return PARAM_INVALID,
"model table size %zu does not match partition size %zu",
model_partition_tables.size(), all_partition_datas.size())
for (size_t index = 0; index < model_partition_tables.size(); ++index) {
auto &cur_partiton_data = all_partition_datas[index];
auto &cur_model_partition_table = *model_partition_tables[index];
GE_CHK_BOOL_RET_STATUS(!cur_partiton_data.empty() && cur_model_partition_table.num != 0
&& cur_model_partition_table.num == cur_partiton_data.size(), FAILED,
"Invalid param:partition data size is (%u), model_partition_table.num is (%zu).",
cur_model_partition_table.num, cur_partiton_data.size());
}

// Open file
int32_t fd = 0;
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(OpenFile(fd, file_path) != SUCCESS, return FAILED);
Status ret = SUCCESS;
do {
// Write file header
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
WriteData(static_cast<const void *>(&file_header), sizeof(ModelFileHeader), fd) != SUCCESS, ret = FAILED;
break);
for (size_t index = 0; index < model_partition_tables.size(); ++index) {
// Write model partition table
auto &cur_tabel = *model_partition_tables[index];
uint32_t table_size = static_cast<uint32_t>(SIZE_OF_MODEL_PARTITION_TABLE(cur_tabel));
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
WriteData(static_cast<const void *>(&cur_tabel), table_size, fd) != SUCCESS, ret = FAILED; break);
// Write partition data
auto &cur_partition_datas = all_partition_datas[index];
for (const auto &partition_data : cur_partition_datas) {
GELOGI("GC:size[%zu]", partition_data.size);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
WriteData(static_cast<const void *>(partition_data.data), partition_data.size, fd) != SUCCESS, ret = FAILED;
break);
}
}
} while (0);
// Close file
GE_CHK_BOOL_RET_STATUS(mmClose(fd) == EN_OK, FAILED, "Close file failed.");
return ret;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status FileSaver::SaveToFile(const string &file_path, const void *data,
int len) {
if (data == nullptr || len <= 0) {


+ 7
- 0
ge/common/auth/file_saver.h View File

@@ -74,6 +74,10 @@ class FileSaver {
ModelPartitionTable &model_partition_table,
const std::vector<ModelPartition> &partition_datas);

static Status SaveToFile(const string &file_path, ModelFileHeader &file_header,
vector<ModelPartitionTable *> &model_partition_tables,
const vector<vector<ModelPartition>> &all_partition_datas);

static Status SaveToBuffWithFileHeader(const ModelFileHeader &file_header,
ModelPartitionTable &model_partition_table,
const std::vector<ModelPartition> &partitionDatas,
@@ -108,6 +112,9 @@ class FileSaver {
static Status SaveWithFileHeader(const std::string &file_path, const ModelFileHeader &file_header,
ModelPartitionTable &model_partition_table,
const std::vector<ModelPartition> &partition_datas);
static Status SaveWithFileHeader(const std::string &file_path, const ModelFileHeader &file_header,
vector<ModelPartitionTable *> &model_partition_tables,
const vector<vector<ModelPartition>> &all_partition_datas);
};
} // namespace ge
#endif // GE_COMMON_AUTH_FILE_SAVER_H_
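
A sketch of the on-disk layout produced by the new vector-of-tables SaveToFile/SaveWithFileHeader overloads declared above (assuming N sub-models, one partition table per sub-model):

  ModelFileHeader | ModelPartitionTable[0] | partitions of model 0 | ModelPartitionTable[1] | partitions of model 1 | ... | ModelPartitionTable[N-1] | partitions of model N-1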

+ 30
- 21
ge/common/base64.h View File

@@ -25,32 +25,38 @@

namespace ge {
namespace {
const char* kBase64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789+/";
const char *kBase64Chars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789+/";
const char kEqualSymbol = '=';
const size_t kBase64CharsNum = 64;
const size_t kThreeByteOneGroup = 3;
const size_t kFourByteOneGroup = 4;
}
const size_t kThreeByteOneGroupIndex0 = 0;
const size_t kThreeByteOneGroupIndex1 = 1;
const size_t kThreeByteOneGroupIndex2 = 2;
const size_t kFourByteOneGroupIndex0 = 0;
const size_t kFourByteOneGroupIndex1 = 1;
const size_t kFourByteOneGroupIndex2 = 2;
const size_t kFourByteOneGroupIndex3 = 3;
} // namespace

namespace base64 {
static inline bool IsBase64Char(const char &c) {
return (isalnum(c) || (c == '+') || (c == '/'));
}
static inline bool IsBase64Char(const char &c) { return (isalnum(c) || (c == '+') || (c == '/')); }

static std::string EncodeToBase64(const std::string &raw_data) {
size_t encode_length = raw_data.size() / kThreeByteOneGroup * kFourByteOneGroup;
encode_length += raw_data.size() % kThreeByteOneGroup == 0 ? 0 : kFourByteOneGroup;
size_t raw_data_index = 0 ;
size_t raw_data_index = 0;
size_t encode_data_index = 0;
std::string encode_data;
encode_data.resize(encode_length);

for (; raw_data_index + kThreeByteOneGroup <= raw_data.size(); raw_data_index += kThreeByteOneGroup) {
auto char_1 = static_cast<uint8_t>(raw_data[raw_data_index]);
auto char_2 = static_cast<uint8_t>(raw_data[raw_data_index + 1]);
auto char_3 = static_cast<uint8_t>(raw_data[raw_data_index + 2]);
auto char_2 = static_cast<uint8_t>(raw_data[raw_data_index + kThreeByteOneGroupIndex1]);
auto char_3 = static_cast<uint8_t>(raw_data[raw_data_index + kThreeByteOneGroupIndex2]);
encode_data[encode_data_index++] = kBase64Chars[char_1 >> 2u];
encode_data[encode_data_index++] = kBase64Chars[((char_1 << 4u) & 0x30) | (char_2 >> 4u)];
encode_data[encode_data_index++] = kBase64Chars[((char_2 << 2u) & 0x3c) | (char_3 >> 6u)];
@@ -80,8 +86,7 @@ static std::string EncodeToBase64(const std::string &raw_data) {
#pragma GCC diagnostic ignored "-Wunused-function"
static Status DecodeFromBase64(const std::string &base64_data, std::string &decode_data) {
if (base64_data.size() % kFourByteOneGroup != 0) {
GELOGE(PARAM_INVALID, "base64 data size must can be divided by 4, but given data size is %zu",
base64_data.size());
GELOGE(PARAM_INVALID, "base64 data size must can be divided by 4, but given data size is %zu", base64_data.size());
return PARAM_INVALID;
}
decode_data.clear();
@@ -92,10 +97,10 @@ static Status DecodeFromBase64(const std::string &base64_data, std::string &deco
return static_cast<uint8_t>(std::distance(kBase64Chars, char_pos)) & 0xff;
};

for (std::size_t input_data_index = 0; input_data_index < base64_data_len; input_data_index += 4) {
for (std::size_t input_data_index = 0; input_data_index < base64_data_len; input_data_index += kFourByteOneGroup) {
for (size_t i = 0; i < kFourByteOneGroup; ++i) {
if (base64_data[input_data_index + i] == kEqualSymbol &&
input_data_index >= base64_data_len - 4 && i > 1) {
input_data_index >= base64_data_len - kFourByteOneGroup && i > 1) {
byte_4[i] = kBase64CharsNum;
} else if (IsBase64Char(base64_data[input_data_index + i])) {
byte_4[i] = FindCharInBase64Chars(base64_data[input_data_index + i]);
@@ -104,19 +109,23 @@ static Status DecodeFromBase64(const std::string &base64_data, std::string &deco
return PARAM_INVALID;
}
}
decode_data += static_cast<char>((byte_4[0] << 2u) + ((byte_4[1] & 0x30) >> 4u));
if (byte_4[2] >= kBase64CharsNum){
decode_data +=
static_cast<char>((byte_4[kFourByteOneGroupIndex0] << 2u) + ((byte_4[kFourByteOneGroupIndex1] & 0x30) >> 4u));
if (byte_4[kFourByteOneGroupIndex2] >= kBase64CharsNum) {
break;
} else if (byte_4[3] >= kBase64CharsNum) {
decode_data += static_cast<char>(((byte_4[1] & 0x0f) << 4u) + ((byte_4[2] & 0x3c) >> 2u));
} else if (byte_4[kFourByteOneGroupIndex3] >= kBase64CharsNum) {
decode_data += static_cast<char>(((byte_4[kFourByteOneGroupIndex1] & 0x0f) << 4u) +
((byte_4[kFourByteOneGroupIndex2] & 0x3c) >> 2u));
break;
}
decode_data += static_cast<char>(((byte_4[1] & 0x0f) << 4u) + ((byte_4[2] & 0x3c) >> 2u));
decode_data += static_cast<char>(((byte_4[2] & 0x03) << 6u) + byte_4[3]);
decode_data += static_cast<char>(((byte_4[kFourByteOneGroupIndex1] & 0x0f) << 4u) +
((byte_4[kFourByteOneGroupIndex2] & 0x3c) >> 2u));
decode_data +=
static_cast<char>(((byte_4[kFourByteOneGroupIndex2] & 0x03) << 6u) + byte_4[kFourByteOneGroupIndex3]);
}
return SUCCESS;
}
#pragma GCC diagnostic pop
}
} // namespace base64
} // namespace ge
#endif // GE_COMMON_BASE64_H_
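
A quick round-trip sketch of the helpers above (illustrative only; error handling elided):

  std::string encoded = ge::base64::EncodeToBase64("GE");  // "GE" -> "R0U=" (one 4-char group, one '=' pad)
  std::string decoded;
  if (ge::base64::DecodeFromBase64(encoded, decoded) == ge::SUCCESS) {
    // decoded == "GE"
  }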

+ 2
- 1
ge/common/debug/memory_dumper.cc View File

@@ -139,7 +139,8 @@ int MemoryDumper::OpenFile(const char *filename) {
GE_IF_BOOL_EXEC(
-1 != path_split_pos, string prefix_path = std::string(filename).substr(0, path_split_pos);
string last_path = std::string(filename).substr(path_split_pos, strlen(filename) - 1);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(prefix_path.length() >= MMPA_MAX_PATH, return kInvalidFd, "Prefix path is too long!");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(prefix_path.length() >= MMPA_MAX_PATH,
return kInvalidFd, "Prefix path is too long!");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mmRealPath(prefix_path.c_str(), tmp_path, MMPA_MAX_PATH) != EN_OK, return kInvalidFd,
"Dir %s does not exit.", prefix_path.c_str());
real_path = std::string(tmp_path) + last_path;)


+ 14
- 1
ge/common/dump/dump_op.cc View File

@@ -94,6 +94,9 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) {
for (auto dim : output_descs.at(i).GetShape().GetDims()) {
output.mutable_shape()->add_dim(dim);
}
for (auto dim : output_descs.at(i).GetOriginShape().GetDims()) {
output.mutable_origin_shape()->add_dim(dim);
}
int64_t output_size = 0;
if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size filed");
@@ -118,6 +121,9 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) {
for (auto dim : input_descs.at(i).GetShape().GetDims()) {
input.mutable_shape()->add_dim(dim);
}
for (auto dim : input_descs.at(i).GetOriginShape().GetDims()) {
input.mutable_origin_shape()->add_dim(dim);
}
int64_t input_size = 0;
if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size filed");
@@ -214,8 +220,15 @@ Status DumpOp::LaunchDumpOp() {
SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(),
dump_path.c_str());

uint32_t task_id = 0;
uint32_t stream_id = 0;
rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("call rtGetTaskIdAndStreamID failed, ret = 0x%X", rt_ret);
}
aicpu::dump::Task task;
task.set_task_id(task_id);
task.set_stream_id(stream_id);
task.mutable_op()->set_op_name(op_desc_->GetName());
task.mutable_op()->set_op_type(op_desc_->GetType());
if (dump_properties_.GetDumpMode() == kDumpOutput) {


+ 43
- 25
ge/common/formats/format_transfers/format_transfer_fractal_nz.cc View File

@@ -23,12 +23,30 @@
#include "common/formats/utils/formats_trans_utils.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "framework/common/types.h"
#include "graph/utils/type_utils.h"

namespace ge {
namespace formats {
namespace {
const int kDimSize4D = 4;

const size_t kSingleDim = 1;

const size_t kNdDimIndexN = 0;
const size_t kNdDimIndexH = 1;
const size_t kNdDimIndexW = 2;

const size_t kDimDValueBNdFNz = 2;  // dim d-value between Nd and FractalNz

const size_t kNdDimCountBackwardsW = 1;
const size_t kNdDimCountBackwardsWH = 2;

const size_t kFNzDimCountBackwardsW0 = 1;
const size_t kFNzDimCountBackwardsW0H0 = 2;
const size_t kFNzDimCountBackwardsW0H0H1 = 3;
const size_t kFNzDimCountBackwardsW0H0H1W1 = 4;

bool IsDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_type) > 0; }

using ShapeVector = std::vector<int64_t>;
@@ -60,14 +78,14 @@ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, Shap
auto w0 = GetCubeSizeByDataType(data_type);
int64_t h0 = kCubeSize;
switch (src_shape.size()) {
case 1:
dst_shape.push_back(Ceil(src_shape[0], w0));
dst_shape.push_back(1);
case kSingleDim:
dst_shape.push_back(Ceil(src_shape[kNdDimIndexN], w0));
dst_shape.push_back(DIM_DEFAULT_VALUE);
dst_shape.push_back(h0);
dst_shape.push_back(w0);
hw_shape.push_back(1);
hw_shape.push_back(1);
hw_shape.push_back(src_shape[0]);
hw_shape.push_back(DIM_DEFAULT_VALUE);
hw_shape.push_back(DIM_DEFAULT_VALUE);
hw_shape.push_back(src_shape[kNdDimIndexN]);
if (!IsShapeValid(dst_shape)) {
GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
return PARAM_INVALID;
@@ -76,17 +94,17 @@ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, Shap
default:
auto size = src_shape.size();
int64_t times = 1;
for (size_t i = 0; i != size - 2; i++) {
for (size_t i = 0; i != size - kDimDValueBNdFNz; i++) {
dst_shape.push_back(src_shape[i]);
times *= src_shape[i];
}
dst_shape.push_back(Ceil(src_shape[size - 1], w0));
dst_shape.push_back(Ceil(src_shape[size - 2], h0));
dst_shape.push_back(Ceil(src_shape[size - kNdDimCountBackwardsW], w0));
dst_shape.push_back(Ceil(src_shape[size - kNdDimCountBackwardsWH], h0));
dst_shape.push_back(h0);
dst_shape.push_back(w0);
hw_shape.push_back(times);
hw_shape.push_back(src_shape[size - 2]);
hw_shape.push_back(src_shape[size - 1]);
hw_shape.push_back(src_shape[size - kNdDimCountBackwardsWH]);
hw_shape.push_back(src_shape[size - kNdDimCountBackwardsW]);
if (!IsShapeValid(dst_shape)) {
GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
return PARAM_INVALID;
@@ -128,16 +146,16 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con
}

// src&dst_shape can be written as times*H*W & times*W1*H1*H0*W0, respectively. dst_shape_size >= kDimNum4D
auto times = hw_shape.at(0);
auto h = hw_shape.at(1);
auto w = hw_shape.at(2);
auto times = hw_shape.at(kNdDimIndexN);
auto h = hw_shape.at(kNdDimIndexH);
auto w = hw_shape.at(kNdDimIndexW);
auto hw = h * w;

auto shape_size = args.dst_shape.size();
auto w1 = args.dst_shape[shape_size - 4];
auto h1 = args.dst_shape[shape_size - 3];
auto h0 = args.dst_shape[shape_size - 2];
auto w0 = args.dst_shape[shape_size - 1];
auto w1 = args.dst_shape[shape_size - kFNzDimCountBackwardsW0H0H1W1];
auto h1 = args.dst_shape[shape_size - kFNzDimCountBackwardsW0H0H1];
auto h0 = args.dst_shape[shape_size - kFNzDimCountBackwardsW0H0];
auto w0 = args.dst_shape[shape_size - kFNzDimCountBackwardsW0];
auto h1h0 = h1 * h0;
auto h1h0w0 = h1h0 * w0;
auto w1h1h0w0 = w1 * h1h0w0;
@@ -198,16 +216,16 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
return OUT_OF_MEMORY;
}

auto times = dst_hw_shape.at(0);
auto h = dst_hw_shape.at(1);
auto w = dst_hw_shape.at(2);
auto times = dst_hw_shape.at(kNdDimIndexN);
auto h = dst_hw_shape.at(kNdDimIndexH);
auto w = dst_hw_shape.at(kNdDimIndexW);
auto hw = h * w;

auto shape_size = args.src_shape.size();
auto w1 = args.src_shape[shape_size - 4];
auto h1 = args.src_shape[shape_size - 3];
auto h0 = args.src_shape[shape_size - 2];
auto w0 = args.src_shape[shape_size - 1];
auto w1 = args.src_shape[shape_size - kFNzDimCountBackwardsW0H0H1W1];
auto h1 = args.src_shape[shape_size - kFNzDimCountBackwardsW0H0H1];
auto h0 = args.src_shape[shape_size - kFNzDimCountBackwardsW0H0];
auto w0 = args.src_shape[shape_size - kFNzDimCountBackwardsW0];
auto h1h0 = h1 * h0;
auto h1h0w0 = h1h0 * w0;
auto w1h1h0w0 = w1 * h1h0w0;
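
A worked example of TransShapeToFracNz above (a sketch, assuming fp16 so that both h0 = kCubeSize and w0 = GetCubeSizeByDataType(data_type) are 16):

  src ND shape        : [8, 100, 60]
  dst FRACTAL_NZ shape: [8, Ceil(60, 16), Ceil(100, 16), 16, 16] = [8, 4, 7, 16, 16]
  hw_shape            : [times = 8, H = 100, W = 60]

The same naming scheme is reused in format_transfer_fractal_zz.cc below, where the trailing dims are ordered (H1, W1, H0, W0) instead.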


+ 52
- 35
ge/common/formats/format_transfers/format_transfer_fractal_zz.cc View File

@@ -23,12 +23,29 @@
#include "common/formats/utils/formats_trans_utils.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "framework/common/types.h"
#include "graph/utils/type_utils.h"

namespace ge {
namespace formats {
namespace {
const int kDimSize4D = 4;

const size_t kSingleDim = 1;

const size_t kNdDimIndexN = 0;
const size_t kNdDimIndexH = 1;
const size_t kNdDimIndexW = 2;

const size_t kDimDValueBNdFZz = 2; // dim d-value between Nd and FractalZz

const size_t kNdDimCountBackwardsW = 1;
const size_t kNdDimCountBackwardsWH = 2;

const size_t kFZzDimCountBackwardsW0 = 1;
const size_t kFZzDimCountBackwardsW0H0 = 2;
const size_t kFZzDimCountBackwardsW0H0W1 = 3;
const size_t kFZzDimCountBackwardsW0H0W1H1 = 4;
bool IsDataTypeSupport(DataType d_type) { return GetSizeByDataType(d_type) > 0; }

using ShapeVector = std::vector<int64_t>;
@@ -40,8 +57,8 @@ bool CheckShape(Format format, const ShapeVector &shape) {
case FORMAT_NHWC:
return CheckShapeValid(shape, kDimSize4D);
default:
std::string error = "Trans format between " + FmtToStr(TypeUtils::FormatToSerialString(format)) +
" and FORMAT_FRACTAL_ZZ is not supported.";
std::string error = "Trans format between " + FmtToStr(TypeUtils::FormatToSerialString(format)) +
" and FORMAT_FRACTAL_ZZ is not supported.";
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
return false;
}
@@ -60,14 +77,14 @@ Status TransShapeToFracZz(const ShapeVector &src_shape, DataType data_type, Shap
auto w0 = GetCubeSizeByDataType(data_type);
auto h0 = GetCubeSizeByDataType(data_type);
switch (src_shape.size()) {
case 1:
dst_shape.push_back(1);
dst_shape.push_back(Ceil(src_shape[0], w0));
case kSingleDim:
dst_shape.push_back(DIM_DEFAULT_VALUE);
dst_shape.push_back(Ceil(src_shape[kNdDimIndexN], w0));
dst_shape.push_back(h0);
dst_shape.push_back(w0);
hw_shape.push_back(1);
hw_shape.push_back(1);
hw_shape.push_back(src_shape[0]);
hw_shape.push_back(DIM_DEFAULT_VALUE);
hw_shape.push_back(DIM_DEFAULT_VALUE);
hw_shape.push_back(src_shape[kNdDimIndexN]);
if (!IsShapeValid(dst_shape)) {
GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
return PARAM_INVALID;
@@ -76,17 +93,17 @@ Status TransShapeToFracZz(const ShapeVector &src_shape, DataType data_type, Shap
default:
auto size = src_shape.size();
int64_t times = 1;
for (size_t i = 0; i != size - 2; i++) {
for (size_t i = 0; i != size - kDimDValueBNdFZz; i++) {
dst_shape.push_back(src_shape[i]);
times *= src_shape[i];
}
dst_shape.push_back(Ceil(src_shape[size - 2], h0));
dst_shape.push_back(Ceil(src_shape[size - 1], w0));
dst_shape.push_back(Ceil(src_shape[size - kNdDimCountBackwardsWH], h0));
dst_shape.push_back(Ceil(src_shape[size - kNdDimCountBackwardsW], w0));
dst_shape.push_back(h0);
dst_shape.push_back(w0);
hw_shape.push_back(times);
hw_shape.push_back(src_shape[size - 2]);
hw_shape.push_back(src_shape[size - 1]);
hw_shape.push_back(src_shape[size - kNdDimCountBackwardsWH]);
hw_shape.push_back(src_shape[size - kNdDimCountBackwardsW]);
if (!IsShapeValid(dst_shape)) {
GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str());
return PARAM_INVALID;
@@ -127,16 +144,16 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con
return OUT_OF_MEMORY;
}
// The src&dst_shape can be written as times*H*W & times*H1*W1*H0*W0, respectively. dst_shape_size >= kDimNum4D
auto times = hw_shape.at(0);
auto h = hw_shape.at(1);
auto w = hw_shape.at(2);
auto times = hw_shape.at(kNdDimIndexN);
auto h = hw_shape.at(kNdDimIndexH);
auto w = hw_shape.at(kNdDimIndexW);
auto hw = h * w;

auto shape_size = args.dst_shape.size();
auto h1 = args.dst_shape[shape_size - 4];
auto w1 = args.dst_shape[shape_size - 3];
auto h0 = args.dst_shape[shape_size - 2];
auto w0 = args.dst_shape[shape_size - 1];
auto h1 = args.dst_shape[shape_size - kFZzDimCountBackwardsW0H0W1H1];
auto w1 = args.dst_shape[shape_size - kFZzDimCountBackwardsW0H0W1];
auto h0 = args.dst_shape[shape_size - kFZzDimCountBackwardsW0H0];
auto w0 = args.dst_shape[shape_size - kFZzDimCountBackwardsW0];
auto h0w0 = h0 * w0;
auto w1h0w0 = w1 * h0w0;
auto h1w1h0w0 = h1 * w1h0w0;
@@ -155,8 +172,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con
auto src_offset = (src_h_head + w1_idx * w0) * size;
auto dst_offset = (h0_head + w1_idx * h0w0) * size;
auto protected_size = dst_size - dst_offset < static_cast<int64_t>(SECUREC_MEM_MAX_LEN)
? dst_size - dst_offset
: static_cast<int64_t>(SECUREC_MEM_MAX_LEN);
? dst_size - dst_offset
: static_cast<int64_t>(SECUREC_MEM_MAX_LEN);
auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
static_cast<size_t>(size * w0));
if (ret != EOK) {
@@ -171,8 +188,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con
auto src_offset = (src_h_head + src_w_idx) * size;
auto dst_offset = (w0_head + w0_idx) * size;
auto protected_size = dst_size - dst_offset < static_cast<int64_t>(SECUREC_MEM_MAX_LEN)
? dst_size - dst_offset
: static_cast<int64_t>(SECUREC_MEM_MAX_LEN);
? dst_size - dst_offset
: static_cast<int64_t>(SECUREC_MEM_MAX_LEN);
auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
static_cast<size_t>(size));
if (ret != EOK) {
@@ -205,16 +222,16 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
}

// The src&dst_shape can be written as times*H*W & times*H1*W1*H0*W0, respectively. dst_shape_size >= kDimNum4D
auto times = dst_hw_shape.at(0);
auto h = dst_hw_shape.at(1);
auto w = dst_hw_shape.at(2);
auto times = dst_hw_shape.at(kNdDimIndexN);
auto h = dst_hw_shape.at(kNdDimIndexH);
auto w = dst_hw_shape.at(kNdDimIndexW);
auto hw = h * w;

auto shape_size = args.src_shape.size();
auto h1 = args.src_shape[shape_size - 4];
auto w1 = args.src_shape[shape_size - 3];
auto h0 = args.src_shape[shape_size - 2];
auto w0 = args.src_shape[shape_size - 1];
auto h1 = args.src_shape[shape_size - kFZzDimCountBackwardsW0H0W1H1];
auto w1 = args.src_shape[shape_size - kFZzDimCountBackwardsW0H0W1];
auto h0 = args.src_shape[shape_size - kFZzDimCountBackwardsW0H0];
auto w0 = args.src_shape[shape_size - kFZzDimCountBackwardsW0];
auto h0w0 = h0 * w0;
auto w1h0w0 = w1 * h0w0;
auto h1w1h0w0 = h1 * w1h0w0;
@@ -233,8 +250,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
auto src_offset = (h0_head + w1_idx * h0w0) * size;
auto dst_offset = (dst_h_head + w1_idx * w0) * size;
auto protected_size = dst_size - dst_offset < static_cast<int64_t>(SECUREC_MEM_MAX_LEN)
? dst_size - dst_offset
: static_cast<int64_t>(SECUREC_MEM_MAX_LEN);
? dst_size - dst_offset
: static_cast<int64_t>(SECUREC_MEM_MAX_LEN);
auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
static_cast<size_t>(size * w0));
if (ret != EOK) {
@@ -249,8 +266,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
auto dst_w_idx = w1_head + w0_idx;
auto dst_offset = (dst_h_head + dst_w_idx) * size;
auto protected_size = dst_size - dst_offset < static_cast<int64_t>(SECUREC_MEM_MAX_LEN)
? dst_size - dst_offset
: static_cast<int64_t>(SECUREC_MEM_MAX_LEN);
? dst_size - dst_offset
: static_cast<int64_t>(SECUREC_MEM_MAX_LEN);
auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
static_cast<size_t>(size));
if (ret != EOK) {


+ 0
- 1
ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc View File

@@ -35,7 +35,6 @@
* Padding to (N, ceil(Z/16)*16)
* Last Step: View the (N, ceil(Z/16)*16) as 4D (N/16, 16, C/16, 16) and transpose to (C/16, N/16, 16, 16)
*/

namespace ge {
namespace formats {
namespace {


+ 13
- 12
ge/common/formats/format_transfers/format_transfer_transpose.cc View File

@@ -19,6 +19,7 @@
#include <securec.h>
#include <memory>

#include "common/formats/utils/formats_definitions.h"
#include "common/formats/utils/formats_trans_utils.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
@@ -29,21 +30,21 @@ namespace formats {
namespace {
std::map<Format, std::map<Format, std::vector<int64_t>>> perm_args{
{FORMAT_NCHW,
{{FORMAT_NHWC, std::vector<int64_t>({0, 2, 3, 1})},
{FORMAT_HWCN, std::vector<int64_t>({2, 3, 1, 0})},
{FORMAT_CHWN, std::vector<int64_t>({1, 2, 3, 0})}}},
{{FORMAT_NHWC, std::vector<int64_t>({kNchwN, kNchwH, kNchwW, kNchwC})},
{FORMAT_HWCN, std::vector<int64_t>({kNchwH, kNchwW, kNchwC, kNchwN})},
{FORMAT_CHWN, std::vector<int64_t>({kNchwC, kNchwH, kNchwW, kNchwN})}}},
{FORMAT_NHWC,
{{FORMAT_NCHW, std::vector<int64_t>({0, 3, 1, 2})},
{FORMAT_CHWN, std::vector<int64_t>({3, 1, 2, 0})},
{FORMAT_HWCN, std::vector<int64_t>({1, 2, 3, 0})}}},
{{FORMAT_NCHW, std::vector<int64_t>({kNhwcN, kNhwcC, kNhwcH, kNhwcW})},
{FORMAT_CHWN, std::vector<int64_t>({kNhwcC, kNhwcH, kNhwcW, kNhwcN})},
{FORMAT_HWCN, std::vector<int64_t>({kNhwcH, kNhwcW, kNhwcC, kNhwcN})}}},
{FORMAT_HWCN,
{{FORMAT_NCHW, std::vector<int64_t>({3, 2, 0, 1})},
{FORMAT_NHWC, std::vector<int64_t>({3, 0, 1, 2})},
{FORMAT_CHWN, std::vector<int64_t>({2, 0, 1, 3})}}},
{{FORMAT_NCHW, std::vector<int64_t>({kHwcnN, kHwcnC, kHwcnH, kHwcnW})},
{FORMAT_NHWC, std::vector<int64_t>({kHwcnN, kHwcnH, kHwcnW, kHwcnC})},
{FORMAT_CHWN, std::vector<int64_t>({kHwcnC, kHwcnH, kHwcnW, kHwcnN})}}},
{FORMAT_CHWN,
{{FORMAT_NCHW, std::vector<int64_t>({3, 0, 1, 2})},
{FORMAT_NHWC, std::vector<int64_t>({3, 1, 2, 0})},
{FORMAT_HWCN, std::vector<int64_t>({1, 2, 0, 3})}}},
{{FORMAT_NCHW, std::vector<int64_t>({kChwnN, kChwnC, kChwnH, kChwnW})},
{FORMAT_NHWC, std::vector<int64_t>({kChwnN, kChwnH, kChwnW, kChwnC})},
{FORMAT_HWCN, std::vector<int64_t>({kChwnH, kChwnW, kChwnC, kChwnN})}}},
};

bool IsShapeArgValid(const std::vector<int64_t> &src_shape, const std::vector<int64_t> &perm_arg) {
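
A worked example of the perm table above (illustrative): dst dim i takes src dim perm[i].

  // NCHW [2, 3, 4, 5] with perm {kNchwN, kNchwH, kNchwW, kNchwC} = {0, 2, 3, 1}  ->  NHWC [2, 4, 5, 3]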


+ 9
- 0
ge/common/formats/utils/formats_definitions.h View File

@@ -23,6 +23,7 @@ static const int kCubeSize = 16;
static const int kNiSize = 16;
static const int64_t kShapeItemNumMAX = 1024UL * 1024UL * 1024UL * 1024UL;


enum NchwDimIndex {
kNchwN,
kNchwC,
@@ -47,6 +48,14 @@ enum HwcnDimIndex {
kHwcnDimsNum
};

enum ChwnDimIndex {
kChwnC,
kChwnH,
kChwnW,
kChwnN,
kChwnDimsNum
};

enum Nc1hwc0DimIndex {
kNc1hwc0N,
kNc1hwc0C1,


+ 1
- 0
ge/common/ge/datatype_util.cc View File

@@ -62,6 +62,7 @@ std::map<ge::DataType, ge::proto::DataType> g_dump_data_type_map = {
{ge::DT_RESOURCE, ge::proto::DT_RESOURCE},
{ge::DT_STRING_REF, ge::proto::DT_STRING_REF},
{ge::DT_STRING, ge::proto::DT_STRING},
{ge::DT_VARIANT, ge::proto::DT_VARIANT},
};
} // namespace



+ 7
- 1
ge/common/ge/plugin_manager.cc View File

@@ -123,7 +123,10 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec
if (handle == nullptr) {
const char *error = mmDlerror();
GE_IF_BOOL_EXEC(error == nullptr, error = "");
GELOGE(GE_PLGMGR_PATH_INVALID, "Failed to dlopen %s!", error);
ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"},
{"mmDlopen", "shared library path is " + FmtToStr(file_path_dlopen) + ". Errormessage" + FmtToStr(error)});
GELOGE(GE_PLGMGR_PATH_INVALID, "Failed to dlopen the shared library path[%s]. Errormessage[%s]!",
file_path_dlopen.c_str(), error);
continue;
}

@@ -132,6 +135,9 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec
for (const auto &func_name : func_check_list) {
auto real_fn = (void (*)())mmDlsym(handle, const_cast<char *>(func_name.c_str()));
if (real_fn == nullptr) {
ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"},
{"mmDlsym", FmtToStr(func_name) + " is skipped since function" +
FmtToStr(func_name) + " is not existed!"});
GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", func_name.c_str(),
func_name.c_str());
is_valid = false;


+ 13
- 4
ge/common/ge/tbe_plugin_manager.cc View File

@@ -37,6 +37,8 @@
#include "graph/utils/type_utils.h"

namespace ge {
const int kBaseInt = 10;

std::map<string, string> TBEPluginManager::options_ = {};

// Get Singleton Instance
@@ -155,7 +157,7 @@ void TBEPluginManager::GetCustomOpPath(std::string &customop_path) {
domi::FrameworkType type = domi::TENSORFLOW;
auto it = options_.find(FRAMEWORK_TYPE);
if (it != options_.end()) {
type = static_cast<domi::FrameworkType>(std::strtol(it->second.c_str(), nullptr, 10));
type = static_cast<domi::FrameworkType>(std::strtol(it->second.c_str(), nullptr, kBaseInt));
}
fmk_type = ge::TypeUtils::FmkTypeToSerialString(type);
GELOGI("Framework type is %s.", fmk_type.c_str());
@@ -179,12 +181,19 @@ void TBEPluginManager::GetCustomOpPath(std::string &customop_path) {
void TBEPluginManager::LoadCustomOpLib() {
LoadPluginSo(options_);

std::string fmk_type = std::to_string(domi::TENSORFLOW);
auto it = options_.find(ge::FRAMEWORK_TYPE);
if (it != options_.end()) {
fmk_type = it->second;
}
std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas;
GELOGI("The size of registration_datas is: %zu", registration_datas.size());
for (OpRegistrationData reg_data : registration_datas) {
GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(),
TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str());
domi::OpRegistry::Instance()->Register(reg_data);
if (std::to_string(reg_data.GetFrameworkType()) == fmk_type) {
GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(),
TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str());
(void)domi::OpRegistry::Instance()->Register(reg_data);
}
}
}



+ 1
- 0
ge/common/ge_common.mk View File

@@ -7,6 +7,7 @@ GE_COMMON_LOCAL_SRC_FILES := \
helper/om_file_helper.cc \
helper/model_helper.cc \
../model/ge_model.cc \
../model/ge_root_model.cc \
auth/file_saver.cc \
fp16_t.cc \
math/fp16_math.cc \


+ 442
- 37
ge/common/helper/model_helper.cc View File

@@ -32,6 +32,7 @@ using domi::ModelTaskDef;

namespace {
const int64_t kOriginalOmPartitionNum = 1;
const uint32_t kStatiOmFileModelNum = 1;
}


@@ -39,7 +40,7 @@ namespace ge {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelHelper::~ModelHelper() { (void)ReleaseLocalModelData(); }

Status ModelHelper::SaveModelPartition(std::shared_ptr<OmFileSaveHelper> &om_file_save_helper, ModelPartitionType type,
const uint8_t *data, size_t size) {
const uint8_t *data, size_t size, size_t model_index) {
if (size < 1 || size > UINT32_MAX) {
GELOGE(PARAM_INVALID, "Add model partition failed, partition size %zu invalid", size);
if (size > UINT32_MAX) {
@@ -68,25 +69,58 @@ Status ModelHelper::SaveModelPartition(std::shared_ptr<OmFileSaveHelper> &om_fil
partition_model.data = const_cast<uint8_t *>(data);
partition_model.size = static_cast<uint32_t>(size);
partition_model.type = type;
if (om_file_save_helper->AddPartition(partition_model) != SUCCESS) {
if (om_file_save_helper->AddPartition(partition_model, model_index) != SUCCESS) {
GELOGE(PARAM_INVALID, "Add model partition failed, partition size %zu", size);
return PARAM_INVALID;
}
return SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmModel(const GeModelPtr &ge_model,
const SaveParam &save_param,
const std::string &output_file,
ModelBufferData& model) {
if (output_file.empty()) {
GELOGE(FAILED, "GraphBuilder SaveModel received invalid file name prefix");
Status ModelHelper::SaveSizeToModelDef(const GeModelPtr &ge_model) {
vector<int64_t> om_info;
ModelPtr model_tmp = ge::MakeShared<ge::Model>(ge_model->GetName(), ge_model->GetPlatformVersion());
if (model_tmp == nullptr) {
GELOGE(FAILED, "Create Model %s Ptr failed", ge_model->GetName().c_str());
return FAILED;
}
model_tmp->SetGraph(ge_model->GetGraph());
model_tmp->SetVersion(ge_model->GetVersion());
model_tmp->SetAttr(ge_model->MutableAttrMap());
ge::Buffer model_buffer;
(void)model_tmp->Save(model_buffer);
GELOGD("SaveSizeToModelDef modeldef_size is %zu", model_buffer.GetSize());
om_info.push_back(model_buffer.GetSize());

GE_IF_BOOL_EXEC(ge_model == nullptr, GELOGE(FAILED, "Ge_model is nullptr"); return FAILED);
std::shared_ptr<OmFileSaveHelper> om_file_save_helper = ge::MakeShared<OmFileSaveHelper>();
GE_CHECK_NOTNULL(om_file_save_helper);
auto ge_model_weight = ge_model->GetWeight();
GELOGD("SaveSizeToModelDef weight_data_size is %zu, %p", ge_model_weight.GetSize(), ge_model_weight.GetData());
om_info.push_back(ge_model_weight.GetSize());

TBEKernelStore tbe_kernel_store = ge_model->GetTBEKernelStore();
GELOGD("SaveSizeToModelDef tbe_kernels_size is %zu", tbe_kernel_store.DataSize());
om_info.push_back(tbe_kernel_store.DataSize());

CustAICPUKernelStore cust_aicpu_kernel_store = ge_model->GetCustAICPUKernelStore();
GELOGD("SaveSizeToModelDef cust aicpu kernels size is %zu", cust_aicpu_kernel_store.DataSize());
om_info.push_back(cust_aicpu_kernel_store.DataSize());

std::shared_ptr<ModelTaskDef> model_task_def = ge_model->GetModelTaskDefPtr();
if (model_task_def == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create model task def ptr failed");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
size_t partition_task_size = model_task_def->ByteSizeLong();
GELOGD("SaveSizeToModelDef task_info_size is %zu", partition_task_size);
om_info.push_back(partition_task_size);

GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(*(ge_model.get()), "om_info_list", om_info),
GELOGE(FAILED, "SetListInt of om_info_list failed.");
return FAILED);

return SUCCESS;
}

Status ModelHelper::SaveModelDef(std::shared_ptr<OmFileSaveHelper> &om_file_save_helper,
const GeModelPtr &ge_model, ge::Buffer &model_buffer, size_t model_index) {
ModelPtr model_tmp = ge::MakeShared<ge::Model>(ge_model->GetName(), ge_model->GetPlatformVersion());
if (model_tmp == nullptr) {
GELOGE(FAILED, "Create Model %s Ptr failed", ge_model->GetName().c_str());
@@ -95,17 +129,26 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod
model_tmp->SetGraph(ge_model->GetGraph());
model_tmp->SetVersion(ge_model->GetVersion());
model_tmp->SetAttr(ge_model->MutableAttrMap());
Status ret = SaveSizeToModelDef(ge_model);
if (ret != SUCCESS) {
GELOGE(ret, "SaveSizeToModelDef failed");
return ret;
}

ge::Buffer model_buffer;
(void)model_tmp->Save(model_buffer);
GELOGD("MODEL_DEF size is %zu", model_buffer.GetSize());
if (model_buffer.GetSize() > 0) {
if (SaveModelPartition(om_file_save_helper, ModelPartitionType::MODEL_DEF, model_buffer.GetData(),
model_buffer.GetSize()) != SUCCESS) {
model_buffer.GetSize(), model_index) != SUCCESS) {
GELOGE(PARAM_INVALID, "Add model graph partition failed");
return PARAM_INVALID;
}
}
return SUCCESS;
}

Status ModelHelper::SaveModelWeights(std::shared_ptr<OmFileSaveHelper> &om_file_save_helper,
const GeModelPtr &ge_model, size_t model_index) {
auto ge_model_weight = ge_model->GetWeight();
GELOGD("WEIGHTS_DATA size is %zu, %p", ge_model_weight.GetSize(), ge_model_weight.GetData());
// weight is not necessary
@@ -113,31 +156,43 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod
GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper,
ModelPartitionType::WEIGHTS_DATA,
ge_model_weight.GetData(),
ge_model_weight.GetSize()), "Add weight partition failed");
ge_model_weight.GetSize(), model_index), "Add weight partition failed");
}
return SUCCESS;
}

Status ModelHelper::SaveModelTbeKernel(std::shared_ptr<OmFileSaveHelper> &om_file_save_helper,
const GeModelPtr &ge_model, size_t model_index) {
TBEKernelStore tbe_kernel_store = ge_model->GetTBEKernelStore();
GELOGD("TBE_KERNELS size is %zu", tbe_kernel_store.DataSize());
if (tbe_kernel_store.DataSize() > 0) {
GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper,
ModelPartitionType::TBE_KERNELS,
tbe_kernel_store.Data(),
tbe_kernel_store.DataSize()), "Add tbe kernel partition failed");
GE_CHK_STATUS_RET(
SaveModelPartition(om_file_save_helper, ModelPartitionType::TBE_KERNELS,
ge_model->GetTBEKernelStore().Data(), ge_model->GetTBEKernelStore().DataSize(),
model_index), "Add tbe kernel partition failed");
}

// no need to check value, DATA->NetOutput
(void)tbe_kernel_store.Load(tbe_kernel_store.Data(), tbe_kernel_store.DataSize());

return SUCCESS;
}

Status ModelHelper::SaveModelCustAICPU(std::shared_ptr<OmFileSaveHelper> &om_file_save_helper,
const GeModelPtr &ge_model, size_t model_index) {
CustAICPUKernelStore cust_aicpu_kernel_store = ge_model->GetCustAICPUKernelStore();
GELOGD("cust aicpu kernels size is %zu", cust_aicpu_kernel_store.DataSize());
if (cust_aicpu_kernel_store.DataSize() > 0) {
GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper,
ModelPartitionType::CUST_AICPU_KERNELS,
cust_aicpu_kernel_store.Data(),
cust_aicpu_kernel_store.DataSize()),
ge_model->GetCustAICPUKernelStore().Data(),
cust_aicpu_kernel_store.DataSize(), model_index),
"Add cust aicpu kernel partition failed");
}
return SUCCESS;
}

Status ModelHelper::SaveModelTaskDef(std::shared_ptr<OmFileSaveHelper> &om_file_save_helper,
const GeModelPtr &ge_model, ge::Buffer &task_buffer, size_t model_index) {
std::shared_ptr<ModelTaskDef> model_task_def = ge_model->GetModelTaskDefPtr();
if (model_task_def == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create model task def ptr failed");
@@ -146,9 +201,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod
size_t partition_task_size = model_task_def->ByteSizeLong();
GE_IF_BOOL_EXEC(partition_task_size == 0 || partition_task_size > INT_MAX,
GELOGE(FAILED, "Model_def's byte size (%zu) is invalid!", partition_task_size);
return FAILED);
return FAILED);

ge::Buffer task_buffer(partition_task_size);
task_buffer = ge::Buffer(partition_task_size);
if (task_buffer.GetSize() == 0) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc model task def buffer failed");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -159,21 +214,28 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod
GELOGD("TASK_INFO size is %zu", partition_task_size);

if (SaveModelPartition(om_file_save_helper, ModelPartitionType::TASK_INFO, task_buffer.GetData(),
partition_task_size) != SUCCESS) {
partition_task_size, model_index) != SUCCESS) {
GELOGE(PARAM_INVALID, "Add model task def partition failed");
return PARAM_INVALID;
}
return SUCCESS;
}

Status ModelHelper::SaveModelHeader(std::shared_ptr<OmFileSaveHelper> &om_file_save_helper,
const GeModelPtr &ge_model, size_t model_num) {
// Save target/version to model_header
ModelFileHeader &model_header = om_file_save_helper->GetModelFileHeader();
model_header.platform_type = ge_model->GetPlatformType();
model_header.om_ir_version = ge_model->GetVersion();
model_header.model_num = model_num;
std::string platform_version = ge_model->GetPlatformVersion();

errno_t err;
err = memcpy_s(model_header.platform_version, PLATFORM_VERSION_LEN, platform_version.c_str(),
platform_version.size() + 1);
if (err != EOK) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelHelper SaveModel failed while allocating memory for platform_version.");
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
"ModelHelper SaveModel failed while allocating memory for platform_version.");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
string version = reinterpret_cast<char *>(model_header.platform_version);
@@ -188,8 +250,142 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod
}
string model_name = reinterpret_cast<char *>(model_header.name);
GELOGD("Model name save:%s", model_name.c_str());
return SUCCESS;
}

Status ModelHelper::SaveAllModelPartiton(std::shared_ptr<OmFileSaveHelper>& om_file_save_helper,
const GeModelPtr &ge_model, ge::Buffer &model_buffer,
ge::Buffer &task_buffer, size_t model_index) {
if (SaveModelDef(om_file_save_helper, ge_model, model_buffer, model_index) != SUCCESS) {
GELOGE(FAILED, "save model def failed");
return FAILED;
}

if (SaveModelWeights(om_file_save_helper, ge_model, model_index) != SUCCESS) {
GELOGE(FAILED, "save model weights failed");
return FAILED;
}

if (SaveModelTbeKernel(om_file_save_helper, ge_model, model_index) != SUCCESS) {
GELOGE(FAILED, "save model tbe kernel failed");
return FAILED;
}

Status ret = om_file_save_helper->SaveModel(save_param, output_file.c_str(), model, is_offline_);
if (SaveModelCustAICPU(om_file_save_helper, ge_model, model_index) != SUCCESS) {
GELOGE(FAILED, "save model cust ai cpu failed");
return FAILED;
}


if (SaveModelTaskDef(om_file_save_helper, ge_model, task_buffer, model_index) != SUCCESS) {
GELOGE(FAILED, "save task def failed");
return FAILED;
}
return SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmModel(const GeModelPtr &ge_model,
const SaveParam &save_param,
const std::string &output_file,
ModelBufferData& model) {
if (output_file.empty()) {
GELOGE(FAILED, "GraphBuilder SaveModel received invalid file name prefix");
return FAILED;
}

GE_IF_BOOL_EXEC(ge_model == nullptr, GELOGE(FAILED, "Ge_model is nullptr"); return FAILED);
std::shared_ptr<OmFileSaveHelper> om_file_save_helper = ge::MakeShared<OmFileSaveHelper>();
GE_CHECK_NOTNULL(om_file_save_helper);
ge::Buffer model_buffer;
ge::Buffer task_buffer;

auto ret = SaveAllModelPartiton(om_file_save_helper, ge_model, model_buffer, task_buffer);
if (ret != SUCCESS) {
GELOGE(ret, "save all model partition failed");
return ret;
}

ret = SaveModelHeader(om_file_save_helper, ge_model);
if (ret != SUCCESS) {
GELOGE(ret, "save model header failed");
return ret;
}

ret = om_file_save_helper->SaveModel(save_param, output_file.c_str(), model, is_offline_);
if (ret != SUCCESS) {
GELOGE(FAILED, "OmFileSaveHelper SaveModel return fail.");
return ret;
}
return SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmRootModel(
const GeRootModelPtr &ge_root_model,
const SaveParam &save_param,
const std::string &output_file,
ModelBufferData& model,
bool is_unknown_shape) {

GE_CHECK_NOTNULL(ge_root_model);
GE_IF_BOOL_EXEC(ge_root_model == nullptr, GELOGE(FAILED, "Ge_root_model is nullptr"); return FAILED);

auto &name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
GE_IF_BOOL_EXEC(name_to_ge_model.empty(), GELOGE(FAILED, "Ge_root_model has no sub model"); return FAILED);
GE_IF_BOOL_EXEC(output_file.empty(),
GELOGE(FAILED, "GraphBuilder SaveModel received invalid file name prefix");
return FAILED);

if (!is_unknown_shape) {
auto &model_root = name_to_ge_model.begin()->second;
return SaveToOmModel(model_root, save_param, output_file, model);
}

std::shared_ptr<OmFileSaveHelper> om_file_save_helper = ge::MakeShared<OmFileSaveHelper>();
GE_CHECK_NOTNULL(om_file_save_helper);

auto &first_ge_model = name_to_ge_model.at(ge_root_model->GetRootGraph()->GetName());

// ge root model must be the first to be loaded
vector<string> model_names{ge_root_model->GetRootGraph()->GetName()};
for (auto &item : name_to_ge_model) {
if (item.first != model_names.front()) {
model_names.emplace_back(item.first);
}
}

vector<ge::Buffer> model_buffers(model_names.size());
vector<ge::Buffer> task_buffers(model_names.size());

size_t cur_index = 0;

if (model_names.size() > 1) {
GELOGD("only save first model MODEL_DEF");
if (SaveModelDef(om_file_save_helper, first_ge_model, model_buffers[cur_index], cur_index) != SUCCESS) {
GELOGE(FAILED, "save model def failed");
return FAILED;
}
++cur_index;
}

for (; cur_index < model_names.size(); ++cur_index) {
auto model_name = model_names[cur_index];
GELOGD("cur model %s index is %zu", model_name.c_str(), cur_index);
const GeModelPtr &ge_model = name_to_ge_model.at(model_name);
auto ret = SaveAllModelPartiton(om_file_save_helper, ge_model, model_buffers[cur_index],
task_buffers[cur_index], cur_index);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Save model %s failed", model_name.c_str());
return INTERNAL_ERROR;
}
}

auto ret = SaveModelHeader(om_file_save_helper, first_ge_model, model_names.size());
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Save model %s header failed", first_ge_model->GetName().c_str());
return INTERNAL_ERROR;
}

ret = om_file_save_helper->SaveRootModel(save_param, output_file.c_str(), model, is_offline_);
if (ret != SUCCESS) {
GELOGE(FAILED, "OmFileSaveHelper SaveModel return fail.");
return FAILED;
@@ -288,7 +484,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c
}

file_header_ = reinterpret_cast<ModelFileHeader *>(model_data.model_data);

OmFileLoadHelper om_load_helper;
status = om_load_helper.Init(model_addr_tmp_, model_len_tmp_);
if (status != SUCCESS) {
@@ -310,7 +505,61 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c
GELOGE(status, "GenerateGeModel failed");
return status;
}
GELOGD("in ModelHelper::LoadModel, is_assign_model_ is setted to true!");
is_assign_model_ = true;
return SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootModel(const ge::ModelData &model_data) {
if (model_data.model_data == nullptr || model_data.model_len == 0) {
GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "Model_data is nullptr, or model_data_size is 0");
return GE_EXEC_MODEL_DATA_SIZE_INVALID;
}

if (is_assign_model_) {
GELOGE(GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!");
return GE_EXEC_LOAD_MODEL_REPEATED;
}

if (ReleaseLocalModelData() != SUCCESS) {
GELOGE(INTERNAL_ERROR, "ReleaseLocalModelData failed.");
return INTERNAL_ERROR;
}

Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
if (status != SUCCESS) {
GELOGE(status, "Parse model content failed!");
return status;
}

file_header_ = reinterpret_cast<ModelFileHeader *>(model_data.model_data);

// model version 1.0 file header does not have model_num member
is_unknown_shape_model_ = file_header_->version >= ge::MODEL_VERSION &&
file_header_->model_num > kStatiOmFileModelNum;
GELOGD("cur om model is ge root model or no %d, model version %zu", is_unknown_shape_model_, file_header_->version);

OmFileLoadHelper om_load_helper;
if (is_unknown_shape_model_) {
auto model_num = file_header_->model_num;
status = om_load_helper.Init(model_addr_tmp_, model_len_tmp_, model_num);
} else {
status = om_load_helper.Init(model_addr_tmp_, model_len_tmp_);
}
if (status != SUCCESS) {
GELOGE(status, "Om_load_helper init failed");
model_addr_tmp_ = nullptr;
return status;
}
// Encrypted model needs to delete the temp model; unencrypted model does not
model_addr_tmp_ = nullptr;

status = GenerateGeRootModel(om_load_helper);
if (status != SUCCESS) {
GELOGE(status, "GenerateGeRootModel failed");
return status;
}
GELOGD("in ModelHelper::LoadRootModel, is_assign_model_ is setted to true!");
is_assign_model_ = true;
return SUCCESS;
}
@@ -341,6 +590,61 @@ Status ModelHelper::GenerateGeModel(OmFileLoadHelper &om_load_helper) {
return SUCCESS;
}

Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) {
GELOGD("Begin to generate ge root model");
root_model_ = ge::MakeShared<ge::GeRootModel>();
GE_CHECK_NOTNULL(root_model_);
if (!is_unknown_shape_model_) {
if (GenerateGeModel(om_load_helper) != SUCCESS) {
GELOGE(FAILED, "GenerateGeModel failed");
return FAILED;
}
GE_CHECK_NOTNULL(model_);
root_model_->SetRootGraph(GraphUtils::GetComputeGraph(model_->GetGraph()));
return SUCCESS;
}

bool is_first_model = true;
for (size_t mode_index = 0; mode_index < file_header_->model_num; ++mode_index) {
GeModelPtr cur_model = ge::MakeShared<ge::GeModel>();
Status ret = LoadModelData(om_load_helper, cur_model, mode_index);
if (ret != SUCCESS) {
return GE_EXEC_LOAD_MODEL_PARTITION_FAILED;
}

if (is_first_model) {
is_first_model = false;
root_model_->SetRootGraph(GraphUtils::GetComputeGraph(cur_model->GetGraph()));
root_model_->SetModelId(cur_model->GetModelId());
model_ = cur_model;
continue;
}

ret = LoadWeights(om_load_helper, cur_model, mode_index);
if (ret != SUCCESS) {
return GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED;
}

ret = LoadTBEKernelStore(om_load_helper, cur_model, mode_index);
if (ret != SUCCESS) {
return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED;
}

ret = LoadCustAICPUKernelStore(om_load_helper, cur_model, mode_index);
if (ret != SUCCESS) {
return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED;
}

ret = LoadTask(om_load_helper, cur_model, mode_index);
if (ret != SUCCESS) {
return GE_EXEC_LOAD_TASK_PARTITION_FAILED;
}
root_model_->SetSubgraphInstanceNameToModel(cur_model->GetName(), cur_model);
}

return SUCCESS;
}

Status ModelHelper::LoadModelData(OmFileLoadHelper &om_load_helper) {
ModelPartition partition_model_def;
// no need to check value, DATA->NetOutput
@@ -353,19 +657,35 @@ Status ModelHelper::LoadModelData(OmFileLoadHelper &om_load_helper) {
return INTERNAL_ERROR;
}

SetModelToGeModel(model);

SetModelToGeModel(model_, model);
return SUCCESS;
}

void ModelHelper::SetModelToGeModel(ge::Model &model) {
model_->SetGraph(model.GetGraph());
model_->SetName(model.GetName());
model_->SetVersion(model.GetVersion());
model_->SetPlatformVersion(model.GetPlatformVersion());
model_->SetAttr(model.MutableAttrMap());
void ModelHelper::SetModelToGeModel(GeModelPtr &ge_model, Model &model) {
ge_model->SetGraph(model.GetGraph());
ge_model->SetName(model.GetName());
ge_model->SetVersion(model.GetVersion());
ge_model->SetPlatformVersion(model.GetPlatformVersion());
ge_model->SetAttr(model.MutableAttrMap());
}

Status ModelHelper::LoadModelData(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index) {
ModelPartition partition_model_def;
// no need to check value, DATA->NetOutput
om_load_helper.GetModelPartition(ModelPartitionType::MODEL_DEF, partition_model_def, mode_index);
GELOGD("Model_def partition addr:%p,size:%u", partition_model_def.data, partition_model_def.size);

ge::Model model;
if (ge::Model::Load(partition_model_def.data, partition_model_def.size, model) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Load model failed.");
return INTERNAL_ERROR;
}

SetModelToGeModel(cur_model, model);
return SUCCESS;
}


Status ModelHelper::LoadWeights(OmFileLoadHelper &om_load_helper) {
ModelPartition partition;
if (om_load_helper.GetModelPartition(ModelPartitionType::WEIGHTS_DATA, partition) != SUCCESS) {
@@ -379,6 +699,19 @@ Status ModelHelper::LoadWeights(OmFileLoadHelper &om_load_helper) {
return SUCCESS;
}

Status ModelHelper::LoadWeights(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index) {
ModelPartition partition;
if (om_load_helper.GetModelPartition(ModelPartitionType::WEIGHTS_DATA, partition, mode_index) != SUCCESS) {
GELOGE(FAILED, "Get weight model partition failed.");
return FAILED;
}
ge::Buffer weight = ge::Buffer::CopyFrom(partition.data, partition.size);
cur_model->SetWeight(weight);

GELOGD("GetWeight size:%u", partition.size);
return SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadTask(OmFileLoadHelper &om_load_helper) {
ModelPartition task_partition;
if (om_load_helper.GetModelPartition(ModelPartitionType::TASK_INFO, task_partition) != SUCCESS) {
@@ -398,6 +731,27 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadTask(Om
return SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadTask(OmFileLoadHelper &om_load_helper,
GeModelPtr &cur_model,
size_t mode_index) {
ModelPartition task_partition;
if (om_load_helper.GetModelPartition(ModelPartitionType::TASK_INFO, task_partition, mode_index) != SUCCESS) {
GELOGE(FAILED, "Get task model partition failed.");
return FAILED;
}
std::shared_ptr<ModelTaskDef> task = ge::MakeShared<ModelTaskDef>();
GE_CHECK_NOTNULL(task);
if (task_partition.size != 0) {
if (!ReadProtoFromArray(task_partition.data, task_partition.size, task.get())) {
GELOGE(INTERNAL_ERROR, "ReadProtoFromArray failed.");
return INTERNAL_ERROR;
}
GELOGD("TASK_INFO op_size:%zu, stream_num:%u", task->op().size(), task->stream_num());
}
cur_model->SetModelTaskDef(task);
return SUCCESS;
}

Status ModelHelper::LoadTBEKernelStore(OmFileLoadHelper &om_load_helper) {
// Load tbe kernels
ModelPartition partition_kernel_def;
@@ -414,6 +768,23 @@ Status ModelHelper::LoadTBEKernelStore(OmFileLoadHelper &om_load_helper) {
return SUCCESS;
}

Status ModelHelper::LoadTBEKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, size_t mode_index) {
// Load tbe kernels
ModelPartition partition_kernel_def;
TBEKernelStore kernel_store;
if (om_load_helper.GetModelPartition(ModelPartitionType::TBE_KERNELS, partition_kernel_def, mode_index) ==
SUCCESS) {
GELOGD("Kernels partition size:%u", partition_kernel_def.size);
if (kernel_store.Load(partition_kernel_def.data, partition_kernel_def.size)) {
GELOGD("Load tbe kernels success");
} else {
GELOGW("Load tbe kernels failed");
}
}
cur_model->SetTBEKernelStore(kernel_store);
return SUCCESS;
}

Status ModelHelper::LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper) {
// Load cust aicpu kernels
ModelPartition partition_kernel_def;
@@ -421,19 +792,39 @@ Status ModelHelper::LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper) {
if (om_load_helper.GetModelPartition(ModelPartitionType::CUST_AICPU_KERNELS, partition_kernel_def) == SUCCESS) {
GELOGD("Kernels partition size:%u", partition_kernel_def.size);
if (kernel_store.Load(partition_kernel_def.data, partition_kernel_def.size)) {
GELOGI("Load cust aicpu kernels success");
GELOGD("Load cust aicpu kernels success");
} else {
GELOGW("Load cust aicpu kernels failed");
}
}
model_->SetCustAICPUKernelStore(kernel_store);
return SUCCESS;
}

Status ModelHelper::LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper,
GeModelPtr &cur_model, size_t mode_index) {
// Load cust aicpu kernels
ModelPartition partition_kernel_def;
CustAICPUKernelStore kernel_store;
if (om_load_helper.GetModelPartition(ModelPartitionType::CUST_AICPU_KERNELS, partition_kernel_def, mode_index)
== SUCCESS) {
GELOGD("Kernels partition size:%u", partition_kernel_def.size);
if (kernel_store.Load(partition_kernel_def.data, partition_kernel_def.size)) {
GELOGD("Load cust aicpu kernels success");
} else {
GELOGW("Load cust aicpu kernels failed");
}
}
cur_model->SetCustAICPUKernelStore(kernel_store);
return SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY GeModelPtr ModelHelper::GetGeModel() {
if (model_ != nullptr) {
return model_;
}

GELOGI("Model has not been loaded!");
GELOGD("Model has not been loaded!");
std::shared_ptr<ge::GeModel> out_model = ge::MakeShared<ge::GeModel>();
if (out_model == nullptr) {
return nullptr;
@@ -441,6 +832,20 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY GeModelPtr ModelHelper::GetGeMo
return out_model;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY GeRootModelPtr ModelHelper::GetGeRootModel() {
if (root_model_ != nullptr) {
return root_model_;
}

GELOGD("Model has not been loaded!");
std::shared_ptr<ge::GeRootModel> out_model = ge::MakeShared<ge::GeRootModel>();
if (out_model == nullptr) {
return nullptr;
}
return out_model;
}


Status ModelHelper::ReleaseLocalModelData() noexcept {
Status result = SUCCESS;
if (model_addr_tmp_ != nullptr) {


+ 195
- 1
ge/common/helper/om_file_helper.cc View File

@@ -52,6 +52,17 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::Init(u
return SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::Init(uint8_t *model_data,
uint32_t model_data_size,
uint32_t model_num) {
Status status = LoadModelPartitionTable(model_data, model_data_size, model_num);
if (status != SUCCESS) {
return status;
}
is_inited_ = true;
return SUCCESS;
}

// Use both
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetModelPartition(ModelPartitionType type,
ModelPartition &partition) {
@@ -79,6 +90,37 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetMod
return SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetModelPartition(ModelPartitionType type,
ModelPartition &partition,
size_t model_index) {
if (!is_inited_) {
GELOGE(PARAM_INVALID, "OmFileLoadHelper has not been initialized!");
return PARAM_INVALID;
}
if (model_index >= model_contexts_.size()) {
GELOGE(PARAM_INVALID, "cur index : %zu, model_contexts size:%zu", model_index, model_contexts_.size());
return PARAM_INVALID;
}
auto &cur_ctx = model_contexts_[model_index];
bool found = false;
for (ModelPartition &part : cur_ctx.partition_datas_) {
if (part.type == type) {
partition = part;
found = true;
break;
}
}

if (!found) {
if (type != ModelPartitionType::TBE_KERNELS && type != ModelPartitionType::WEIGHTS_DATA &&
type != ModelPartitionType::CUST_AICPU_KERNELS) {
GELOGE(FAILED, "GetModelPartition:type:%d is not in partition_datas!", static_cast<int>(type));
return FAILED;
}
}
return SUCCESS;
}
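
In the indexed GetModelPartition above, a missing partition is only fatal for required types; TBE_KERNELS, WEIGHTS_DATA and CUST_AICPU_KERNELS may legitimately be absent. A minimal sketch of that rule as a stand-alone predicate (the enum below is a stand-in, not the real ModelPartitionType):

#include <cstdio>

enum class PartType { MODEL_DEF, WEIGHTS_DATA, TASK_INFO, TBE_KERNELS, CUST_AICPU_KERNELS };

// Partitions that a model may legitimately ship without.
constexpr bool IsOptionalPartition(PartType type) {
  return type == PartType::TBE_KERNELS || type == PartType::WEIGHTS_DATA ||
         type == PartType::CUST_AICPU_KERNELS;
}

int main() {
  std::printf("TASK_INFO optional: %d\n", IsOptionalPartition(PartType::TASK_INFO));        // 0
  std::printf("WEIGHTS_DATA optional: %d\n", IsOptionalPartition(PartType::WEIGHTS_DATA));  // 1
  return 0;
}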

Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const {
// Parameter validity check
if (model.model_data == nullptr) {
@@ -138,7 +180,8 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint
context_.partition_datas_.push_back(partition);

if (partition.size > model_data_size || mem_offset > model_data_size - partition.size) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.",
GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID,
"The partition size %zu is greater than the model data size %u.",
partition.size + mem_offset, model_data_size);
return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID;
}
@@ -148,6 +191,61 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint
return SUCCESS;
}

Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t model_data_size, uint32_t model_num) {
if (model_data == nullptr) {
GELOGE(PARAM_INVALID, "Param model_data must not be null!");
return PARAM_INVALID;
}

uint32_t cur_offset = 0;
for (uint32_t index = 0; index < model_num; ++index) {
// Init partition table
auto partition_table = reinterpret_cast<ModelPartitionTable *>(model_data + cur_offset);
size_t partition_table_size = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table);
cur_offset += partition_table_size;
GELOGD("Cur model index %zu: ModelPartitionTable num :%u, "
"ModelFileHeader length :%zu, ModelPartitionTable length :%zu",
index, partition_table->num, sizeof(ModelFileHeader), partition_table_size);
if (model_data_size <= cur_offset) {
GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u",
partition_table->num, model_data_size);
return GE_EXEC_MODEL_DATA_SIZE_INVALID;
}

for (uint32_t i = 0; i < partition_table->num; i++) {
ModelPartition partition;
partition.size = partition_table->partition[i].mem_size;
partition.data = model_data + cur_offset;
partition.type = partition_table->partition[i].type;
if (index >= model_contexts_.size()) {
if (index != model_contexts_.size()) {
GELOGE(FAILED, "cur index is %zu make model_contexts_ overflow", index);
return FAILED;
}

OmFileContext tmp_ctx;
tmp_ctx.partition_datas_.push_back(partition);
model_contexts_.push_back(tmp_ctx);
} else {
model_contexts_[index].partition_datas_.push_back(partition);
}

if (partition.size > model_data_size || cur_offset > model_data_size - partition.size) {
GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.",
partition.size + cur_offset, model_data_size);
return GE_EXEC_MODEL_DATA_SIZE_INVALID;
}
cur_offset += partition.size;
GELOGD("Partition, type:%d, size:%u, model_index:%zu", static_cast<int>(partition.type), partition.size, index);
}
}
if (cur_offset != model_data_size) {
GELOGE(FAILED, "do not get the complete model, read end offset:%zu, all size:%zu", cur_offset, model_data_size);
return FAILED;
}
return SUCCESS;
}
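
LoadModelPartitionTable above walks an OM blob that packs several models back to back: each model contributes a partition table (a header plus one entry per partition) immediately followed by the partition payloads. A self-contained sketch of that offset walk, using simplified stand-in structs rather than the real ModelPartitionTable/ModelPartitionMemInfo headers (field names here are assumptions):

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Simplified stand-ins for the GE partition table structures (illustrative only).
struct PartEntry {
  uint32_t type;
  uint32_t mem_offset;
  uint32_t mem_size;
};
struct PartTableHeader {
  uint32_t num;  // the real table follows this header with a flexible array of entries
};

// Walk model_num models packed back to back as [table_0][data_0][table_1][data_1]...
bool WalkPackedModels(const uint8_t *blob, uint32_t blob_size, uint32_t model_num) {
  uint32_t offset = 0;
  for (uint32_t index = 0; index < model_num; ++index) {
    auto header = reinterpret_cast<const PartTableHeader *>(blob + offset);
    auto entries = reinterpret_cast<const PartEntry *>(blob + offset + sizeof(PartTableHeader));
    uint32_t table_size = sizeof(PartTableHeader) + header->num * sizeof(PartEntry);
    offset += table_size;
    if (offset >= blob_size) {
      return false;  // the table alone already consumes the blob
    }
    for (uint32_t i = 0; i < header->num; ++i) {
      uint32_t part_size = entries[i].mem_size;
      if (part_size > blob_size || offset > blob_size - part_size) {
        return false;  // partition would run past the end of the blob
      }
      std::printf("model %u partition %u: type=%u size=%u at offset %u\n",
                  index, i, entries[i].type, part_size, offset);
      offset += part_size;
    }
  }
  return offset == blob_size;  // the blob must be consumed exactly
}

int main() {
  // One model with a single 8-byte partition of type 0.
  PartTableHeader header{1};
  PartEntry entry{0, 0, 8};
  std::vector<uint8_t> blob(sizeof(header) + sizeof(entry) + 8, 0);
  std::memcpy(blob.data(), &header, sizeof(header));
  std::memcpy(blob.data() + sizeof(header), &entry, sizeof(entry));
  std::printf("walk ok: %d\n", WalkPackedModels(blob.data(), static_cast<uint32_t>(blob.size()), 1));
  return 0;
}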

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::vector<ModelPartition>
&OmFileSaveHelper::GetModelPartitions() const {
return context_.partition_datas_;
@@ -172,6 +270,28 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelPartitionTable *OmFileSave
return partition_table;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelPartitionTable *OmFileSaveHelper::GetPartitionTable(
size_t cur_ctx_index) {
auto &cur_ctx = model_contexts_[cur_ctx_index];
auto partition_size = static_cast<uint32_t>(cur_ctx.partition_datas_.size());
// Build ModelPartitionTable, flex array
cur_ctx.partition_table_.clear();
cur_ctx.partition_table_.resize(sizeof(ModelPartitionTable) + sizeof(ModelPartitionMemInfo) * partition_size, 0);

auto partition_table = reinterpret_cast<ModelPartitionTable *>(cur_ctx.partition_table_.data());
partition_table->num = partition_size;

uint32_t mem_offset = 0;
for (uint32_t i = 0; i < partition_size; i++) {
ModelPartition partition = cur_ctx.partition_datas_[i];
partition_table->partition[i] = {partition.type, mem_offset, partition.size};
mem_offset += partition.size;
GELOGD("Partition, type:%d, size:%u", static_cast<int>(partition.type), partition.size);
}
return partition_table;
}
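
GetPartitionTable above builds the flexible-array table inside a plain byte vector sized as sizeof(table header) plus one entry per partition, then fills it through a cast pointer. A hedged sketch of the same build step with a stand-in entry type (not the real GE header):

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

// Stand-in for ModelPartitionMemInfo (illustrative assumption).
struct EntryInfo {
  uint32_t type;
  uint32_t mem_offset;
  uint32_t mem_size;
};

// Serialize a [num][entry...] table into a byte buffer, mirroring the
// sizeof(ModelPartitionTable) + n * sizeof(ModelPartitionMemInfo) sizing above.
std::vector<uint8_t> BuildPartitionTable(const std::vector<std::pair<uint32_t, uint32_t>> &parts) {
  std::vector<uint8_t> buf(sizeof(uint32_t) + parts.size() * sizeof(EntryInfo), 0);
  *reinterpret_cast<uint32_t *>(buf.data()) = static_cast<uint32_t>(parts.size());
  auto entries = reinterpret_cast<EntryInfo *>(buf.data() + sizeof(uint32_t));
  uint32_t mem_offset = 0;
  for (std::size_t i = 0; i < parts.size(); ++i) {
    entries[i] = {parts[i].first, mem_offset, parts[i].second};  // {type, offset, size}
    mem_offset += parts[i].second;  // payloads are laid out back to back after the table
  }
  return buf;
}

int main() {
  // e.g. a 128-byte model-def-style partition followed by a 4096-byte weights partition.
  auto table = BuildPartitionTable({{0, 128}, {4, 4096}});
  return table.empty() ? 1 : 0;
}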


FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileSaveHelper::AddPartition(ModelPartition &partition) {
if (ge::CheckUint32AddOverflow(context_.model_data_len_, partition.size) != SUCCESS) {
GELOGE(FAILED, "UINT32 %u and %u addition can result in overflow!", context_.model_data_len_, partition.size);
@@ -182,6 +302,27 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileSaveHelper::AddPar
return SUCCESS;
}

Status OmFileSaveHelper::AddPartition(ModelPartition &partition, size_t cur_index) {
if (ge::CheckUint32AddOverflow(context_.model_data_len_, partition.size) != SUCCESS) {
GELOGE(FAILED, "UINT32 %u and %u addition can result in overflow!", context_.model_data_len_, partition.size);
return FAILED;
}
if (cur_index >= model_contexts_.size()) {
if (cur_index != model_contexts_.size()) {
GELOGE(FAILED, "cur index is %zu make model_contexts_ overflow", cur_index);
return FAILED;
}
OmFileContext tmp_ctx;
tmp_ctx.model_data_len_ += partition.size;
tmp_ctx.partition_datas_.push_back(partition);
model_contexts_.push_back(tmp_ctx);
} else {
model_contexts_[cur_index].model_data_len_ += partition.size;
model_contexts_[cur_index].partition_datas_.push_back(partition);
}
return SUCCESS;
}
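
The guard above relies on CheckUint32AddOverflow to reject additions that would wrap model_data_len_. Assuming it has the usual pre-addition semantics, a minimal stand-alone equivalent is:

#include <cassert>
#include <cstdint>

// Returns true when a + b would wrap around uint32_t.
constexpr bool Uint32AddOverflows(uint32_t a, uint32_t b) {
  return a > UINT32_MAX - b;
}

int main() {
  assert(!Uint32AddOverflows(1u, 2u));
  assert(Uint32AddOverflows(UINT32_MAX, 1u));
  return 0;
}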

Status OmFileSaveHelper::SaveModel(const SaveParam &save_param, const char *output_file, ModelBufferData &model,
bool is_offline) {
(void)save_param.cert_file;
@@ -198,6 +339,10 @@ Status OmFileSaveHelper::SaveModel(const SaveParam &save_param, const char *outp

Status OmFileSaveHelper::SaveModelToFile(const char *output_file, ModelBufferData &model, bool is_offline) {
#if !defined(NONSUPPORT_SAVE_TO_FILE)
if (context_.partition_datas_.empty()) {
GE_CHK_BOOL_EXEC(!model_contexts_.empty(), return FAILED, "model contexts empty");
context_ = model_contexts_.front();
}
uint32_t model_data_len = context_.model_data_len_;
if (model_data_len == 0) {
GELOGE(domi::PARAM_INVALID, "Model data len error! should not be 0");
@@ -231,4 +376,53 @@ Status OmFileSaveHelper::SaveModelToFile(const char *output_file, ModelBufferDat
return SUCCESS;
#endif
}

Status OmFileSaveHelper::SaveRootModel(const SaveParam &save_param, const char *output_file,
ModelBufferData &model, bool is_offline) {
(void)save_param.cert_file;
(void)save_param.ek_file;
(void)save_param.encode_mode;
(void)save_param.hw_key_file;
(void)save_param.pri_key_file;

#if !defined(NONSUPPORT_SAVE_TO_FILE)
vector<ModelPartitionTable *> model_partition_tabels;
vector<vector<ModelPartition>> all_model_partitions;
for (size_t ctx_index = 0; ctx_index < model_contexts_.size(); ++ctx_index) {
auto &cur_ctx = model_contexts_[ctx_index];
uint32_t cur_model_data_len = cur_ctx.model_data_len_;
if (cur_model_data_len == 0) {
GELOGE(domi::PARAM_INVALID, "Model data len error! should not be 0");
return domi::PARAM_INVALID;
}

auto tmp_table = GetPartitionTable(ctx_index);
if (tmp_table == nullptr) {
GELOGE(ge::GE_GRAPH_SAVE_FAILED, "SaveModelToFile execute failed: partition_table is NULL.");
return ge::GE_GRAPH_SAVE_FAILED;
}
uint32_t size_of_table = SIZE_OF_MODEL_PARTITION_TABLE(*tmp_table);
FMK_UINT32_ADDCHECK(size_of_table, cur_model_data_len)
FMK_UINT32_ADDCHECK(size_of_table + cur_model_data_len, model_header_.length)
model_header_.length += size_of_table + cur_model_data_len;
model_partition_tabels.push_back(tmp_table);
all_model_partitions.push_back(cur_ctx.partition_datas_);
GELOGD("sizeof(ModelPartitionTable):%u, cur_model_data_len:%u, cur_context_index:%zu",
size_of_table, cur_model_data_len, ctx_index);
}
Status ret;
if (is_offline) {
ret = FileSaver::SaveToFile(output_file, model_header_, model_partition_tabels, all_model_partitions);
} else {
GELOGW("do not support save ge root model to buff now");
return FAILED;
}
if (ret == SUCCESS) {
GELOGD("Save model success without encrypt.");
}
return ret;
#else
return SUCCESS;
#endif
}
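
SaveRootModel grows model_header_.length by one partition table plus the partition payloads for every model context, so the file written by FileSaver amounts to the file header followed by Σ(table_i + data_i). A small worked example of that accumulation (all sizes are made up):

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  const uint32_t file_header_size = 256;  // illustrative stand-in for sizeof(ModelFileHeader)
  // {partition table size, partition data length} per model context -- sizes are invented.
  std::vector<std::pair<uint32_t, uint32_t>> contexts = {{28, 4096}, {40, 65536}};
  uint64_t length = 0;  // accumulated in 64 bit here; the real code guards each uint32 addition instead
  for (const auto &ctx : contexts) {
    length += static_cast<uint64_t>(ctx.first) + ctx.second;
  }
  std::printf("file size = %u header bytes + %llu table/data bytes\n", file_header_size,
              static_cast<unsigned long long>(length));
  return 0;
}
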
} // namespace ge

+ 1
- 1
ge/common/op/ge_op_utils.cc View File

@@ -357,7 +357,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void OpUtils::TransDataHWCK2KCH
const char *w_data = (const char *)input;

int64_t count = h * w * c * k;
GE_IF_BOOL_EXEC(count <= 0, GELOGW("Count value must be greater than 0, but count = %ld", count); return );
GE_IF_BOOL_EXEC(count <= 0, GELOGW("Count value must be greater than 0, but count = %ld", count); return);
float *buf = new (std::nothrow) float[count]();
GE_RT_VOID_CHECK_NOTNULL(buf);
float *src_buff = nullptr;


+ 198
- 0
ge/common/profiling/ge_profiling.cc View File

@@ -0,0 +1,198 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "common/profiling/ge_profiling.h"
#include "runtime/base.h"
#include "common/profiling/profiling_manager.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "graph/load/graph_loader.h"
#include "init/gelib.h"
#include "framework/common/ge_inner_error_codes.h"

namespace {
const uint32_t kDeviceListIndex = 3;
const std::string kDeviceNums = "devNums";
const std::string kDeviceIdList = "devIdList";
const std::string kProfilingInit = "prof_init";
const std::string kProfilingFinalize = "prof_finalize";
const std::string kProfilingStart = "prof_start";
const std::string kProfilingStop = "prof_stop";
const std::string kProfModelSubscribe = "prof_model_subscribe";
const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe";
const std::string kRtSetDeviceRegName = "profiling";

const std::map<ProfCommandHandleType, std::string> kProfCommandTypeMap = {
{kProfCommandhandleInit, kProfilingInit},
{kProfCommandhandleStart, kProfilingStart},
{kProfCommandhandleStop, kProfilingStop},
{kProfCommandhandleFinalize, kProfilingFinalize},
{kProfCommandhandleModelSubscribe, kProfModelSubscribe},
{kProfCommandhandleModelUnsubscribe, kProfModelUnsubscribe}};
} // namespace

bool TransProfConfigToParam(const ProfCommandHandleData &profCommand, vector<string> &prof_config_params) {
prof_config_params.clear();
prof_config_params.emplace_back(kDeviceNums);
prof_config_params.emplace_back(std::to_string(profCommand.devNums));
prof_config_params.emplace_back(kDeviceIdList);
std::string devID = "";
if (profCommand.devNums == 0) {
GELOGW("The device num is invalid.");
return false;
}
for (uint32_t i = 0; i < profCommand.devNums; i++) {
devID.append(std::to_string(profCommand.devIdList[i]));
if (i != profCommand.devNums - 1) {
devID.append(",");
}
}

prof_config_params.push_back(devID);
return true;
}
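
TransProfConfigToParam flattens the device id array into one comma-separated value for the devIdList parameter. A self-contained sketch of just that joining step:

#include <cstdint>
#include <cstdio>
#include <string>

// Join device ids as "0,1,3" -- the format handed to the profiling command params.
std::string JoinDeviceIds(const uint32_t *ids, uint32_t num) {
  std::string joined;
  for (uint32_t i = 0; i < num; ++i) {
    joined += std::to_string(ids[i]);
    if (i + 1 != num) {
      joined += ",";
    }
  }
  return joined;
}

int main() {
  const uint32_t ids[] = {0, 1, 3};
  std::printf("devIdList=%s\n", JoinDeviceIds(ids, 3).c_str());  // prints devIdList=0,1,3
  return 0;
}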

bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) {
if (deviceid_list == nullptr) {
GELOGE(ge::PARAM_INVALID, "deviceIdList is nullptr");
return false;
}
if (device_nums == 0 || device_nums > MAX_DEV_NUM) {
GELOGE(ge::PARAM_INVALID, "The device nums: %u is invalid.", device_nums);
return false;
}

// real device num
int32_t dev_count = 0;
rtError_t rt_err = rtGetDeviceCount(&dev_count);
if (rt_err != RT_ERROR_NONE) {
GELOGE(ge::INTERNAL_ERROR, "Get the Device count fail.");
return false;
}

if (device_nums > static_cast<uint32_t>(dev_count)) {
GELOGE(ge::PARAM_INVALID, "Device num(%u) is not in range 1 ~ %d.", device_nums, dev_count);
return false;
}

std::unordered_set<uint32_t> record;
for (size_t i = 0; i < device_nums; ++i) {
uint32_t dev_id = deviceid_list[i];
if (dev_id >= static_cast<uint32_t>(dev_count)) {
GELOGE(ge::PARAM_INVALID, "Device id %u is not in range 0 ~ %d(exclude %d)", dev_id, dev_count, dev_count);
return false;
}
if (record.count(dev_id) > 0) {
GELOGE(ge::PARAM_INVALID, "Device id %u is duplicatedly set", dev_id);
return false;
}
record.insert(dev_id);
}
return true;
}

ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) {
if (func == nullptr) {
GELOGE(ge::PARAM_INVALID, "Msprof ctrl callback is nullptr.");
return ge::PARAM_INVALID;
}
if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) {
GELOGW("Msprof ctrl callback is exist, just ignore it.");
} else {
ge::ProfilingManager::Instance().SetMsprofCtrlCallback(func);
}
return ge::SUCCESS;
}

ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) {
if (func == nullptr) {
GELOGE(ge::PARAM_INVALID, "MsprofSetDeviceCallback callback is nullptr.");
return ge::PARAM_INVALID;
}
// Pass MsprofSetDeviceCallback to runtime
ge::Status rt_ret = rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), static_cast<rtDeviceStateCallback>(func));
if (rt_ret != ge::SUCCESS) {
GELOGE(rt_ret, "Pass MsprofSetDeviceCallback to runtime failed!");
return rt_ret;
}
return ge::SUCCESS;
}

ge::Status RegProfReporterCallback(MsprofReporterCallback func) {
if (func == nullptr) {
GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr.");
return ge::PARAM_INVALID;
}
if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofReporterCallback != nullptr) {
GELOGW("Msprof reporter callback is exist, just ignore it.");
} else {
GELOGI("GE register Msprof reporter callback.");
ge::ProfilingManager::Instance().SetMsprofReporterCallback(func);
// Pass MsprofReporterCallback to runtime
ge::Status rt_ret = rtSetMsprofReporterCallback(func);
if (rt_ret != ge::SUCCESS) {
GELOGE(rt_ret, "Pass MsprofReporterCallback to runtime failed!!");
return rt_ret;
}
// Pass MsprofReporterCallback to hccl
}
return ge::SUCCESS;
}

ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len) {
if (type != kProfCommandhandleFinalize) {
GE_CHECK_NOTNULL(data);
}
ProfCommandHandleData *prof_config_param = reinterpret_cast<ProfCommandHandleData *>(data);
auto iter = kProfCommandTypeMap.find(type);
if (iter == kProfCommandTypeMap.end()) {
GELOGW("The prof comand type is invalid.");
return ge::PARAM_INVALID;
}
std::vector<string> prof_params;
if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) {
if (!isProfConfigValid(prof_config_param->devIdList, prof_config_param->devNums)) {
return ge::FAILED;
}
if (!TransProfConfigToParam(*prof_config_param, prof_params)) {
GELOGE(ge::PARAM_INVALID, "Transfer profilerConfig to string vector failed");
return ge::PARAM_INVALID;
}
}
ge::GraphLoader graph_loader;
ge::Command command;
command.cmd_params.clear();
command.cmd_type = iter->second;
command.cmd_params = prof_params;
if (type != kProfCommandhandleFinalize) {
command.module_index = prof_config_param->profSwitch;
}
GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%llx", iter->second.c_str(),
command.module_index);
if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) {
GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str());
}
ge::Status ret = graph_loader.CommandHandle(command);
if (ret != ge::SUCCESS) {
GELOGE(ret, "Handle profiling command failed");
return ge::FAILED;
}

GELOGI("Successfully execute profiling command type: %d, command 0x%llx.", type, command.module_index);
return ge::SUCCESS;
}


+ 26
- 0
ge/common/profiling/ge_runner_profiling.cc View File

@@ -0,0 +1,26 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "common/profiling/ge_runner_profiling.h"
#include "init/gelib.h"

bool IsInitialize() {
std::shared_ptr<ge::GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || instance_ptr->InitFlag() == false) {
return false;
}
return true;
}

+ 261
- 406
ge/common/profiling/profiling_manager.cc View File

@@ -24,16 +24,11 @@
#include "graph/load/new_model_manager/davinci_model.h"

namespace {
const char *const kJobID = "jobID";
const char *const kDeviceID = "deviceID";
const char *const kStartCfg = "startCfg";
const char *const kFeatures = "features";
const char *const kConf = "conf";
const char *const kEvents = "events";
const char *const kAiCoreEvents = "ai_core_events";
const char *const kName = "name";
const char *const kTraceID = "traceId";
const char *const kProfDir = "resultPath";
const char *const kTrainingTrace = "training_trace";
const char *const kFpPoint = "fp_point";
const char *const kBpPoint = "bp_point";

#ifdef DAVINCI_SUPPORT_PROFILING
const size_t kReportMaxLen = 2048;
const int32_t kMaxDeviceNum = 256;
const std::string kConfigNumsdev = "devNums";
@@ -42,10 +37,15 @@ const std::string kProfStart = "prof_start";
const std::string kProfStop = "prof_stop";
const std::string kProfModelSubscribe = "prof_model_subscribe";
const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe";
#endif
} // namespace

namespace ge {
ProfilingManager::ProfilingManager() : subscribe_count_(0) {}
ProfilingManager::ProfilingManager()
: is_load_profiling_(false), is_execute_profiling_(false), is_training_trace_(false), subscribe_count_(0) {
prof_cb_.msprofCtrlCallback = nullptr;
prof_cb_.msprofReporterCallback = nullptr;
}

ProfilingManager::~ProfilingManager() {}

@@ -58,44 +58,29 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
#ifdef DAVINCI_SUPPORT_PROFILING
vector<int32_t>().swap(device_id_);
subscribe_count_ = 0;
job_id_ = options.job_id;

GELOGI("ProfilingManager::Init job_id:%s", job_id_.c_str());

GELOGI("ProfilingManager::Init job_id:%s", options.job_id.c_str());


Status ret;
if (!recv_profiling_config_.empty()) {
GELOGI("Profiling json config from acl:%s", recv_profiling_config_.c_str());
ret = InitFromAclCfg(recv_profiling_config_);
} else {
ret = InitFromOptions(options);
if (ret == SUCCESS && is_load_profiling_) {
device_id_.push_back(options.device_id);
}
}
struct MsprofGeOptions prof_conf = {{ 0 }};
Status ret = InitFromOptions(options, prof_conf);
if (ret != SUCCESS) {
GELOGE(ret, "Failed to init profiling.");
return ret;
}

if (is_load_profiling_) {
// register Framework to profiling
int result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_);
if (result != 0) {
GELOGE(FAILED, "Register profiling engine failed.");
return FAILED;
if (is_execute_profiling_) {
if (prof_cb_.msprofCtrlCallback == nullptr) {
GELOGE(ge::PARAM_INVALID, "MsprofCtrlCallback callback is nullptr.");
return ge::PARAM_INVALID;
}
// profiling startup first time
GELOGI("Begin to init profiling, device num %zu", device_id_.size());
for (size_t i = 0; i < device_id_.size(); ++i) {
ret = StartProfiling(0, device_id_[i]);
if (ret != SUCCESS) {
GELOGW("Profiling start failed on device %d.", device_id_[i]);
continue;
}
GELOGI("Profiling init succ on device %d.", device_id_[i]);
int32_t cb_ret = prof_cb_.msprofCtrlCallback(
static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS),
static_cast<void *>(&prof_conf), sizeof(MsprofGeOptions));
if (cb_ret != 0) {
GELOGE(FAILED, "Call msprofCtrlCallback failed, type:%u, return:%d",
static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), cb_ret);
return FAILED;
}
GELOGI("Profiling init success");
} else {
GELOGI("The profiling is off, skip the initialization");
}
@@ -103,288 +88,120 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
return SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::InitFromAclCfg(
const std::string &config) {
ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOptions &prof_conf) {
#ifdef DAVINCI_SUPPORT_PROFILING
try {
is_load_profiling_ = false;
is_execute_profiling_ = false;
profiling_opts_.clear();
op_trace_conf_.clear();
Json start_prof_conf = Json::parse(config);
Json &prof_conf = start_prof_conf[kStartCfg][0];
job_id_ = prof_conf[kJobID];
auto iter = prof_conf.find(kProfDir);
if (iter != prof_conf.end()) {
prof_dir_ = prof_conf[kProfDir];
}
Json &device_id = prof_conf[kDeviceID];
if (device_id.size() != 0) {
vector<int32_t>().swap(device_id_);
bool is_all = false;
for (size_t i = 0; i < device_id.size(); i++) {
std::string device_id_str = device_id[i].get<std::string>();
if (device_id_str == "all") {
is_all = true;
break;
}
device_id_.push_back(std::stoi(device_id_str));
}
if (is_all) {
int32_t count = 0;
rtError_t rt_err = rtGetDeviceCount(&count);
if (rt_err != RT_ERROR_NONE) {
GELOGE(FAILED, "Call rtGetDeviceCount to get device failed.");
}
// enable profiling by env
char env_profiling_mode[MMPA_MAX_PATH] = { 0x00 };
is_execute_profiling_ = false;

vector<int32_t>().swap(device_id_);
for (int32_t i = 0; i < count; ++i) {
device_id_.push_back(i);
}
}
if (options.profiling_mode == "1" && !options.profiling_options.empty()) {
// enable profiling by ge option
if (strncpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(),
MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) {
GELOGE(INTERNAL_ERROR, "copy profiling_options failed.");
return INTERNAL_ERROR;
}

Json &features = prof_conf[kFeatures];
if (ParseFeaturesFromAclCfg(features) != SUCCESS) {
GELOGE(FAILED, "Parse feature from acl cfg failed.");
return FAILED;
is_execute_profiling_ = true;
GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), prof_conf.options,
options.profiling_options.c_str());
} else {
(void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH);
(void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX);
// The env is invalid
if ((strcmp("true", env_profiling_mode) != 0) || (strcmp(prof_conf.options, "\0") == 0)) {
return SUCCESS;
}
is_load_profiling_ = true;
// enable profiling by env
is_execute_profiling_ = true;
} catch (...) {
GELOGE(FAILED, "Json conf is not invalid !");
GELOGI("The profiling in env is %s, %s", env_profiling_mode, prof_conf.options);
}

if (!is_execute_profiling_) {
return SUCCESS;
}

// Parse json str for bp fp
Status ret = ParseOptions(prof_conf.options);
if (ret != ge::SUCCESS) {
GELOGE(ge::PARAM_INVALID, "Parse training trace param failed.");
return ge::PARAM_INVALID;
}

if (strncpy_s(prof_conf.jobId, MSPROF_OPTIONS_DEF_LEN_MAX, options.job_id.c_str(), MSPROF_OPTIONS_DEF_LEN_MAX - 1) !=
EOK) {
GELOGE(INTERNAL_ERROR, "copy job_id failed.");
return INTERNAL_ERROR;
}
GELOGI("Job id: %s, original job id: %s.", prof_conf.jobId, options.job_id.c_str());
#endif
return ge::SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::ParseFeaturesFromAclCfg(
const Json &features) {
#ifdef DAVINCI_SUPPORT_PROFILING
ge::Status ProfilingManager::ParseOptions(const std::string &options) {
if (options.empty()) {
GELOGE(ge::PARAM_INVALID, "Profiling options is empty.");
return ge::PARAM_INVALID;
}
try {
for (size_t i = 0; i < features.size(); ++i) {
const Json &feature = features[i];
if ((feature.find(kName) == feature.end()) || feature[kName].is_null()) {
continue;
}
const std::string &name = feature[kName];
if (name == "op_trace") {
const Json &conf = feature[kConf];
const Json &events = conf[0][kEvents];
const std::string &ai_core_events = events[0][kAiCoreEvents];
GELOGI("Op trace config from acl ai_core_events:%s", ai_core_events.c_str());
is_op_trace_ = true;
ProfMgrConf prof_mgr_conf;
int result = ProfMgrGetConf(ai_core_events, &prof_mgr_conf);
if (result != 0) {
GELOGE(FAILED, "ProfMgrGetConf failed.");
return FAILED;
}
op_trace_conf_ = prof_mgr_conf.conf;
op_trace_iter_num_ = static_cast<int32_t>(op_trace_conf_.size());
GELOGI("Op trace profiling iter num %d,", op_trace_iter_num_);
} else if (name == "task_trace") {
is_op_trace_ = false;
if (feature.find(kConf) != feature.end()) {
const Json &conf = feature[kConf];
std::stringstream task_trace_conf;
task_trace_conf << conf;
task_trace_conf_ = task_trace_conf.str();
}
GELOGI("Task trace config from acl");
} else if (name == "system_trace") {
is_op_trace_ = false;
const Json &conf = feature[kConf];
std::stringstream system_trace_conf;
system_trace_conf << conf;
system_trace_conf_ = system_trace_conf.str();
GELOGI("System trace config from acl");
}
profiling_opts_.push_back(name);
Json prof_options = Json::parse(options);
if (options.find(kTrainingTrace) == std::string::npos) {
return ge::SUCCESS;
}
const std::string training_trace = prof_options[kTrainingTrace];
if (training_trace.empty()) {
GELOGI("Training trace will not take effect.");
return ge::SUCCESS;
}
GELOGI("GE profiling training trace:%s", training_trace.c_str());
if (training_trace != "on") {
GELOGE(ge::PARAM_INVALID, "Training trace param:%s is invalid.", training_trace.c_str());
return ge::PARAM_INVALID;
}
fp_point_ = prof_options[kFpPoint];
bp_point_ = prof_options[kBpPoint];
if (!fp_point_.empty() && !bp_point_.empty()) {
GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str());
}
is_training_trace_ = true;
} catch (...) {
GELOGE(ge::PARAM_INVALID, "Json conf feature is not invalid !");
GELOGE(FAILED, "Json prof_conf options is invalid.");
return ge::PARAM_INVALID;
}
#endif
return ge::SUCCESS;
}
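
ParseOptions expects the profiling options string to be JSON carrying the training_trace, fp_point and bp_point keys. A stand-alone sketch of reading such a string with nlohmann::json (the same Json alias this file uses); the option string below is an illustrative assumption:

#include <cstdio>
#include <string>
#include <nlohmann/json.hpp>

int main() {
  // Illustrative option string; the real content comes from GE options or the PROFILING_OPTIONS env.
  const std::string options =
      R"({"training_trace":"on","fp_point":"resnet50/conv1","bp_point":"loss_scale/grad"})";
  try {
    nlohmann::json prof = nlohmann::json::parse(options);
    const std::string training_trace = prof["training_trace"];
    if (training_trace == "on") {
      const std::string fp_point = prof["fp_point"];
      const std::string bp_point = prof["bp_point"];
      std::printf("training trace on, fp_point=%s, bp_point=%s\n", fp_point.c_str(), bp_point.c_str());
    }
  } catch (...) {
    std::printf("profiling options are not valid JSON\n");
  }
  return 0;
}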

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::InitFromOptions(const Options &options) {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProfiling() {
#ifdef DAVINCI_SUPPORT_PROFILING
// enable profiling support two ways: env and front end
char profiling_mode_temp[MMPA_MAX_PATH] = { 0x00 };
char prof_options_temp[MMPA_MAX_PATH] = { 0x00 };
(void)mmGetEnv("PROFILING_MODE", profiling_mode_temp, MMPA_MAX_PATH);
(void)mmGetEnv("PROFILING_OPTIONS", prof_options_temp, MMPA_MAX_PATH );
const char *profiling_mode = profiling_mode_temp;
const char *prof_options = prof_options_temp;
if ((profiling_mode == nullptr) || (strcmp("true", profiling_mode) != 0) || (prof_options == nullptr)) {
is_load_profiling_ = false;
is_execute_profiling_ = false;
} else {
std::string prof_options_str = std::string(prof_options);
profiling_opts_ = StringUtils::Split(prof_options_str, ':');
is_load_profiling_ = true;
is_execute_profiling_ = true;
GELOGI("The profiling in env is %s, %s", profiling_mode, prof_options);
}
if (!is_load_profiling_) {
const std::string enable_profiling = "1";
if (options.profiling_mode != enable_profiling || options.profiling_options.empty()) {
is_load_profiling_ = false;
is_execute_profiling_ = false;
return SUCCESS;
} else {
profiling_opts_ = StringUtils::Split(options.profiling_options, ':');
is_load_profiling_ = true;
is_execute_profiling_ = true;
GELOGI("The profiling in options is %s, %s", options.profiling_mode.c_str(), options.profiling_options.c_str());
}
}
// features:'training_trace', 'task_trace' or 'op_trace' etc
if (!profiling_opts_.empty()) {
if (profiling_opts_[0] == "op_trace") {
is_op_trace_ = true;
// op trace get conf
ProfMgrConf prof_mgr_conf;
int result = ProfMgrGetConf("", &prof_mgr_conf);
if (result != 0) {
GELOGE(FAILED, "ProfMgrGetConf failed.");
return FAILED;
}
op_trace_conf_ = prof_mgr_conf.conf;
op_trace_iter_num_ = static_cast<int32_t>(op_trace_conf_.size());
GELOGI("op trace profiling iter num %d,", op_trace_iter_num_);
} else {
is_op_trace_ = false;
op_trace_iter_num_ = 1;
uint64_t module = GetProfilingModule();
// The following branch is not executed in the normal case; it only runs when ProfStopProfiling behaves abnormally
int32_t device_num = static_cast<int32_t>(device_id_.size());
if (device_num != 0) {
auto device_id_ptr = std::unique_ptr<uint32_t[]>(new (std::nothrow) uint32_t[device_num]);
if (device_id_ptr == nullptr) {
GELOGE(FAILED, "Stop profiling: device id ptr is null.");
return;
}
}
#endif
return ge::SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::StartProfiling(int32_t iter_num,
int32_t device_id) {
#ifdef DAVINCI_SUPPORT_PROFILING
if (!profiling_opts_.empty()) {
GELOGI("Start profiling index is %d", iter_num);
// current one docker only use one device
Json p_device;

try {
// profiling need physical_device_id
p_device[kDeviceID] = std::to_string(device_id);
p_device[kJobID] = job_id_;
p_device[kTraceID] = std::to_string(GetContext().TraceId());
if (!prof_dir_.empty()) {
p_device[kProfDir] = prof_dir_;
GELOGI("Prof dir: %s.", prof_dir_.c_str());
}

Json features;
if (is_op_trace_) {
Json f;
f[kName] = "op_trace";
Json conf;
if (op_trace_conf_.size() <= static_cast<size_t>(iter_num)) {
GELOGE(FAILED, "Op trace iter num is invalid!");
return FAILED;
}
Json events;
events[0] = nlohmann::json::parse(op_trace_conf_[iter_num]);
conf[0][kEvents] = events;
f[kConf] = conf;
features[0] = f;
if (iter_num == 0) {
is_load_ = true;
}
} else {
for (std::vector<std::string>::size_type i = 0; i < profiling_opts_.size(); i++) {
Json f;
if (profiling_opts_[i] == "system_trace") {
f[kConf] = nlohmann::json::parse(system_trace_conf_);
} else if (profiling_opts_[i] == "task_trace") {
if (!task_trace_conf_.empty()) {
f[kConf] = nlohmann::json::parse(task_trace_conf_);
}
}
f[kName] = profiling_opts_[i];
features[i] = f;
}
is_load_ = true;
}
p_device[kFeatures] = features;
// only one device, but sProfMgrStartUp API require for device list
Json devices;
devices[0] = p_device;

Json start_cfg;
start_cfg[kStartCfg] = devices;

// convert json to string
std::stringstream ss;
ss << start_cfg;
send_profiling_config_ = ss.str();
GELOGI("Profiling config %s\n", send_profiling_config_.c_str());
} catch (...) {
GELOGE(FAILED, "Op trace json conf is not invalid !");
return FAILED;
for (int32_t i = 0; i < device_num; i++) {
device_id_ptr[i] = static_cast<uint32_t>(device_id_[i]);
}

// runtime startup for profiling
uint64_t module = GetProfilingModule();
int32_t device_num = 1;
uint32_t device_id_rt = static_cast<uint32_t>(device_id);
GE_CHK_RT_RET(rtProfilerStart(module, device_num, &device_id_rt));

// call profiling startup API
ProfMgrCfg prof_cfg = {send_profiling_config_};
void *prof_handle = ProfMgrStartUp(&prof_cfg);
if (prof_handle == nullptr) {
GELOGW("ProfMgrStartUp failed on device %d ", device_id);
return FAILED;
rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get());
if (rt_ret != RT_ERROR_NONE) {
GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret);
}
GELOGD("StartProfiling, prof_handle: %p", prof_handle);
prof_handle_vec_.push_back(prof_handle);
}
#endif
return SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProfiling() {
#ifdef DAVINCI_SUPPORT_PROFILING
Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter();
if (reporter != nullptr) {
int ret = reporter->Flush();
GELOGI("Report data end, ret is %d", ret);
// stop profiling
if (prof_cb_.msprofCtrlCallback == nullptr) {
GELOGE(ge::PARAM_INVALID, "MsprofCtrlCallback callback is nullptr.");
return;
}
uint64_t module = GetProfilingModule();
int32_t device_num = static_cast<int32_t>(device_id_.size());
auto device_id_ptr = std::unique_ptr<uint32_t[]>(new (std::nothrow) uint32_t[device_num]);
if (device_id_ptr == nullptr) {
GELOGE(FAILED, "Stop profiling: device id ptr is null.");
int32_t cb_ret = prof_cb_.msprofCtrlCallback(static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE),
nullptr, 0);
if (cb_ret != 0) {
GELOGW("call msprofCtrlCallback failed, type:%u, return:%d",
static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), cb_ret);
return;
}
for (int32_t i = 0; i < device_num; i++) {
device_id_ptr[i] = static_cast<uint32_t>(device_id_[i]);
}
rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get());
if (rt_ret != RT_ERROR_NONE) {
GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret);
}

for (size_t i = 0; i < prof_handle_vec_.size(); ++i) {
int result = ProfMgrStop(prof_handle_vec_[i]);
if (result != 0) {
GELOGW("ProfMgr stop return fail:%d, handle:%p", result, prof_handle_vec_[i]);
}
}
vector<void *>().swap(prof_handle_vec_);
is_load_ = false;
recv_profiling_config_ = "";
GELOGI("Stop Profiling success.");
#endif
}
@@ -392,12 +209,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo(
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) {
#ifdef DAVINCI_SUPPORT_PROFILING
Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter();
if (reporter == nullptr) {
GELOGI("Profiling report is nullptr!");
return;
}

std::string data;
for (const auto &task : task_desc_info) {
std::string model_name = task.model_name;
@@ -405,14 +216,18 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
uint32_t block_dim = task.block_dim;
uint32_t task_id = task.task_id;
uint32_t stream_id = task.stream_id;
std::string shape_type = task.shape_type;
int64_t cur_iter_num = task.cur_iter_num;
data = model_name.append(" ")
.append(op_name).append(" ")
.append(std::to_string(block_dim).append(" ")
.append(std::to_string(block_dim)).append(" ")
.append(std::to_string(task_id)).append(" ")
.append(std::to_string(stream_id)).append(" ")
.append(std::to_string(model_id)).append("\n"));
.append(std::to_string(model_id)).append(" ")
.append(shape_type).append(" ")
.append(std::to_string(cur_iter_num)).append("\n");

Msprof::Engine::ReporterData reporter_data{};
ReporterData reporter_data{};
reporter_data.deviceId = device_id;
reporter_data.data = (unsigned char *)data.c_str();
reporter_data.dataLen = data.size();
@@ -422,9 +237,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
return;
}

ret = reporter->Report(&reporter_data);
if (ret != SUCCESS) {
GELOGE(ret, "Reporter data of task_desc_info fail!");
int32_t cb_ret = CallMsprofReport(reporter_data);
if (cb_ret != 0) {
GELOGE(cb_ret, "Reporter data of task_desc_info failed, ret:%d", cb_ret);
return;
}
}
@@ -436,9 +251,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo(
uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, const int32_t &device_id) {
#ifdef DAVINCI_SUPPORT_PROFILING
Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter();
GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return;);

std::string data;
for (const auto &graph : compute_graph_desc_info) {
data.append("model_name:")
@@ -493,64 +305,54 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
}

data.append(" model_id:").append(std::to_string(model_id));

data.append(" task_id:").append(std::to_string(graph.task_id));
data.append(" stream_id:").append(std::to_string(graph.stream_id));
data.append("\n");

Msprof::Engine::ReporterData reporter_data{};
Report(device_id, data, *reporter, reporter_data);

GraphDescReport(device_id, data);
data.clear();
}
#endif
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Report(
const int32_t &device_id, const string &data, Msprof::Engine::Reporter &reporter,
Msprof::Engine::ReporterData &reporter_data) {
void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &data) {
#ifdef DAVINCI_SUPPORT_PROFILING
ReporterData reporter_data{};
int ret = -1;
int32_t cb_ret = -1;
size_t index = data.size() / kReportMaxLen;
if (index >= 1) {
reporter_data.deviceId = device_id;
int ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info"));
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info"));
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;);
for (size_t i = 0; i < index; ++i) {
reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * i;
reporter_data.dataLen = kReportMaxLen;
ret = reporter.Report(&reporter_data);
GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "Reporter data of graph_desc_info fail!"); return;);
cb_ret = CallMsprofReport(reporter_data);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;);
}
reporter_data.dataLen = data.size() - kReportMaxLen * index;
if (reporter_data.dataLen != 0) {
reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * index;
ret = reporter.Report(&reporter_data);
GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "Reporter data of graph_desc_info fail!"); return;);
cb_ret = CallMsprofReport(reporter_data);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;);
}
} else {
reporter_data.deviceId = device_id;
reporter_data.data = (unsigned char *)data.c_str();
reporter_data.dataLen = data.size();
int ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info"));
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info"));
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;);

ret = reporter.Report(&reporter_data);
GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "Reporter data of graph_desc_info fail!"); return;);
}
#endif
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUnInit(const std::string &module) const {
#ifdef DAVINCI_SUPPORT_PROFILING
int ret = Msprof::Engine::UnInit(module);
if (ret != SUCCESS) {
GELOGE(ret, "profiling plugin uninit failed, ret:%d", ret);
cb_ret = CallMsprofReport(reporter_data);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;);
}
#endif
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData(
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
bool check_device) {
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) {
#ifdef DAVINCI_SUPPORT_PROFILING
int32_t logic_device_id = 0;
rtError_t rt_ret = rtGetDevice(&logic_device_id);
@@ -559,13 +361,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr
return;
}
GELOGD("current logic_device_id:%d", logic_device_id);
if (check_device) {
auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id);
if (ret == device_id_.end()) {
GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed.");
return;
}
}
GELOGD("start ProfilingTaskDescInfo.");
ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id);
GELOGD("start ProfilingGraphDescInfo.");
@@ -574,11 +369,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr
#endif
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::SetProfilingConfig(
const std::string &profiling_cfg) {
recv_profiling_config_ = profiling_cfg;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t ProfilingManager::GetProfilingModule() {
uint64_t module = PROF_MODEL_EXECUTE_MASK |
PROF_RUNTIME_API_MASK |
@@ -594,9 +384,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t ProfilingManager::GetP
return module;
}

void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type,
uint32_t device_id,
uint64_t module) {
void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module) {
#ifdef DAVINCI_SUPPORT_PROFILING
if (prof_type == kProfModelSubscribe) {
if (subs_dev_module_.find(device_id) != subs_dev_module_.end()) {
@@ -608,9 +396,13 @@ void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type,
subs_dev_module_[device_id] = dev_info;
}
} else if (prof_type == kProfModelUnsubscribe) {
if (subs_dev_module_.find(device_id) != subs_dev_module_.end()) {
if (subs_dev_module_[device_id].subscribe_count > 0) {
subs_dev_module_[device_id].subscribe_count--;
auto iter = subs_dev_module_.find(device_id);
if (iter != subs_dev_module_.end()) {
if (iter->second.subscribe_count > 0) {
iter->second.subscribe_count--;
}
if (iter->second.subscribe_count == 0) {
subs_dev_module_.erase(iter);
}
}
} else {
@@ -626,10 +418,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfMo
uint64_t model_load_mask = module & PROF_MODEL_LOAD_MASK;
if ((subscribe_count_ == 0) && (model_load_mask == PROF_MODEL_LOAD_MASK)) {
// register framework to profiling
int32_t result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_);
if (result != SUCCESS) {
GELOGE(FAILED, "Register profiling engine failed.");
return FAILED;
// register Framework to profiling
int32_t cb_ret = PluginInit();
if (cb_ret != 0) {
GELOGE(cb_ret, "profiling plugin init failed, ret:%d", cb_ret);
return cb_ret;
}
GELOGI("Prof subscribe: model load profiling on.");
}
@@ -647,7 +440,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfMo
UpdateSubscribeDeviceModuleMap(kProfModelSubscribe, device[0], module);

// Report profiling data
Status p_ret = davinci_model->ReportProfilingData(false);
Status p_ret = davinci_model->ReportProfilingData();
if (p_ret != SUCCESS) {
GELOGE(p_ret, "Report profiling data failed.");
return p_ret;
@@ -672,6 +465,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfMo
auto iter = subs_dev_module_.find(device[0]);
if (iter != subs_dev_module_.end()) {
if (subs_dev_module_[device[0]].subscribe_count == 1) {
// For the same device_id, only stop profiling on the last unsubscribe
rtError_t rt_ret = rtProfilerStop(subs_dev_module_[device[0]].module, dev_num, device);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(FAILED, "Runtime profiler stop failed.");
@@ -679,15 +473,15 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfMo
}
}
UpdateSubscribeDeviceModuleMap(kProfModelUnsubscribe, device[0], subs_dev_module_[device[0]].module);
} else {
GELOGE(FAILED, "The device_id:%u has not been subscribed, do not need to cancel.", device[0]);
return FAILED;
}

subscribe_count_--;
if (subscribe_count_ == 0) {
int32_t ret = Msprof::Engine::UnInit(GE_PROFILING_MODULE);
if (ret != SUCCESS) {
GELOGE(ret, "Profiling plugin uninit failed, ret:%d", ret);
return ret;
}
// Uninit the profiling plugin on the last unsubscription
PluginUnInit();
}
#endif
return SUCCESS;
@@ -700,11 +494,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfIn

if (model_load_mask == PROF_MODEL_LOAD_MASK) {
// register Framework to profiling
int32_t result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_);
if (result != SUCCESS) {
GELOGE(FAILED, "Register profiling engine failed.");
return FAILED;
int32_t cb_ret = PluginInit();
if (cb_ret != 0) {
GELOGE(cb_ret, "profiling plugin init failed, ret:%d", cb_ret);
return cb_ret;
}

int32_t device_num = -1;
rtError_t rt_ret = rtProfilerStart(model_load_mask, device_num, nullptr);
if (rt_ret != RT_ERROR_NONE) {
@@ -719,7 +514,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfIn
if (training_trace_mask == PROF_TRAINING_TRACE_MASK) {
is_training_trace_ = true;
}
is_acl_api_mode_ = true;
GELOGI("Prof init success.");
#endif
return SUCCESS;
@@ -730,19 +524,17 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFi
std::lock_guard<std::mutex> lock(mutex_);
is_load_profiling_ = false;
is_training_trace_ = false;
is_acl_api_mode_ = false;
is_execute_profiling_ = false;

// profiling plugin uninit
PluginUnInit();

int32_t ret = Msprof::Engine::UnInit(GE_PROFILING_MODULE);
if (ret != SUCCESS) {
GELOGE(ret, "Profiling plugin uninit failed, ret:%d", ret);
}
int32_t dev_num = -1;
rtError_t rt_ret = rtProfilerStop(PROF_MODEL_LOAD_MASK, dev_num, nullptr);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(FAILED, "Runtime profiler stop failed.");
return FAILED;
}

for (auto device_id_module : device_id_module_map_) {
if (device_id_module.second != 0) {
uint32_t device_id = static_cast<uint32_t>(device_id_module.first);
@@ -792,6 +584,7 @@ Status ProfilingManager::ProfParseDeviceId(const std::map<std::string, std::stri
return FAILED;
} catch (std::out_of_range &) {
GELOGE(FAILED, "Device id: %s is out of range.", decvice_id[i].c_str());
return FAILED;
} catch (...) {
GELOGE(FAILED, "Device id: %s cannot change to int.", decvice_id[i].c_str());
return FAILED;
@@ -818,6 +611,7 @@ Status ProfilingManager::ProfParseParam(const std::map<std::string, std::string>
return FAILED;
} catch (std::out_of_range &) {
GELOGE(FAILED, "Device num: %s is out of range.", iter->second.c_str());
return FAILED;
} catch (...) {
GELOGE(FAILED, "Device num: %s cannot change to int.", iter->second.c_str());
return FAILED;
@@ -844,6 +638,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
uint64_t module, const std::map<std::string, std::string> &config_para) {
#ifdef DAVINCI_SUPPORT_PROFILING
std::lock_guard<std::mutex> lock(mutex_);
uint64_t training_trace_mask = module & PROF_TRAINING_TRACE_MASK;
if (training_trace_mask == PROF_TRAINING_TRACE_MASK) {
is_training_trace_ = true;
}
int32_t device_num = 0;
vector<int32_t> device_list;
if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) {
@@ -859,7 +657,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
for (int32_t i = 0; i < device_num; i++) {
device_id_ptr[i] = static_cast<uint32_t>(device_list[i]);
}
GELOGD("Runtime config param: 0x%llx, device num: %d.", module, device_num);
GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num);

rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get());
if (rt_ret != RT_ERROR_NONE) {
@@ -878,7 +676,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
GELOGW("Prof start: load model module is invalid.");
}
UpdateDeviceIdModuleMap(kProfStart, module, device_list);
GELOGD("Prof start profiling success.");
GELOGI("Prof start profiling success.");
#endif
return SUCCESS;
}
@@ -901,7 +699,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
for (int32_t i = 0; i < device_num; i++) {
device_id_ptr[i] = static_cast<uint32_t>(device_list[i]);
}
GELOGD("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num);
GELOGI("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num);
rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get());
if (rt_ret != RT_ERROR_NONE) {
GELOGE(FAILED, "Prof stop: runtime profiler config proc failed.");
@@ -921,7 +719,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
GELOGW("Prof stop: load model module is invalid.");
}
UpdateDeviceIdModuleMap(kProfStop, module, device_list);
GELOGD("Prof stop profiling success.");
GELOGI("Prof stop profiling success.");
#endif
return SUCCESS;
}
@@ -963,47 +761,104 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id);
}
GELOGD("Current logic_device_id:%d", logic_device_id);
GELOGI("Current logic_device_id:%d", logic_device_id);

bool execute_model_prof_on = false;
auto iter = std::find(device_id_.begin(), device_id_.end(), logic_device_id);
if (iter != device_id_.end()) {
execute_model_prof_on = true;
}
GELOGD("Flag is_execute_profiling: %d, execute_model_prof_on: %d", is_execute_profiling_, execute_model_prof_on);
return is_execute_profiling_ || execute_model_prof_on;
GELOGI("Flag is_execute_profiling: %d, execute_model_prof_on: %d", is_execute_profiling_, execute_model_prof_on);
return execute_model_prof_on;
}

/**
* @brief Profiling PluginImpl
*/
// PluginImpl static variable init
Msprof::Engine::Reporter *PluginImpl::reporter_ = nullptr;

PluginImpl::PluginImpl(const std::string &module) : module_(module) { GELOGI("Create PluginImpl\n"); }

int PluginImpl::Init(const Msprof::Engine::Reporter *reporter) {
GELOGI("PluginImpl init");
reporter_ = const_cast<Msprof::Engine::Reporter *>(reporter);
return 0;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::PluginInit() const {
if (prof_cb_.msprofReporterCallback == nullptr) {
GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr.");
return ge::PARAM_INVALID;
}
return prof_cb_.msprofReporterCallback(
static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_INIT),
nullptr, 0);
}

int PluginImpl::UnInit() {
GELOGI("PluginImpl Uninit");
reporter_ = nullptr;
return 0;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUnInit() const {
#ifdef DAVINCI_SUPPORT_PROFILING
if (prof_cb_.msprofReporterCallback == nullptr) {
GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr.");
return;
}
int32_t cb_ret = prof_cb_.msprofReporterCallback(
static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_UNINIT),
nullptr, 0);
if (cb_ret != 0) {
GELOGW("profiling plugin uninit failed, ret:%d", cb_ret);
}
#endif
}

Msprof::Engine::PluginIntf *ProfilingEngineImpl::CreatePlugin() {
GELOGI(" Create Plugin");
return new (std::nothrow) PluginImpl(GE_PROFILING_MODULE);
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMsprofReport(
ReporterData &reporter_data) const {
if (prof_cb_.msprofReporterCallback == nullptr) {
GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr.");
return ge::PARAM_INVALID;
}
return prof_cb_.msprofReporterCallback(
static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_REPORT),
static_cast<void *>(&reporter_data), sizeof(ReporterData));
}

int ProfilingEngineImpl::ReleasePlugin(Msprof::Engine::PluginIntf *plugin) {
if (plugin != nullptr) {
delete plugin;
plugin = nullptr;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpPoint(
std::string &fp_point, std::string &bp_point) {
// Env or options mode, fp_point_/bp_point_ have been initialized during profiling init
if (!fp_point_.empty() && !bp_point_.empty()) {
fp_point = fp_point_;
bp_point = bp_point_;
GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(),
fp_point.c_str());
return;
}
// ProfApi mode and training trace is set
// Parse options first
char env_profiling_options[MSPROF_OPTIONS_DEF_LEN_MAX] = { 0x00 };
bool is_profiling_valid = false;
std::string profiling_options;
if (ge::GetContext().GetOption(OPTION_EXEC_PROFILING_OPTIONS, profiling_options) == SUCCESS &&
!profiling_options.empty()) {
is_profiling_valid = true;
} else {
INT32 ret = mmGetEnv("PROFILING_OPTIONS", env_profiling_options, MSPROF_OPTIONS_DEF_LEN_MAX);
if (ret != EN_OK) {
GELOGI("PROFILING_OPTIONS env is not exist.");
return;
}
GELOGI("Parse env PROFILING_OPTIONS:%s.", env_profiling_options);
profiling_options = env_profiling_options;
is_profiling_valid = true;
}
if (is_profiling_valid) {
try {
Json prof_options = Json::parse(profiling_options);

fp_point_ = prof_options[kFpPoint];
bp_point_ = prof_options[kBpPoint];

fp_point = fp_point_;
bp_point = bp_point_;
if (!fp_point_.empty() && !bp_point_.empty()) {
GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str());
}
} catch (...) {
GELOGW("Json prof options is invalid.");
return;
}
}
return 0;

return;
}


} // namespace ge
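
A note on the new GetFpBpPoint path above: it accepts the same JSON payload whether the options arrive through OPTION_EXEC_PROFILING_OPTIONS or the PROFILING_OPTIONS environment variable. Below is a minimal standalone sketch of that parse; it assumes the kFpPoint/kBpPoint constants (defined outside this hunk) map to "fp_point"/"bp_point" keys, and the node names are made up for illustration.

#include <iostream>
#include <string>
#include "nlohmann/json.hpp"

using Json = nlohmann::json;

// Hypothetical key names; the real kFpPoint/kBpPoint constants live outside this hunk.
const char *kFpPoint = "fp_point";
const char *kBpPoint = "bp_point";

int main() {
  // A profiling-options payload of the shape GetFpBpPoint() tries to parse.
  std::string profiling_options =
      R"({"fp_point":"resnet50/conv1/Conv2D","bp_point":"gradients/AddN_70"})";
  std::string fp_point;
  std::string bp_point;
  try {
    Json prof_options = Json::parse(profiling_options);
    fp_point = prof_options[kFpPoint];
    bp_point = prof_options[kBpPoint];
  } catch (...) {
    std::cout << "Json prof options is invalid." << std::endl;
    return 1;
  }
  std::cout << "fp_point=" << fp_point << ", bp_point=" << bp_point << std::endl;
  return 0;
}

If either key is missing, operator[] on the mutable json inserts a null and the string conversion throws, which is exactly the path the catch(...) guards, matching the GELOGW branch in GetFpBpPoint.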

+ 52
- 69
ge/common/profiling/profiling_manager.h View File

@@ -26,9 +26,7 @@
#include "framework/common/ge_inner_error_codes.h"
#include "framework/common/ge_types.h"
#include "external/register/register_types.h"
#include "toolchain/prof_engine.h"
#include "toolchain/prof_mgr_core.h"
#include "toolchain/prof_acl_api.h"
#include "toolchain/prof_callback.h"

using std::map;
using std::string;
@@ -37,35 +35,33 @@ using Json = nlohmann::json;

namespace {
const std::string GE_PROFILING_MODULE = "Framework";
// DataTypeConfig MASK
const uint64_t PROF_ACL_API_MASK = 0x0001;
const uint64_t PROF_TASK_TIME_MASK = 0x0002;
const uint64_t PROF_AICORE_METRICS_MASK = 0x0004;
const uint64_t PROF_AICPU_TRACE_MASK = 0x0008;
const uint64_t PROF_MODEL_EXECUTE_MASK = 0x0010;
const uint64_t PROF_RUNTIME_API_MASK = 0x0020;
const uint64_t PROF_RUNTIME_TRACE_MASK = 0x0040;
const uint64_t PROF_SCHEDULE_TIMELINE_MASK = 0x0080;
const uint64_t PROF_SCHEDULE_TRACE_MASK = 0x0100;
const uint64_t PROF_AIVECTORCORE_METRICS_MASK = 0x0200;
const uint64_t PROF_SUBTASK_TIME_MASK = 0x0400;
const uint64_t PROF_TRAINING_TRACE_MASK = 0x0800;
const uint64_t PROF_HCCL_TRACE_MASK = 0x1000;
const uint64_t PROF_DATA_PROCESS_MASK = 0x2000;
const uint64_t PROF_MODEL_LOAD_MASK = 0x8000000000000000;

} // namespace
namespace ge {
struct DeviceSubsInfo {
uint64_t module;
uint32_t subscribe_count;
};
// register Plugin
class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY PluginImpl : public Msprof::Engine::PluginIntf {
public:
explicit PluginImpl(const std::string &module);
~PluginImpl() {}

int Init(const Msprof::Engine::Reporter *reporter);
int UnInit();
static Msprof::Engine::Reporter *GetPluginReporter() { return reporter_; }

private:
static Msprof::Engine::Reporter *reporter_;
std::string module_;
};

// register Engine
class ProfilingEngineImpl : public Msprof::Engine::EngineIntf {
public:
ProfilingEngineImpl() {}
~ProfilingEngineImpl() {}

Msprof::Engine::PluginIntf *CreatePlugin();
int ReleasePlugin(Msprof::Engine::PluginIntf *plugin);
struct MsprofCallback {
MsprofCtrlCallback msprofCtrlCallback;
MsprofReporterCallback msprofReporterCallback;
};

class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
@@ -73,68 +69,55 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
ProfilingManager();
virtual ~ProfilingManager();
static ProfilingManager &Instance();
ge::Status Init(const Options &options);
ge::Status InitFromOptions(const Options &options);
ge::Status InitFromAclCfg(const std::string &config);
ge::Status StartProfiling(int32_t iter, int32_t device_id);
void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module);
ge::Status ProfModelSubscribe(uint64_t module, void *model);
ge::Status ProfModelUnsubscribe(void *model);
ge::Status ProfInit(uint64_t module);
ge::Status ProfFinalize();
ge::Status ProfStartProfiling(uint64_t module, const std::map<std::string, std::string> &config_para);
ge::Status ProfStopProfiling(uint64_t module, const std::map<std::string, std::string> &config_para);
Status Init(const Options &options);
Status ProfInit(uint64_t module);
Status ProfFinalize();
Status ProfStartProfiling(uint64_t module, const std::map<std::string, std::string> &config_para);
Status ProfStopProfiling(uint64_t module, const std::map<std::string, std::string> &config_para);
Status ProfModelSubscribe(uint64_t module, void *model);
Status ProfModelUnsubscribe(void *model);
void StopProfiling();
bool ProfilingOpTraceOn() const { return is_op_trace_; }
bool ProfilingLoadFlag() const { return is_load_; }
bool ProfilingTrainingTraceOn() const { return is_training_trace_; }
bool ProfilingModelLoadOn() const { return is_load_profiling_; }
bool ProfilingModelExecuteOn() const;
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // only used by command pattern
bool IsAclApiMode() const { return is_acl_api_mode_; }
int32_t GetOpTraceIterNum() const { return op_trace_iter_num_; }
// is_execute_profiling_ is only used by ge option and env
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; }
void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
bool check_device);
void Report(const int32_t &device_id, const string &data, Msprof::Engine::Reporter &reporter,
Msprof::Engine::ReporterData &reporter_data);
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info);
void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
const int32_t &device_id);
void ProfilingGraphDescInfo(uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
const int32_t &device_id);
void SetProfilingConfig(const string &profiling_cfg);
vector<int32_t> GetProfilingDeviceId() const { return device_id_; }
void PluginUnInit(const std::string &module) const;
Status PluginInit() const;
void PluginUnInit() const;
Status CallMsprofReport(ReporterData &reporter_data) const;
struct MsprofCallback &GetMsprofCallback() { return prof_cb_; }
void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; }
void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; }
void GetFpBpPoint(std::string &fp_point, std::string &bp_point);
private:
ge::Status ParseFeaturesFromAclCfg(const Json &feature);
ge::Status ProfParseParam(const std::map<std::string, std::string> &config_para, int32_t &device_num,
vector<int32_t> &device_list);
ge::Status ProfParseDeviceId(const std::map<std::string, std::string> &config_para,
Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf);
Status ParseOptions(const std::string &options);
Status ProfParseParam(const std::map<std::string, std::string> &config_para, int32_t &device_num,
vector<int32_t> &device_list);
Status ProfParseDeviceId(const std::map<std::string, std::string> &config_para,
vector<int32_t> &device_list);
uint64_t GetProfilingModule();
void GraphDescReport(const int32_t &device_id, const string &data);
void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector<int32_t> &device_list);
bool is_load_profiling_ = false;
bool is_execute_profiling_ = false;
bool is_op_trace_ = false;
bool is_load_ = false;
bool is_training_trace_ = false;
bool is_acl_api_mode_ = false;
int32_t op_trace_iter_num_ = 0;
string job_id_;
string prof_dir_;
void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module);

bool is_load_profiling_;
bool is_execute_profiling_;
bool is_training_trace_;
vector<int32_t> device_id_;
vector<string> op_trace_conf_;
vector<string> profiling_opts_;
vector<void *> prof_handle_vec_;
string recv_profiling_config_;
string send_profiling_config_;
string system_trace_conf_;
string task_trace_conf_;
const ProfilingEngineImpl engine_;
map<int32_t, uint64_t> device_id_module_map_; // key: device_id, value: profiling on module
map<uint32_t, DeviceSubsInfo> subs_dev_module_; // key: device_id, value: profiling on module
uint32_t subscribe_count_;
std::mutex mutex_;
MsprofCallback prof_cb_;
std::string fp_point_;
std::string bp_point_;
};
} // namespace ge
#endif // GE_COMMON_PROFILING_PROFILING_MANAGER_H_
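
The reworked header drops the Msprof::Engine plugin/engine classes in favour of plain callback registration on the singleton (SetMsprofCtrlCallback / SetMsprofReporterCallback) plus CallMsprofReport for data delivery. The sketch below shows only the wiring pattern; ReporterData and the callback typedef really come from toolchain/prof_callback.h, so the simplified stand-ins are assumptions made to keep the example self-contained, and the module/type constants are placeholders rather than the real enum values.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Simplified stand-ins (assumptions) for the toolchain types used above.
struct ReporterData {
  char tag[32];
  const unsigned char *data;
  uint32_t dataLen;
};
using MsprofReporterCallback = int32_t (*)(uint32_t module_id, uint32_t type, void *data, uint32_t len);

const uint32_t kModuleFramework = 0;  // placeholder for MSPROF_MODULE_FRAMEWORK
const uint32_t kReporterReport = 2;   // placeholder for MSPROF_REPORTER_REPORT

// Stands in for the callback that msprof would register via SetMsprofReporterCallback.
int32_t FakeReporter(uint32_t module_id, uint32_t type, void *data, uint32_t len) {
  std::printf("report: module=%u type=%u bytes=%u\n", module_id, type, len);
  return 0;
}

int main() {
  MsprofReporterCallback reporter_cb = FakeReporter;  // SetMsprofReporterCallback(FakeReporter)
  ReporterData rd{};
  std::strncpy(rd.tag, "task_desc_info", sizeof(rd.tag) - 1);
  const unsigned char payload[] = "model_0,op_x,42";
  rd.data = payload;
  rd.dataLen = sizeof(payload);
  // CallMsprofReport(rd) forwards &rd and sizeof(ReporterData) with the
  // framework module id and the REPORT type, as in the .cc hunk above.
  return reporter_cb(kModuleFramework, kReporterReport, static_cast<void *>(&rd), sizeof(ReporterData));
}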

+ 1
- 0
ge/common/proto/ge_ir.proto View File

@@ -30,6 +30,7 @@ enum DataType
DT_RESOURCE = 23; // resource type
DT_STRING_REF = 24; // string_ref type
DT_DUAL = 25; /**< dual output type */
DT_VARIANT = 26; // variant type
}

message AttrDef


+ 2
- 0
ge/common/proto/op_mapping_info.proto View File

@@ -15,6 +15,7 @@ message Output {
int32 original_output_data_type = 7;
int32 original_output_format = 8;
uint64 size = 9;
Shape origin_shape = 10;
}

message Input {
@@ -23,6 +24,7 @@ message Input {
Shape shape = 3;
uint64 address = 4;
uint64 size = 5;
Shape origin_shape = 6;
}

enum BufferType {


+ 8
- 0
ge/common/proto/tensorflow/attr_value.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/function.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/graph.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/graph_library.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/node_def.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/op_def.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/resource_handle.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/tensor.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/tensor_shape.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

// Protocol buffer representing the shape of tensors.

syntax = "proto3";


+ 8
- 0
ge/common/proto/tensorflow/types.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/versions.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 4
- 1
ge/common/types.cc View File

@@ -480,6 +480,9 @@ REGISTER_OPTYPE_DEFINE(HVDWAIT, "HorovodWait");
// aicpu op for online_infer dynamic_dims
REGISTER_OPTYPE_DEFINE(GETDYNAMICDIMS, "GetDynamicDims");

// profiling training trace node
REGISTER_OPTYPE_DEFINE(PROFILINGTRAININGTRACE, "ProfilingTrainingTrace");

const std::string MODEL_ATTR_TASKS = "tasks";
const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR = "task_gen_base_addr";
const std::string MODEL_ATTR_TASK_GEN_WEIGHT_ADDR = "task_gen_weight_addr";
@@ -801,7 +804,7 @@ const uint32_t XRGB_CHN_NUM = 4;
///
const bool DEFAULT_GLOBAL_POOLING = false;

const uint32_t MODEL_VERSION = 0x10000000; ///< Model version 1.0///
const uint32_t MODEL_VERSION = 0x20000000; ///< Model version 2.0///

// Eltwise's input size
const int ELTWISE_MIN_INPUT_SIZE = 2;


+ 43
- 40
ge/common/util.cc View File

@@ -51,14 +51,15 @@ namespace {
* If such an exception is encountered during operation,
* the proto file can be divided into several small files or the limit value can be increased.
*/
const int kProtoReadBytesLimit = INT_MAX; // Max size of 2 GB minus 1 byte.
const int kWarningThreshold = 536870912 * 2; // 536870912 represent 512M
const int kFileSizeOutLimitedOrOpenFailed = -1;
const int kProtoReadBytesLimit = INT_MAX; // Max size of 2 GB minus 1 byte.
const int kWarningThreshold = 1073741824; // 1073741824 = 536870912 * 2, 536870912 represents 512M

/// The maximum length of the file.
const uint32_t kMaxFileSizeLimit = UINT32_MAX; // 4G for now
const uint32_t kMaxFileSizeLimit = UINT32_MAX; // 4G for now
const int kMaxBuffSize = 256;
const char *const kPathValidReason = "The path can only contain 'a-z' 'A-Z' '0-9' '-' '.' '_' and Chinese characters";
constexpr uint32_t kMaxConfigFileByte = 10 * 1024 * 1024;
constexpr uint32_t kMaxConfigFileByte = 10485760; // 10 * 1024 * 1024
} // namespace

namespace ge {
@@ -76,7 +77,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromBinaryFile(co
std::string real_path = RealPath(file);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return false, "pb file path '%s' not valid", file);

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(real_path) == -1, return false, "file size not valid.");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(real_path) == kFileSizeOutLimitedOrOpenFailed, return false,
"file size not valid.");

std::ifstream fs(real_path, std::ifstream::in | std::ifstream::binary);
if (!fs.is_open()) {
@@ -118,20 +120,20 @@ long GetFileLength(const std::string &input_file) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return -1, "input_file path '%s' not valid", input_file.c_str());
unsigned long long file_length = 0;
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
mmGetFileSize(input_file.c_str(), &file_length) != EN_OK,
ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"file", "errmsg"}, {input_file, strerror(errno)});
return -1, "Open file[%s] failed. %s", input_file.c_str(), strerror(errno));
mmGetFileSize(input_file.c_str(), &file_length) != EN_OK,
ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"file", "errmsg"}, {input_file, strerror(errno)});
return kFileSizeOutLimitedOrOpenFailed, "Open file[%s] failed. %s", input_file.c_str(), strerror(errno));

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length == 0),
ErrorManager::GetInstance().ATCReportErrMessage("E19015", {"filepath"}, {input_file});
return -1, "File[%s] size is 0, not valid.", input_file.c_str());

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_length > kMaxFileSizeLimit,
ErrorManager::GetInstance().ATCReportErrMessage(
"E19016", {"filepath", "filesize", "maxlen"},
{input_file, std::to_string(file_length), std::to_string(kMaxFileSizeLimit)});
return -1, "File[%s] size %lld is out of limit: %d.", input_file.c_str(), file_length,
kMaxFileSizeLimit);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
file_length > kMaxFileSizeLimit, ErrorManager::GetInstance().ATCReportErrMessage(
"E19016", {"filepath", "filesize", "maxlen"},
{input_file, std::to_string(file_length), std::to_string(kMaxFileSizeLimit)});
return kFileSizeOutLimitedOrOpenFailed, "File[%s] size %lld is out of limit: %d.", input_file.c_str(), file_length,
kMaxFileSizeLimit);
return static_cast<long>(file_length);
}

@@ -187,7 +189,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadBytesFromBinaryFile(co
std::streamsize size = file.tellg();

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((size <= 0), file.close(); return false, "file length <= 0, not valid.");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(size > static_cast<int64_t >(kMaxFileSizeLimit), file.close();
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(size > static_cast<int64_t>(kMaxFileSizeLimit), file.close();
return false, "file size %ld is out of limit: %d.", size, kMaxFileSizeLimit);

file.seekg(0, std::ios::beg); // [no need to check value]
@@ -210,8 +212,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int CreateDirectory(const std::
GE_CHK_BOOL_EXEC(!directory_path.empty(), return -1, "directory path is empty.");
auto dir_path_len = directory_path.length();
if (dir_path_len >= MMPA_MAX_PATH) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E19002", {"filepath", "size"}, {directory_path, std::to_string(MMPA_MAX_PATH)});
ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"},
{directory_path, std::to_string(MMPA_MAX_PATH)});
GELOGW("Path[%s] len is too long, it must be less than %d", directory_path.c_str(), MMPA_MAX_PATH);
return -1;
}
@@ -224,8 +226,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int CreateDirectory(const std::
if (ret != 0) {
if (errno != EEXIST) {
ErrorManager::GetInstance().ATCReportErrMessage("E19006", {"path"}, {directory_path});
GELOGW("Can not create directory %s. Make sure the directory exists and writable.",
directory_path.c_str());
GELOGW("Can not create directory %s. Make sure the directory exists and writable.", directory_path.c_str());
return ret;
}
}
@@ -265,7 +266,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromText(const ch

std::string real_path = RealPath(file);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), ErrorManager::GetInstance().ATCReportErrMessage(
"E19000", {"path", "errmsg"}, {file, strerror(errno)});
"E19000", {"path", "errmsg"}, {file, strerror(errno)});
return false, "Path[%s]'s realpath is empty, errmsg[%s]", file, strerror(errno));

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(real_path) == -1, return false, "file size not valid.");
@@ -301,13 +302,13 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromMem(const cha
google::protobuf::io::IstreamInputStream input(&fs);
bool ret = google::protobuf::TextFormat::Parse(&input, message);
GE_IF_BOOL_EXEC(
!ret, GELOGE(ret, "Call [google::protobuf::TextFormat::Parse] func ret fail, please check your text file."));
!ret, GELOGE(ret, "Call [google::protobuf::TextFormat::Parse] func ret fail, please check your text file."));

return ret;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t GetCurrentTimestamp() {
mmTimeval tv {};
mmTimeval tv{};
int ret = mmGetTimeOfDay(&tv, nullptr);
GE_LOGE_IF(ret != EN_OK, "Func gettimeofday may failed: ret=%d", ret);
auto total_use_time = tv.tv_usec + tv.tv_sec * 1000000; // 1000000: seconds to microseconds
@@ -315,7 +316,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t GetCurrentTimestamp()
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint32_t GetCurrentSecondTimestap() {
mmTimeval tv {};
mmTimeval tv{};
int ret = mmGetTimeOfDay(&tv, nullptr);
GE_LOGE_IF(ret != EN_OK, "Func gettimeofday may failed: ret=%d", ret);
auto total_use_time = tv.tv_sec; // seconds
@@ -350,8 +351,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInt64MulOverflow(int6
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string RealPath(const char *path) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(path == nullptr, return "", "path pointer is NULL.");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(path) >= MMPA_MAX_PATH,
ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {path, std::to_string(MMPA_MAX_PATH)});
return "", "Path[%s] len is too long, it must be less than %d", path, MMPA_MAX_PATH);
ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"},
{path, std::to_string(MMPA_MAX_PATH)});
return "", "Path[%s] len is too long, it must be less than %d", path, MMPA_MAX_PATH);

// Nullptr is returned when the path does not exist or there is no permission
// Return absolute path when path is accessible
@@ -385,16 +387,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const
// Path section: Support upper and lower case letters, numbers, dots(.), Chinese characters and underscores
// File name section: Support upper and lower case letters, numbers, underscores, Chinese characters and dots(.)
#ifdef __GNUC__
std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$";
std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$";
#else
std::string mode = "^[a-zA-Z]:([\\\\/][^\\s\\\\/:*?<>\"|][^\\\\/:*?<>\"|]*)*([/\\\\][^\\s\\\\/:*?<>\"|])?$";
std::string mode = "^[a-zA-Z]:([\\\\/][^\\s\\\\/:*?<>\"|][^\\\\/:*?<>\"|]*)*([/\\\\][^\\s\\\\/:*?<>\"|])?$";
#endif

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
!ValidateStr(real_path, mode),
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"},
{atc_param, real_path, kPathValidReason});
return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), real_path.c_str(), kPathValidReason);
!ValidateStr(real_path, mode),
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"},
{atc_param, real_path, kPathValidReason});
return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), real_path.c_str(), kPathValidReason);

// The absolute path points to a file that is not readable
if (mmAccess2(real_path.c_str(), M_R_OK) != EN_OK) {
@@ -416,24 +418,25 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const
}

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(file_path.c_str()) >= MMPA_MAX_PATH,
ErrorManager::GetInstance().ATCReportErrMessage(
"E19002", {"filepath", "size"}, {file_path, std::to_string(MMPA_MAX_PATH)});
return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), MMPA_MAX_PATH);
ErrorManager::GetInstance().ATCReportErrMessage(
"E19002", {"filepath", "size"}, {file_path, std::to_string(MMPA_MAX_PATH)});
return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(),
MMPA_MAX_PATH);

// A regular expression to verify the validity of the file path
// Path section: Support upper and lower case letters, numbers, dots(.), Chinese characters and underscores
// File name section: Support upper and lower case letters, numbers, underscores, Chinese characters and dots(.)
#ifdef __GNUC__
std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$";
std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$";
#else
std::string mode = "^[a-zA-Z]:([\\\\/][^\\s\\\\/:*?<>\"|][^\\\\/:*?<>\"|]*)*([/\\\\][^\\s\\\\/:*?<>\"|])?$";
std::string mode = "^[a-zA-Z]:([\\\\/][^\\s\\\\/:*?<>\"|][^\\\\/:*?<>\"|]*)*([/\\\\][^\\s\\\\/:*?<>\"|])?$";
#endif

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
!ValidateStr(file_path, mode),
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"},
{atc_param, file_path, kPathValidReason});
return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), file_path.c_str(), kPathValidReason);
!ValidateStr(file_path, mode),
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"},
{atc_param, file_path, kPathValidReason});
return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), file_path.c_str(), kPathValidReason);

std::string real_path = RealPath(file_path.c_str());
// Can get absolute path (file exists)
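
For context on the renamed and doubled constants at the top of this file: kProtoReadBytesLimit and kWarningThreshold are the kind of limits protobuf's CodedInputStream takes when parsing large binary models. The actual call site is outside this hunk, so the following is an assumed usage sketch (the two-argument SetTotalBytesLimit exists only in older protobuf releases), not GE's exact code.

#include <climits>
#include <fstream>
#include <string>
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/message.h>

// Hypothetical caller: parse a binary proto with a 2 GB hard limit and a
// 1073741824-byte (1 GiB) warning threshold, matching the constants above.
bool ParseLargeBinaryProto(const std::string &path, google::protobuf::Message &message) {
  std::ifstream fs(path, std::ifstream::in | std::ifstream::binary);
  if (!fs.is_open()) {
    return false;
  }
  google::protobuf::io::IstreamInputStream stream(&fs);
  google::protobuf::io::CodedInputStream coded(&stream);
  coded.SetTotalBytesLimit(INT_MAX, 1073741824);  // two-argument form, pre-3.8 protobuf
  return message.ParseFromCodedStream(&coded) && coded.ConsumedEntireMessage();
}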


+ 4
- 2
ge/executor/CMakeLists.txt View File

@@ -17,6 +17,7 @@ set(SRC_LIST
"../common/dump/dump_properties.cc"
"../common/dump/dump_manager.cc"
"../common/dump/dump_op.cc"
"../common/profiling/ge_profiling.cc"
"../graph/load/graph_loader.cc"
"../graph/execute/graph_execute.cc"
"../omm/csa_interact.cc"
@@ -27,6 +28,7 @@ set(SRC_LIST
"../graph/manager/trans_var_data_utils.cc"
"../graph/manager/util/debug.cc"
"../graph/manager/rdma_pool_allocator.cc"
"../graph/manager/host_mem_allocator.cc"
"../hybrid/node_executor/aicpu/aicpu_ext_info.cc"
"../model/ge_model.cc"
"../model/ge_root_model.cc"
@@ -161,7 +163,7 @@ set(SRC_LIST
add_library(ge_executor STATIC ${SRC_LIST} ${PROTO_HDRS})

target_compile_options(ge_executor PRIVATE
$<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations>
$<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common>
$<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Debug>>:/MTd>
$<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Release>>:/MT>
)
@@ -172,6 +174,7 @@ target_compile_definitions(ge_executor PRIVATE
google=ascend_private
$<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0>
$<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX>
LOG_CPP
)

target_include_directories(ge_executor PRIVATE
@@ -244,7 +247,6 @@ target_link_libraries(ge_executor_shared PRIVATE
mmpa
graph
register
msprof
error_manager
ascend_hal_stub
ascend_protobuf


+ 8
- 164
ge/executor/ge_executor.cc View File

@@ -39,8 +39,6 @@
#include "graph/manager/graph_var_manager.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "opskernel_manager/ops_kernel_builder_manager.h"
#include "graph/opsproto_manager.h"
#include "ge_local_engine/engine/host_cpu_engine.h"

using std::string;
using std::vector;
@@ -209,46 +207,6 @@ bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims,

namespace ge {
bool GeExecutor::isInit_ = false;
class ModelListenerAdapter : public ModelListener {
public:
domi::Status OnComputeDone(uint32_t model_id, uint32_t dataIndex, uint32_t resultCode,
std::vector<ge::OutputTensorInfo> &outputs) {
if (listener == nullptr) {
GELOGE(ge::FAILED, "listener is null.");
return FAILED;
}
return listener->OnComputeDone(model_id, dataIndex, resultCode, outputs);
}

std::shared_ptr<ge::ModelListener> listener;
};

static void InitOpsProtoManger() {
string opsproto_path;
const char *path_env = std::getenv("ASCEND_OPP_PATH");
if (path_env != nullptr) {
string path = path_env;
string file_path = RealPath(path.c_str());
if (file_path.empty()) {
GELOGE(FAILED, "File path %s is invalid.", path.c_str());
return;
}
opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/");
GELOGI("Get opsproto so path from env : %s", path.c_str());
} else {
string path_base = PluginManager::GetPath();
GELOGI("path_base is %s", path_base.c_str());
path_base = path_base.substr(0, path_base.rfind('/'));
path_base = path_base.substr(0, path_base.rfind('/') + 1);
opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
}

GELOGI("Get opsproto path is %s", opsproto_path.c_str());
OpsProtoManager *manager = OpsProtoManager::Instance();
map<string, string> option_tmp;
option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path));
(void)manager->Initialize(option_tmp);
}

GeExecutor::GeExecutor() {}

@@ -259,16 +217,6 @@ Status GeExecutor::Initialize() {
return ge::SUCCESS;
}

OpTilingManager::GetInstance().LoadSo();

Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize();
if (initHostCpuEngineStatus != SUCCESS) {
GELOGE(initHostCpuEngineStatus, "Failed to initialize HostCpuEngine");
return initHostCpuEngineStatus;
}

InitOpsProtoManger();

std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM);
mem_type.push_back(RT_MEMORY_P2P_DDR);
auto ret = MemManager::Instance().Initialize(mem_type);
@@ -283,7 +231,8 @@ Status GeExecutor::Initialize() {
// Start profiling
Options profiling_options;
profiling_options.device_id = 0;
profiling_options.job_id = "";
// job id needs to be set, the value is meaningless
profiling_options.job_id = "1";
ProfilingManager::Instance().Init(profiling_options);

isInit_ = true;
@@ -303,7 +252,7 @@ Status GeExecutor::Finalize() {
// Stop profiling
if (ProfilingManager::Instance().ProfilingOn()) {
ProfilingManager::Instance().StopProfiling();
ProfilingManager::Instance().PluginUnInit(GE_PROFILING_MODULE);
ProfilingManager::Instance().PluginUnInit();
}

GELOGI("Uninit GeExecutor over.");
@@ -572,60 +521,6 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add
return SUCCESS;
}

// Load model
Status GeExecutor::LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key,
int32_t priority, std::shared_ptr<ge::ModelListener> listener) {
GELOGI("load model offline begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}

string filePath = RealPath(path.c_str());
if (filePath.empty()) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID,
"File path is invalid. please check your text file '%s'.", path.c_str());
return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
}

std::shared_ptr<ModelListenerAdapter> listener_adapter = MakeShared<ModelListenerAdapter>();
if (listener_adapter == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
listener_adapter->listener = listener;

Status ret = GraphLoader::LoadModelFromFile(path, key, priority, listener_adapter, model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GeExecutor] LoadModelFromFile failed");
return ACL_ERROR_GE_LOAD_MODEL;
}
return SUCCESS;
}

Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data,
std::shared_ptr<ge::ModelListener> listener) {
GELOGI("Load model begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}

std::shared_ptr<ModelListenerAdapter> listener_adapter = MakeShared<ModelListenerAdapter>();
if (listener_adapter == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
listener_adapter->listener = listener;

Status ret = GraphLoader::LoadModel(model_data, listener_adapter, model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GeExecutor] LoadModel failed.");
return ACL_ERROR_GE_LOAD_MODEL;
}
return ret;
}

Status GeExecutor::UnloadModel(uint32_t model_id) {
GELOGD("unload model %u begin.", model_id);
if (!isInit_) {
@@ -635,10 +530,11 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
Status ret = GraphLoader::DestroyAicpuSessionForInfer(model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id);
return ACL_ERROR_GE_INTERNAL_ERROR;
return ret;
}

std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = ModelManager::GetInstance()->GetHybridModel(model_id);
std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model =
ModelManager::GetInstance()->GetHybridModel(model_id);
if (hybrid_davinci_model != nullptr) {
uint64_t session_id = hybrid_davinci_model->GetSessionId();
VarManagerPool::Instance().RemoveVarManager(session_id);
@@ -652,26 +548,11 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
ret = GraphLoader::UnloadModel(model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id);
return ACL_ERROR_GE_UNLOAD_MODEL;
return ret;
}
return SUCCESS;
}

Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data) {
GELOGI("run model begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}

InputData inputs;
GetDomiInputData(input_data, inputs);
OutputData outputs;
GetDomiOutputData(output_data, outputs);

return GraphExecutor::DataInput(inputs, outputs);
}

// Get input and output descriptor
Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) {
@@ -795,7 +676,7 @@ Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}
Status ret = GraphExecutor::GetAIPPInfo(model_id, index, aipp_info);
Status ret = GraphExecutor::GetAippInfo(model_id, index, aipp_info);
if (ret != SUCCESS) {
GELOGW("GetAIPPInfo is not success.");
return ret;
@@ -832,43 +713,6 @@ Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dyn
return SUCCESS;
}

Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
std::vector<TensorDesc> &output_desc) {
GELOGI("get model desc info for zero copy begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}

std::vector<InputOutputDescInfo> input_desc_infos;
std::vector<InputOutputDescInfo> output_desc_infos;
std::vector<uint32_t> input_formats;
std::vector<uint32_t> output_formats;

Status ret = GraphExecutor::GetInputOutputDescInfoForZeroCopy(model_id, input_desc_infos, output_desc_infos,
input_formats, output_formats);
if (ret != domi::SUCCESS) {
GELOGE(ret, "Get DescInfo from zero copy failed. ret = %u", ret);
return ACL_ERROR_GE_GET_TENSOR_INFO;
}

if (input_formats.size() != input_desc_infos.size()) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "input_formats.size() != input_desc_infos.size().");
return ACL_ERROR_GE_PARAM_INVALID;
}

if (output_formats.size() != output_desc_infos.size()) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "output_formats.size() != output_desc_infos.size().");
return ACL_ERROR_GE_PARAM_INVALID;
}

GetGeTensorDescFromDomiInfo(input_desc, input_desc_infos, input_formats);
GetGeTensorDescFromDomiInfo(output_desc, output_desc_infos, output_formats);

GELOGI("get model desc info from zero copy end.");
return ge::SUCCESS;
}

Status GeExecutor::CommandHandle(const Command &command) {
Status ret = GraphLoader::CommandHandle(command);
if (ret != SUCCESS) {


+ 2
- 4
ge/executor/module.mk View File

@@ -8,12 +8,14 @@ local_ge_executor_src_files := \
../common/dump/dump_op.cc \
../common/ge/plugin_manager.cc \
../common/ge/op_tiling_manager.cc \
../common/profiling/ge_profiling.cc \
../graph/load/graph_loader.cc \
../graph/execute/graph_execute.cc \
../omm/csa_interact.cc \
../graph/manager/graph_manager_utils.cc \
../graph/manager/graph_var_manager.cc \
../graph/manager/rdma_pool_allocator.cc \
../graph/manager/host_mem_allocator.cc \
../graph/manager/graph_mem_allocator.cc \
../graph/manager/graph_caching_allocator.cc \
../graph/manager/trans_var_data_utils.cc \
@@ -177,7 +179,6 @@ local_ge_executor_shared_library := \
libmmpa \
libgraph \
libregister \
libmsprof \
liberror_manager \

local_ge_executor_ldflags := -lrt -ldl \
@@ -234,7 +235,6 @@ LOCAL_SHARED_LIBRARIES := \
libmmpa \
libgraph \
libregister \
libmsprof \
liberror_manager \
stub/libascend_hal \

@@ -272,7 +272,6 @@ LOCAL_SHARED_LIBRARIES := \
libruntime \
libslog \
libmmpa \
libmsprof \

LOCAL_LDFLAGS += $(local_ge_executor_ldflags)

@@ -304,7 +303,6 @@ LOCAL_SHARED_LIBRARIES := \
libruntime \
libslog \
libmmpa \
libmsprof \

ifeq ($(device_os),android)
LOCAL_LDFLAGS += -ldl


+ 2
- 0
ge/executor/proto/dump_task.proto View File

@@ -28,6 +28,7 @@ enum OutputDataType {
DT_RESOURCE = 23;
DT_STRING_REF = 24;
DT_DUAL = 25;
DT_VARIANT = 26;
}

enum OutputFormat {
@@ -108,4 +109,5 @@ message DumpData{
repeated OpOutput output = 3;
repeated OpInput input = 4;
repeated OpBuffer buffer = 5;
string op_name = 6;
}

+ 1
- 0
ge/executor/proto/ge_ir.proto View File

@@ -30,6 +30,7 @@ enum DataType
DT_RESOURCE = 23; // resource type
DT_STRING_REF = 24; // string_ref type
DT_DUAL = 25; /**< dual output type */
DT_VARIANT = 26; // variant type
}

message AttrDef


+ 2
- 0
ge/executor/proto/op_mapping_info.proto View File

@@ -15,6 +15,7 @@ message Output {
int32 original_output_data_type = 7;
int32 original_output_format = 8;
uint64 size = 9;
Shape origin_shape = 10;
}

message Input {
@@ -23,6 +24,7 @@ message Input {
Shape shape = 3;
uint64 address = 4;
uint64 size = 5;
Shape origin_shape = 6;
}

enum BufferType {


+ 8
- 1
ge/ge_inference.mk View File

@@ -64,6 +64,7 @@ GRAPH_MANAGER_LOCAL_SRC_FILES := \
graph/manager/graph_var_manager.cc \
graph/manager/host_mem_manager.cc \
graph/manager/rdma_pool_allocator.cc \
graph/manager/host_mem_allocator.cc \
graph/manager/graph_mem_allocator.cc \
graph/manager/graph_caching_allocator.cc \

@@ -102,6 +103,7 @@ OMG_HOST_SRC_FILES := \
graph/passes/net_output_pass.cc \
graph/passes/replace_transshape_pass.cc \
graph/passes/constant_fuse_same_pass.cc \
graph/passes/fuse_data_nodes_with_common_input_pass.cc \
graph/passes/print_op_pass.cc \
graph/passes/no_use_reshape_remove_pass.cc \
graph/passes/iterator_op_pass.cc \
@@ -109,6 +111,7 @@ OMG_HOST_SRC_FILES := \
graph/passes/atomic_addr_clean_pass.cc \
graph/passes/mark_same_addr_pass.cc \
graph/passes/mark_graph_unknown_status_pass.cc \
graph/passes/dynamic_single_op_reset_shape_pass.cc \
graph/passes/mark_agnostic_pass.cc \
graph/common/omg_util.cc \
graph/common/bcast.cc \
@@ -164,6 +167,7 @@ OMG_HOST_SRC_FILES := \
host_kernels/slice_d_kernel.cc \
host_kernels/dynamic_stitch_kernel.cc \
host_kernels/identity_kernel.cc \
host_kernels/reformat_kernel.cc \
graph/passes/stop_gradient_pass.cc \
graph/passes/prevent_gradient_pass.cc \
graph/passes/identity_pass.cc \
@@ -189,9 +193,12 @@ OMG_HOST_SRC_FILES := \
graph/passes/control_trigger_pass.cc \
graph/passes/cond_pass.cc \
graph/passes/cond_remove_pass.cc \
graph/passes/remove_same_const_pass.cc \
graph/passes/useless_control_out_remove_pass.cc \
graph/passes/for_pass.cc \
graph/passes/enter_pass.cc \
graph/passes/assign_pass.cc \
graph/passes/assign_remove_pass.cc \
graph/passes/inplace_support_check_pass.cc \
graph/passes/addn_pass.cc \
graph/passes/common_subexpression_elimination_pass.cc \
graph/passes/transop_symmetry_elimination_pass.cc \


+ 6
- 4
ge/ge_local_engine/CMakeLists.txt View File

@@ -26,6 +26,7 @@ add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS})

target_compile_options(ge_local_engine PRIVATE
-Werror
-fno-common
)

target_compile_definitions(ge_local_engine PRIVATE
@@ -55,10 +56,8 @@ target_link_libraries(ge_local_engine PRIVATE
-Wl,--no-as-needed
graph
ascend_protobuf
register
c_sec
slog
runtime
-Wl,--as-needed
)

@@ -67,6 +66,7 @@ add_library(atc_ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS})

target_compile_options(atc_ge_local_engine PRIVATE
-Werror
-fno-common
)

target_compile_definitions(atc_ge_local_engine PRIVATE
@@ -97,10 +97,8 @@ target_link_libraries(atc_ge_local_engine PRIVATE
-Wl,--no-as-needed
graph
ascend_protobuf
register
c_sec
slog
runtime_compile
-Wl,--as-needed
)

@@ -114,6 +112,7 @@ add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDR

target_compile_options(ge_local_opskernel_builder PRIVATE
-Werror
-fno-common
)

target_compile_definitions(ge_local_opskernel_builder PRIVATE
@@ -154,6 +153,7 @@ add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO

target_compile_options(atc_ge_local_opskernel_builder PRIVATE
-Werror
-fno-common
)

target_compile_definitions(atc_ge_local_opskernel_builder PRIVATE
@@ -199,10 +199,12 @@ add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PR

target_compile_options(ge_local_opskernel_builder_static PRIVATE
-Werror
-fno-common
)

target_compile_definitions(ge_local_opskernel_builder_static PRIVATE
google=ascend_private
LOG_CPP
)

target_include_directories(ge_local_opskernel_builder_static PRIVATE


+ 27
- 28
ge/ge_local_engine/engine/host_cpu_engine.cc View File

@@ -14,7 +14,6 @@
* limitations under the License.
*/
#include "host_cpu_engine.h"
#include <dlfcn.h>
#include "graph/common/omg_util.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_adapter.h"
@@ -31,35 +30,21 @@ namespace {
case (DTYPE): { \
GeTensorPtr ge_tensor = nullptr; \
if (need_create_flag) { \
GELOGI("node:%s allocate output %zu start, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE)); \
std::unique_ptr<TYPE[]> buf(new (std::nothrow) TYPE[data_num]()); \
if (buf == nullptr) { \
GELOGE(MEMALLOC_FAILED, "New sizeof(T) * data_num(%zu) memory failed", \
static_cast<size_t>(sizeof(TYPE) * data_num)); \
return MEMALLOC_FAILED; \
} \
ge_tensor = MakeShared<GeTensor>(out_desc); \
uint64_t size = data_num * sizeof(TYPE); \
ge_tensor = MakeShared<GeTensor>(out_desc, size); \
GE_CHECK_NOTNULL(ge_tensor); \
GELOGI("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\
if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \
GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \
return MEMALLOC_FAILED; \
} \
GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, size); \
ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \
ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \
outputs.emplace_back(ge_tensor); \
} else { \
ge_tensor = outputs[i]; \
GE_CHECK_NOTNULL(ge_tensor); \
GELOGI("node:%s existed output %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i, \
reinterpret_cast<const uint8_t *>(ge_tensor->GetData().data()), ge_tensor->GetData().size()); \
GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i); \
} \
auto tensor = TensorAdapter::AsTensor(*ge_tensor); \
auto tensor_name = op_desc->GetOutputNameByIndex(i); \
GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \
op_desc->GetName().c_str(), i); \
GELOGD("Successfully inserted output tensor. node = %s, index = %zu, output name = %s, addr = %p, size = %zu", \
op_desc->GetName().c_str(), i, tensor_name.c_str(), tensor.GetData(), tensor.GetSize()); \
named_outputs.emplace(tensor_name, tensor); \
break; \
}
@@ -96,8 +81,8 @@ Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) {

void HostCpuEngine::CloseSo() {
for (auto handle : lib_handles_) {
if (dlclose(handle) != 0) {
GELOGW("failed to close handle, message: %s", dlerror());
if (mmDlclose(handle) != 0) {
GELOGW("failed to close handle, message: %s", mmDlerror());
}
}
lib_handles_.clear();
@@ -236,16 +221,30 @@ Status HostCpuEngine::Run(NodePtr &node, const vector<ConstGeTensorPtr> &inputs,
GELOGD("Run node by host cpu engine. node name = %s", node->GetName().c_str());
std::unique_ptr<HostCpuOp> op_kernel;
GE_CHK_STATUS_RET_NOLOG(FindOpKernel(node, op_kernel));

std::map<std::string, const Tensor> named_inputs;
std::vector<GeTensorPtr> tmp_outputs;
tmp_outputs.swap(outputs);
std::map<std::string, Tensor> named_outputs;
auto op_desc = node->GetOpDesc();
GE_CHK_STATUS_RET_NOLOG(PrepareInputs(op_desc, inputs, named_inputs));
GE_CHK_STATUS_RET_NOLOG(PrepareOutputs(op_desc, tmp_outputs, named_outputs));
GE_CHK_STATUS_RET_NOLOG(PrepareOutputs(op_desc, outputs, named_outputs));
GE_CHK_STATUS_RET_NOLOG(RunInternal(op_desc, *op_kernel, named_inputs, named_outputs));

std::vector<GeTensorPtr> tmp_outputs;
for (size_t i = 0; i < op_desc->GetOutputsSize(); i++) {
auto tensor_name = op_desc->GetOutputNameByIndex(i);
if (tensor_name.empty()) {
GELOGE(INTERNAL_ERROR, "Failed to get output name. node = %s, index = %zu", op_desc->GetName().c_str(), i);
return INTERNAL_ERROR;
}
auto iter = named_outputs.find(tensor_name);
if (iter == named_outputs.end()) {
GELOGE(INTERNAL_ERROR, "Failed to get output tensor. node = %s, index = %zu, tensor_name = %s",
op_desc->GetName().c_str(), i, tensor_name.c_str());
return INTERNAL_ERROR;
}
auto ge_tensor = MakeShared<GeTensor>(TensorAdapter::AsGeTensor(iter->second));
GE_CHECK_NOTNULL(ge_tensor);
tmp_outputs.emplace_back(ge_tensor);
}
GELOGD("Run node by host cpu engine successfully. name node = %s", node->GetName().c_str());
outputs.swap(tmp_outputs);
return SUCCESS;
@@ -323,13 +322,13 @@ Status HostCpuEngine::LoadLibs(std::vector<std::string> &lib_paths) {

Status HostCpuEngine::LoadLib(const std::string &lib_path) {
GELOGI("To invoke dlopen on lib: %s", lib_path.c_str());
auto handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL);
auto handle = mmDlopen(lib_path.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL);
if (handle == nullptr) {
GELOGE(INTERNAL_ERROR, "Failed to invoke dlopen. path = %s, error = %s", lib_path.c_str(), dlerror());
GELOGE(INTERNAL_ERROR, "Failed to invoke dlopen. path = %s, error = %s", lib_path.c_str(), mmDlerror());
return INTERNAL_ERROR;
}

auto initialize = (Status (*)(const HostCpuContext &))dlsym(handle, "Initialize");
auto initialize = (Status (*)(const HostCpuContext &))mmDlsym(handle, "Initialize");
if (initialize != nullptr) {
GELOGI("Invoke function Initialize in lib: %s", lib_path.c_str());
if (initialize(HostCpuContext()) != SUCCESS) {
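
The hunks above also replace the raw dlfcn calls with the mmpa wrappers. A self-contained sketch of that loading pattern follows; only the mmDlopen/mmDlsym/mmDlclose/mmDlerror names and the MMPA_RTLD_* flags are taken from this diff, while the header path and the immediate close are assumptions made for a compact example (HostCpuEngine itself keeps handles in lib_handles_ and closes them in CloseSo).

#include <cstdio>
#include "mmpa/mmpa_api.h"  // assumed header path for the mmpa wrappers

using InitializeFunc = int (*)();

int LoadAndInitialize(const char *lib_path) {
  void *handle = mmDlopen(lib_path, MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL);
  if (handle == nullptr) {
    std::printf("mmDlopen failed, path=%s, error=%s\n", lib_path, mmDlerror());
    return -1;
  }
  // "Initialize" is optional, mirroring HostCpuEngine::LoadLib above.
  auto initialize = reinterpret_cast<InitializeFunc>(mmDlsym(handle, "Initialize"));
  int ret = (initialize != nullptr) ? initialize() : 0;
  if (mmDlclose(handle) != 0) {
    std::printf("mmDlclose failed, error=%s\n", mmDlerror());
  }
  return ret;
}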


+ 11
- 7
ge/ge_runner.mk View File

@@ -29,6 +29,8 @@ LIBGE_LOCAL_SRC_FILES := \
common/dump/dump_manager.cc \
common/dump/dump_properties.cc \
common/dump/dump_op.cc \
common/profiling/ge_profiling.cc \
common/profiling/ge_runner_profiling.cc \
engine_manager/dnnengine_manager.cc \
ge_local_engine/engine/host_cpu_engine.cc \
generator/ge_generator.cc \
@@ -92,6 +94,7 @@ LIBGE_LOCAL_SRC_FILES := \
graph/manager/graph_var_manager.cc \
graph/manager/host_mem_manager.cc \
graph/manager/rdma_pool_allocator.cc \
graph/manager/host_mem_allocator.cc \
graph/manager/memory_api.cc \
graph/manager/model_manager/event_manager.cc \
graph/manager/trans_var_data_utils.cc \
@@ -111,6 +114,7 @@ LIBGE_LOCAL_SRC_FILES := \
graph/passes/atomic_addr_clean_pass.cc \
graph/passes/mark_same_addr_pass.cc \
graph/passes/mark_graph_unknown_status_pass.cc \
graph/passes/dynamic_single_op_reset_shape_pass.cc \
graph/passes/mark_agnostic_pass.cc \
graph/partition/dynamic_shape_partition.cc \
graph/partition/stage_partition.cc \
@@ -123,13 +127,17 @@ LIBGE_LOCAL_SRC_FILES := \
graph/passes/compile_nodes_pass.cc \
graph/passes/constant_folding_pass.cc \
graph/passes/constant_fuse_same_pass.cc \
graph/passes/fuse_data_nodes_with_common_input_pass.cc \
graph/passes/remove_same_const_pass.cc \
graph/passes/useless_control_out_remove_pass.cc \
graph/passes/control_trigger_pass.cc \
graph/passes/dimension_adjust_pass.cc \
graph/passes/dimension_compute_pass.cc \
graph/passes/dropout_pass.cc \
graph/passes/hccl_group_pass.cc \
graph/passes/enter_pass.cc \
graph/passes/assign_pass.cc \
graph/passes/assign_remove_pass.cc \
graph/passes/inplace_support_check_pass.cc \
graph/passes/flow_ctrl_pass.cc \
graph/passes/global_step_insert_pass.cc \
host_kernels/transpose_kernel.cc \
@@ -170,6 +178,7 @@ LIBGE_LOCAL_SRC_FILES := \
host_kernels/sub_kernel.cc \
host_kernels/transdata_kernel.cc \
host_kernels/unpack_kernel.cc \
host_kernels/reformat_kernel.cc \
graph/passes/folding_pass.cc \
graph/passes/get_original_format_pass.cc \
graph/passes/guarantee_const_pass.cc \
@@ -306,7 +315,6 @@ LIBGE_LOCAL_SRC_FILES := \
LIBCLIENT_LOCAL_SRC_FILES := \
proto/ge_api.proto \
client/ge_api.cc \
client/ge_prof.cc \

RUNNER_LOCAL_C_INCLUDES := \
$(LOCAL_PATH) ./ \
@@ -371,7 +379,7 @@ LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES)

LOCAL_STATIC_LIBRARIES := libge_memory \
libadump_server \
libmsprofiler \
libmsprofiler_fwk \
libmmpa \

LOCAL_SHARED_LIBRARIES := \
@@ -381,7 +389,6 @@ LOCAL_SHARED_LIBRARIES := \
libgraph \
libregister \
libge_common \
libmsprof \
liberror_manager \

LOCAL_LDFLAGS := -lrt -ldl
@@ -408,7 +415,6 @@ endif
LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc \
../../out/ge/lib64/stub/ge_prof.cc \
../../out/ge/lib64/stub/ge_ir_build.cc \

LOCAL_SHARED_LIBRARIES :=
@@ -464,7 +470,6 @@ LOCAL_SHARED_LIBRARIES := \
libc_sec \
libslog \
libmmpa \
libmsprof \

LOCAL_LDFLAGS := -lrt -ldl

@@ -497,7 +502,6 @@ LOCAL_SHARED_LIBRARIES := \
libc_sec \
libslog \
libmmpa \
libmsprof \

LOCAL_LDFLAGS := -lrt -ldl



+ 3
- 0
ge/ge_runtime/CMakeLists.txt View File

@@ -23,10 +23,13 @@ add_library(ge_runtime SHARED ${GE_SRC_LIST})
target_compile_options(ge_runtime PRIVATE
-Werror
-O2
-Wno-deprecated-declarations
-fno-common
)

target_compile_definitions(ge_runtime PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
LOG_CPP
)

target_include_directories(ge_runtime PRIVATE


+ 5
- 4
ge/ge_runtime/runtime_model.cc View File

@@ -28,15 +28,16 @@

namespace ge {
namespace model_runner {
const int kOffsetUnit = 8;
RuntimeModel::~RuntimeModel() {
GELOGI("RuntimeModel destructor start");

// Release task first, hccl task hold stream
task_list_.clear();

// Unbind rtModel from all task related streams
RtModelUnbindStream();

// Release task first, hccl task hold stream
task_list_.clear();

// Release all task related streams
RtStreamDestory();

@@ -495,7 +496,7 @@ bool RuntimeModel::InitConstantInfo(std::shared_ptr<DavinciModel> &davinci_model
return false;
}
uint64_t *buff = reinterpret_cast<uint64_t *>(const_cast<char *>(constant->weight_data.data()));
int64_t offset = elem_num * 8;
int64_t offset = elem_num * kOffsetUnit;
uintptr_t hbm_raw_data_base_addr = reinterpret_cast<uintptr_t>(constant->output_addrs[0]) + offset;
for (int64_t i = elem_num - 1; i >= 0; --i) {
buff[i] = hbm_raw_data_base_addr + (buff[i] - buff[0]);
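
The kOffsetUnit change above is a magic-number cleanup, but the surrounding rebasing logic is easy to misread, so here is an illustrative standalone version (not GE code; the names are local to the sketch): the constant's output starts with an elem_num * 8-byte table of uint64 slots, each holding an offset that shares buff[0]'s origin, and the loop turns every slot into an absolute HBM address whose raw data begins right after the table.

#include <cstdint>
#include <vector>

// Illustrative rebasing, mirroring InitConstantInfo above.
void RebaseStringOffsets(std::vector<uint64_t> &buff, uintptr_t output_addr) {
  const int64_t kOffsetUnit = 8;  // one 8-byte slot per element
  const int64_t elem_num = static_cast<int64_t>(buff.size());
  const uintptr_t hbm_raw_data_base_addr = output_addr + elem_num * kOffsetUnit;
  // Walk backwards: buff[0] is the base every other slot is measured against,
  // so it must be rewritten last.
  for (int64_t i = elem_num - 1; i >= 0; --i) {
    buff[i] = hbm_raw_data_base_addr + (buff[i] - buff[0]);
  }
}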


+ 207
- 39
ge/generator/ge_generator.cc View File

@@ -47,6 +47,8 @@ const char *const kEngineNameDefault = "default";
const char *const kVectorEngine = "VectorEngine";
const char *const kAIcoreEngine = "AIcoreEngine";
const char *const kFileNameSuffix = "online";
const size_t kDynamicDimSize = 1;
const int64_t kDynamicDimValue = -2;

std::map<ge::OpEngineType, std::string> engine_type_map{
{ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}};
@@ -156,7 +158,12 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen
}

string op_type;
if (!AttrUtils::GetStr(tensor, kAttrOpType, op_type) || op_type.empty()) {
bool is_const = false;
(void)AttrUtils::GetBool(tensor, CONST_ATTR_NAME_INPUT, is_const);
if (is_const) {
GELOGD("Get input[%d] is const", index);
op_type = CONSTANTOP;
} else if (!AttrUtils::GetStr(tensor, kAttrOpType, op_type) || op_type.empty()) {
op_type = DATA;
}

@@ -165,6 +172,18 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen
if (data_op == nullptr) {
return FAILED;
}
if (is_const) {
ConstGeTensorPtr tensor_value;
if (!AttrUtils::GetTensor(tensor, ge::ATTR_NAME_WEIGHTS, tensor_value)) {
GELOGE(FAILED, "Get value failed, node name:%s.", tensor.GetName().c_str());
return FAILED;
}
if (!AttrUtils::SetTensor(data_op, ge::ATTR_NAME_WEIGHTS, tensor_value)) {
GELOGE(FAILED, "Set attr ATTR_NAME_WEIGHTS fail.");
return FAILED;
}
}

(void)AttrUtils::SetBool(data_op, "_is_single_op", true);

GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail.");
@@ -231,6 +250,61 @@ static void GetOpsProtoPath(string &opsproto_path) {
opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
}

static Status CheckShapeReset(const OpDescPtr &op_desc, bool &change_shape_flag) {
GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID);
change_shape_flag = false;
for (size_t i = 0; i < op_desc->GetAllInputsDesc().size(); i++) {
auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
GE_CHECK_NOTNULL(input_desc);
// pass scalar input desc
auto dims = input_desc->GetShape().GetDims();
if (dims.size() == kDynamicDimSize && dims[0] == kDynamicDimValue) {
change_shape_flag = true;
}
}
for (size_t i = 0; i < op_desc->GetAllOutputsDesc().size(); i++) {
auto output_desc = op_desc->MutableOutputDesc(static_cast<uint32_t>(i));
GE_CHECK_NOTNULL(output_desc);
// pass scalar output desc
auto dims = output_desc->GetShape().GetDims();
if (dims.size() == kDynamicDimSize && dims[0] == kDynamicDimValue) {
change_shape_flag = true;
}
}
return SUCCESS;
}

static Status ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTensor> &inputs_dynamic) {
for (auto input : inputs) {
auto input_desc = input.GetTensorDesc();
GeShape shape_ori = input_desc.GetShape();

std::vector<int64_t> dynamic_shape_dims = {kDynamicDimValue};
GeShape dynamic_shape(dynamic_shape_dims);
std::vector<std::pair<int64_t, int64_t>> dynamic_shape_range;

ge::GeTensor inputTensor;
ge::GeTensorDesc desc(input_desc);

bool is_const = false;
(void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const);
if (!is_const && shape_ori.GetDims().size() > 0) {
int64_t storage_format = FORMAT_NCHW;
if (ge::AttrUtils::GetInt(desc, ge::ATTR_NAME_STORAGE_FORMAT, storage_format) &&
!ge::AttrUtils::SetListInt(desc, ge::ATTR_NAME_STORAGE_SHAPE, dynamic_shape_dims)) {
GELOGE(FAILED, "Set attr ATTR_NAME_STORAGE_SHAPE fail.");
return FAILED;
}
desc.SetShape(dynamic_shape);
desc.SetShapeRange(dynamic_shape_range);
}

inputTensor.SetTensorDesc(desc);
inputs_dynamic.push_back(inputTensor);
}
return SUCCESS;
}

class GeGenerator::Impl {
public:
Impl(OmgContext &omg_context) : omg_context_(omg_context) {}
@@ -240,6 +314,8 @@ class GeGenerator::Impl {

Status SaveModel(const string &file_name_prefix, GeModelPtr &models, ModelBufferData &model);

Status SaveRootModel(const string &file_name_prefix, GeRootModelPtr &model, ModelBufferData &model_buff);

Status SaveParams(GeModelPtr &ge_model, const string &type, const map<string, GeAttrValue> &attrs,
const vector<GeTensor> &inputs, const vector<GeTensor> &outputs);

@@ -260,6 +336,7 @@ class GeGenerator::Impl {
bool GetVersionFromPath(const std::string &file_path, std::string &version);
bool SetAtcVersionInfo(AttrHolder &obj);
bool SetOppVersionInfo(AttrHolder &obj);
bool SetOmSystemInfo(AttrHolder &obj);
};

Status GeGenerator::Initialize(const map<string, string> &options) {
@@ -470,6 +547,30 @@ bool GeGenerator::Impl::SetOppVersionInfo(AttrHolder &obj) {
return true;
}

bool GeGenerator::Impl::SetOmSystemInfo(AttrHolder &obj) {
std::string soc_version;
(void)ge::GetContext().GetOption(ge::SOC_VERSION, soc_version);
GELOGI("SetOmSystemInfo soc_version: %s", soc_version.c_str());
if (!ge::AttrUtils::SetStr(obj, "soc_version", soc_version)) {
GELOGW("SetStr of soc_version failed.");
return false;
}

std::string framework_type;
(void)ge::GetContext().GetOption(ge::FRAMEWORK_TYPE, framework_type);
GELOGI("SetOmSystemInfo framework_type: %s", framework_type.c_str());
auto iter = ge::kFwkTypeToStr.find(framework_type);
if (iter == ge::kFwkTypeToStr.end()) {
GELOGW("Can not find framework_type in the map.");
return false;
}
if (!ge::AttrUtils::SetStr(obj, "framework_type", iter->second)) {
GELOGW("SetStr of framework_type failed.");
return false;
}
return true;
}

Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs,
ModelBufferData &model, bool is_offline) {
rtContext_t ctx = nullptr;
@@ -507,17 +608,18 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
ModelHelper model_helper;
string model_name = "";
Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), model_name);
Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(),
model_name);
if (name_ret != SUCCESS) {
ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"});
GELOGE(FAILED, "Get model_name failed. Param --output is invalid");
GELOGE(FAILED, "Get model_name failed. Param --output is invalid.");
return PARAM_INVALID;
}
map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model can not be null");
GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null");
ge_model->SetName(model_name);
ret = impl_->SaveModel(file_name_prefix, ge_model, model);
ret = impl_->SaveRootModel(file_name_prefix, ge_root_model, model);
if (ret != SUCCESS) {
GELOGE(ret, "Save model failed");
if (impl_->graph_manager_.Finalize() != SUCCESS) {
@@ -567,6 +669,9 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor>
Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs,
const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff,
bool is_offline) {
if (!is_offline) {
(void)AttrUtils::SetBool(op_desc, ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, true);
}

if (CheckForSingleOp(op_desc, inputs, outputs) != SUCCESS) {
GELOGE(PARAM_INVALID, "Input param is invalid when building single op!");
@@ -594,40 +699,11 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in

// 2. Create ComputeGraph.
string name = ge::CurrentTimeInStr() + "_" + model_file_name;
ge::ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>(name);
GE_CHECK_NOTNULL_EXEC(compute_graph, return INTERNAL_ERROR);

// 3. Add Node to ComputeGraph.
NodePtr op_node = compute_graph->AddNode(op_desc);
GE_CHECK_NOTNULL_EXEC(op_node, return INTERNAL_ERROR);

// 4. Create InputData node.
int32_t arg_index = 0;
if (inputs.empty()) {
for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) {
GE_CHECK_NOTNULL_EXEC(input_desc, return INTERNAL_ERROR);
if (!IsNeedConnectInputOpForSingleOp(*input_desc)) {
continue;
}
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false));
arg_index++;
}
} else {
for (const auto &in_desc : inputs) {
GeTensorDesc input_desc = in_desc.GetTensorDesc();
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, input_desc, arg_index, true));
arg_index++;
}
}

// 5. Create Output node.
if (!outputs.empty()) {
GE_CHK_STATUS_RET_NOLOG(AddOutputs(compute_graph, op_node, outputs));
Graph graph;
if (BuildSingleOpGraph(op_desc, inputs, outputs, name, graph) != ge::SUCCESS) {
GELOGE(GRAPH_FAILED, "Make graph failed.");
return GRAPH_FAILED;
}

// dump ComputeGraph.
compute_graph->Dump();
Graph graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph);
GELOGI("ATC parser success in single op build.");

GeRootModelPtr ge_root_model = nullptr;
@@ -644,7 +720,18 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
}
GeModelPtr &ge_model = name_to_ge_model.begin()->second;
GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str());
GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs));

bool dynamic_flag = false;
if (CheckShapeReset(op_desc, dynamic_flag) == SUCCESS && dynamic_flag) {
vector<GeTensor> inputs_dynamic;
vector<GeTensor> outputs_dynamic;
GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(inputs, inputs_dynamic));
GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(outputs, outputs_dynamic));
GE_CHK_STATUS_RET_NOLOG(
impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic));
} else {
GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs));
}
GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff));
return SUCCESS;
}
@@ -683,6 +770,46 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor
return BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false);
}

Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
const vector<GeTensor> &outputs, std::string graph_name, Graph &graph) {
ge::ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>(graph_name);
GE_CHECK_NOTNULL_EXEC(compute_graph, return INTERNAL_ERROR);

// 1. Add Node to ComputeGraph.
NodePtr op_node = compute_graph->AddNode(op_desc);
GE_CHECK_NOTNULL_EXEC(op_node, return INTERNAL_ERROR);

// 2. Create InputData node.
int32_t arg_index = 0;
if (inputs.empty()) {
for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) {
GE_CHECK_NOTNULL_EXEC(input_desc, return INTERNAL_ERROR);
if (!IsNeedConnectInputOpForSingleOp(*input_desc)) {
continue;
}
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false));
arg_index++;
}
} else {
for (const auto &in_desc : inputs) {
GeTensorDesc input_desc = in_desc.GetTensorDesc();
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, input_desc, arg_index, true));
arg_index++;
}
}

// 3. Create Output node.
if (!outputs.empty()) {
GE_CHK_STATUS_RET_NOLOG(AddOutputs(compute_graph, op_node, outputs));
}

// dump ComputeGraph node.
compute_graph->Dump();
graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph);

return SUCCESS;
}

Status GeGenerator::Impl::SaveParams(GeModelPtr &ge_model, const string &type, const map<string, GeAttrValue> &attrs,
const vector<GeTensor> &inputs, const vector<GeTensor> &outputs) {
GE_CHECK_NOTNULL_EXEC(ge_model, return PARAM_INVALID);
@@ -712,6 +839,47 @@ Status GeGenerator::Impl::SaveModel(const string &file_name_prefix, GeModelPtr &
return SUCCESS;
}

Status GeGenerator::Impl::SaveRootModel(const string &file_name_prefix, GeRootModelPtr &ge_root_model,
ModelBufferData &model_buff) {
bool is_unknown_shape = false;
auto ret = ge_root_model->CheckIsUnknownShape(is_unknown_shape);
if (ret != SUCCESS) {
GELOGE(FAILED, "Check root model is unkonwn shape failed");
return FAILED;
}
GELOGD("begin save root model, cur model is unkonwn shape model ? : %d", is_unknown_shape);
GE_CHK_BOOL_EXEC(!ge_root_model->GetSubgraphInstanceNameToModel().empty(), return FAILED,
"ge root model has no sub model")
GeModelPtr model_root = nullptr;
if (is_unknown_shape) {
model_root = make_shared<GeModel>();
model_root->SetGraph(GraphUtils::CreateGraphFromComputeGraph(ge_root_model->GetRootGraph()));
ge_root_model->SetSubgraphInstanceNameToModel(ge_root_model->GetRootGraph()->GetName(), model_root);
model_root->SetName(ge_root_model->GetRootGraph()->GetName());
} else {
model_root = ge_root_model->GetSubgraphInstanceNameToModel().begin()->second;
}
// set atc version
if (!SetAtcVersionInfo(*(model_root.get()))) {
GELOGW("SetPackageVersionInfo of atc failed!");
}
// set opp version
if (!SetOppVersionInfo(*(model_root.get()))) {
GELOGW("SetPackageVersionInfo of ops failed!");
}
if (!SetOmSystemInfo(*(model_root.get()))) {
GELOGW("SetOmsystemInfo failed!");
}
ModelHelper model_helper;
model_helper.SetSaveMode(is_offline_);
ret = model_helper.SaveToOmRootModel(ge_root_model, save_param_, file_name_prefix, model_buff, is_unknown_shape);
if (ret != SUCCESS) {
GELOGE(ret, "Save to om model failed");
return ret;
}
return SUCCESS;
}

Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> &inputs,
GeRootModelPtr &ge_root_model) {
static std::atomic<GraphId> atomic_graph_id(0);
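
CheckShapeReset above treats a rank-1 shape whose single dimension is kDynamicDimValue (-2) as fully dynamic, and ResetTensorVecShape then rewrites every non-const, non-scalar tensor to that marker shape. A self-contained sketch of the same rule on plain dimension vectors, with the two constants copied from the hunk and everything else (names, container choice) illustrative:

#include <cstddef>
#include <cstdint>
#include <vector>

namespace {
const size_t kDynamicDimSize = 1;    // copied from the hunk above
const int64_t kDynamicDimValue = -2;
}  // namespace

// Returns true if any shape is exactly {-2}, i.e. the "unknown rank" marker.
bool NeedsDynamicReset(const std::vector<std::vector<int64_t>> &shapes) {
  for (const auto &dims : shapes) {
    if (dims.size() == kDynamicDimSize && dims[0] == kDynamicDimValue) {
      return true;
    }
  }
  return false;
}

// Rewrites every non-const, non-scalar shape to the dynamic marker {-2}.
// shapes and is_const are expected to have the same length.
void ResetShapes(std::vector<std::vector<int64_t>> &shapes, const std::vector<bool> &is_const) {
  for (size_t i = 0; i < shapes.size() && i < is_const.size(); ++i) {
    if (!is_const[i] && !shapes[i].empty()) {
      shapes[i] = {kDynamicDimValue};
    }
  }
}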


+ 130
- 10
ge/graph/build/graph_builder.cc

@@ -15,6 +15,7 @@
*/

#include "graph/build/graph_builder.h"
#include "graph/build/memory/graph_mem_assigner.h"
#include "common/ge/ge_util.h"
#include "common/helper/model_helper.h"
#include "graph/build/logical_stream_allocator.h"
@@ -200,7 +201,7 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfo
bool is_dynamic_shape = false;
// To be compatible with the old process, do not verify the return value temporarily.
(void)AttrUtils::GetBool(comp_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape);
if (is_dynamic_shape) {
if (is_dynamic_shape || comp_graph->GetGraphUnknownFlag()) {
GE_CHK_STATUS_RET(
BuildForDynamicShapeGraph(comp_graph, subgraph_ptr_list, ge_root_model_ptr, ge_model_ptr, session_id),
"Build for dynamic shape graph failed.");
@@ -270,16 +271,78 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::v
return SUCCESS;
}

Status GraphBuilder::SetConstantInputOffset(ComputeGraphPtr &comp_graph) {
for (auto &node : comp_graph->GetDirectNode()) {
GE_CHECK_NOTNULL(node);
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
auto num_inputs = op_desc->GetInputsSize();
std::vector<int64_t> input_offsets(num_inputs, 0);
int valid_input_index = -1;
for (uint32_t i = 0; i < node->GetAllInDataAnchorsSize(); ++i) {
auto in_anchor = node->GetInDataAnchor(i);
auto peer_out_anchor = in_anchor->GetPeerOutAnchor();
if (peer_out_anchor == nullptr) {
continue;
}

++valid_input_index;
auto peer_node = peer_out_anchor->GetOwnerNode();
if (peer_node == nullptr) {
continue;
}

if (peer_node->GetType() != CONSTANT) {
continue;
}

std::vector<GeTensorPtr> weights = OpDescUtils::MutableWeights(peer_node);
if (weights.empty()) {
GELOGE(FAILED, "weights size of node %s is empty", node->GetName().c_str());
return FAILED;
}
GeTensorPtr weight = weights[0];
GE_CHECK_NOTNULL(weight);
int64_t input_offset = 0;
(void) TensorUtils::GetDataOffset(weight->MutableTensorDesc(), input_offset);
// valid_input_index must be smaller than num_inputs
input_offsets[valid_input_index] = input_offset;
GELOGD("[%s] input[%u] is const, offset = %ld", node->GetName().c_str(), valid_input_index, input_offset);
}

op_desc->SetInputOffset(input_offsets);
std::vector<int64_t> output_offsets(op_desc->GetOutputsSize(), 0);
op_desc->SetOutputOffset(output_offsets);
}
return SUCCESS;
}

Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr,
uint64_t session_id) {
GELOGI("Begin to build unknown shape graph[%s].", comp_graph->GetName().c_str());
Graph2SubGraphInfoList subgraph_map;
ge::ModelBuilder builder(session_id, comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_);
GE_DUMP(comp_graph, "BeforePreBuildModel");
GE_TIMESTAMP_START(PreBuildModel);
GE_CHK_STATUS_RET(builder.PreBuildModel(), "Graph[%s] builder PreBuildModel() return fail.",
comp_graph->GetName().c_str());
GE_TIMESTAMP_END(PreBuildModel, "GraphBuilder::PreBuildModel");
GE_DUMP(comp_graph, "AfterPreBuildModel");

GE_TIMESTAMP_START(CalcOpParam);
GE_CHK_STATUS_RET(CalcOpParam(comp_graph), "Graph[%s] builder CalcOpParam() return fail.",
comp_graph->GetName().c_str());
GE_TIMESTAMP_END(CalcOpParam, "GraphBuilder::CalcOpParam");
GE_DUMP(comp_graph, "AfterCalcOpParam");
Graph2SubGraphInfoList subgraph_map;
ge::ModelBuilder builder(session_id, comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_);

GE_TIMESTAMP_START(SetConstantInputOffset);
GE_CHK_STATUS_RET(SetConstantInputOffset(comp_graph),
"Graph[%s] failed to set constant input offset.", comp_graph->GetName().c_str());
GE_TIMESTAMP_END(SetConstantInputOffset, "GraphBuilder::SetConstantInputOffset");
GE_TIMESTAMP_START(MergeWeights);
GE_CHK_STATUS_RET(builder.MergeWeights(), "Graph[%s] failed to merge weights.", comp_graph->GetName().c_str());
GE_TIMESTAMP_END(MergeWeights, "GraphBuilder::MergeWeights");

ModelPtr model_ptr = MakeShared<ge::Model>();
if (model_ptr == nullptr) {
return MEMALLOC_FAILED;
@@ -349,7 +412,8 @@ static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph
GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str());
std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy";
if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) {
GELOGE(FAILED, "Insert memcpy between %s and %s failed.", in_node->GetName().c_str(), node->GetName().c_str());
GELOGE(FAILED, "Insert memcpy between %s and %s failed.",
in_node->GetName().c_str(), node->GetName().c_str());
return FAILED;
}
}
@@ -359,6 +423,52 @@ static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph
return SUCCESS;
}

Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) {
bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag();
com_graph->SetGraphUnknownFlag(false);

GELOGD("Start to mark profiling task attr for fp and bp.");
TaskGenerator task_generator;
ProfilingPoint profiling_point;
std::vector<uint32_t> all_reduce_node_index;
Status ret = task_generator.FindProfilingNodeIndex(com_graph, profiling_point, all_reduce_node_index);
com_graph->SetGraphUnknownFlag(original_unknown_shape_flag);
if (ret != SUCCESS) {
GELOGW("Find profiling node index failed.");
}
if (profiling_point.fp_index == 0 || profiling_point.bp_index == 0 || profiling_point.end_index.empty()) {
GELOGD("No need to mark fp bp profiling task attr.");
return SUCCESS;
}
// mark profiling task attr for node
uint32_t node_index = 0;
for (const auto &node : com_graph->GetAllNodes()) {
OpDescPtr op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(node->GetOpDesc());
node_index++;
if (profiling_point.fp_index == node_index) {
GELOGI("The first fp node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index);
(void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, true);
}
if (profiling_point.bp_index == node_index) {
GELOGI("The bp node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index);
(void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true);
}
for (size_t i = 0; i < all_reduce_node_index.size(); i++) {
if (all_reduce_node_index[i] == node_index) {
GELOGI("The all reduce node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index);
(void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true);
continue;
}
}
if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end()) {
GELOGI("The end node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index);
(void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, true);
}
}
return SUCCESS;
}

Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr,
@@ -374,10 +484,21 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
op_desc->GetName().c_str());
}
}
//
for (auto &sub_graph : comp_graph->GetAllSubgraphs()) {

// Set fp bp profiling task attr for graph
if (MarkFpBpProfilingTaskAttr(comp_graph) != SUCCESS) {
GELOGE(FAILED, "Set fp bp profiling task attr for graph.");
return FAILED;
}

auto all_graphs = comp_graph->GetAllSubgraphs();
if (all_graphs.empty()) {
all_graphs.push_back(comp_graph);
}
for (auto &sub_graph : all_graphs) {
// exclude functional subgraph in known subgraph
if (sub_graph->GetParentGraph() != comp_graph && !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) {
if (sub_graph->GetParentGraph() != nullptr && sub_graph->GetParentGraph() != comp_graph &&
!sub_graph->GetParentGraph()->GetGraphUnknownFlag()) {
continue;
}

@@ -475,7 +596,7 @@ Status GraphBuilder::GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr
}

Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) {
// set input_desc.size = src_node.output_desc.size
// Set the size of input_desc to 'src_node.output_desc.size'
if (node_ptr->GetType() == DATA) {
bool is_unknown_shape = false;
GE_CHK_STATUS_RET(ge::NodeUtils::GetNodeUnknownShapeStatus(*node_ptr, is_unknown_shape),
@@ -498,7 +619,7 @@ Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) {
GE_IF_BOOL_EXEC(src_op == nullptr, continue);
auto node_op_desc = node_ptr->GetOpDesc();
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
// set dst_node.input_desc = src_node.output_desc
// Set the input_desc of dst_node to 'src_node.output_desc'
auto output_desc = src_op->GetOutputDescPtr(peer_out_anchor->GetIdx());
int64_t size = 0;
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS, GELOGI("Get size failed!"));
@@ -512,7 +633,6 @@ Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) {
auto input_desc = node_op_desc->MutableInputDesc(in_data_anchor->GetIdx());
GE_CHECK_NOTNULL(input_desc);
(void) ge::TensorUtils::SetSize(*input_desc, size);
GE_CHK_STATUS_RET(node_op_desc->UpdateInputDesc(in_data_anchor->GetIdx(), *input_desc));
GELOGD("%s input desc, dim_size: %zu, mem_size: %ld, format: %s, type: %s.", node_ptr->GetName().c_str(),
input_desc->GetShape().GetDimNum(), size, TypeUtils::FormatToSerialString(input_desc->GetFormat()).c_str(),
TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str());
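
SetConstantInputOffset above keeps a separate valid_input_index because unconnected in-anchors occupy a slot in the anchor list but must not consume a slot in the offset vector, and only const peers contribute a non-zero offset. A reduced sketch of that bookkeeping, with a made-up Peer struct standing in for GE's anchors:

#include <cstdint>
#include <vector>

// Stand-in for a peer output: present or not, const or not, and its data offset.
struct Peer {
  bool connected = false;
  bool is_const = false;
  int64_t data_offset = 0;
};

// Builds the per-input offset vector: only connected const peers contribute an
// offset; every other slot stays 0. valid_input_index counts connected anchors only.
std::vector<int64_t> BuildInputOffsets(size_t num_inputs, const std::vector<Peer> &anchors) {
  std::vector<int64_t> input_offsets(num_inputs, 0);
  int valid_input_index = -1;
  for (const auto &peer : anchors) {
    if (!peer.connected) {
      continue;  // unconnected anchors do not advance the valid index
    }
    ++valid_input_index;
    if (!peer.is_const) {
      continue;
    }
    if (static_cast<size_t>(valid_input_index) < num_inputs) {
      input_offsets[valid_input_index] = peer.data_offset;
    }
  }
  return input_offsets;
}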


+ 2
- 0
ge/graph/build/graph_builder.h

@@ -60,6 +60,7 @@ class GraphBuilder {
Status UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr);
Status CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc);
Status SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list);
Status MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph);
Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr,
uint64_t session_id = INVALID_SESSION_ID);
@@ -67,6 +68,7 @@ class GraphBuilder {
GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID);
Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr,
uint64_t session_id = INVALID_SESSION_ID);
Status SetConstantInputOffset(ComputeGraphPtr &comp_graph);
Status AddOutputMemTypeForNode(const NodePtr &node);
Status BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr,
uint64_t session_id = INVALID_SESSION_ID);


+ 0
- 42
ge/graph/build/memory/CMakeLists.txt

@@ -1,42 +0,0 @@
set(SRC_LIST
"memory_assigner.cc"
"graph_mem_assigner.cc"
"binary_block_mem_assigner.cc"
"block_mem_assigner.cc"
"hybrid_mem_assigner.cc"
"max_block_mem_assigner.cc"
"var_mem_assign_util.cc"
)

############ libge_memory.a ############
add_library(ge_memory STATIC ${SRC_LIST})

target_compile_options(ge_memory PRIVATE
-Werror
-O2
)

target_compile_definitions(ge_memory PRIVATE
google=ascend_private
)

target_link_libraries(ge_memory PRIVATE
$<BUILD_INTERFACE:intf_pub>
ascend_protobuf
c_sec
)

target_include_directories(ge_memory PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${GE_CODE_DIR}/inc/framework
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

+ 9
- 7
ge/graph/build/memory/binary_block_mem_assigner.cc

@@ -21,8 +21,8 @@
namespace {
const uint32_t kRangeCeilInterval = 2;
const uint32_t kLogBase = 2;
const int64_t kLargeBlockSize = 8 * 1024 * 1024;
const int64_t kLargeBlockRangeSize = 10;
const int64_t kLargeBlockSize = 8388608; // 8 * 1024 * 1024
const int64_t kLargeBlockRangeSize = 2;
} // namespace

namespace ge {
@@ -69,19 +69,21 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) {
GELOGW("Vector all_memory_size is empty!");
return SUCCESS;
}
if ((all_memory_size.front() == 0) || (log(kLogBase) == 0)) {
GELOGE(FAILED, "dividend is 0!");
if ((all_memory_size.front() <= 0) || (log(kLogBase) == 0)) {
GELOGE(FAILED, "Memory size:%ld is invalid.", all_memory_size.front());
return FAILED;
}
// Memory sizes are 512-byte aligned, so there is no need for a range smaller than 512
int64_t min_memory_size = (all_memory_size.back() > MEM_ALIGN_SIZE) ? MEM_ALIGN_SIZE : all_memory_size.front();
auto range_number = static_cast<size_t>(
ceil(log(all_memory_size.back() / static_cast<double>(all_memory_size.front())) / log(kLogBase)));
ceil(log(all_memory_size.back() / static_cast<double>(min_memory_size)) / log(kLogBase)));
range_number = (range_number == 0) ? 1 : range_number;
GELOGD("Range number: %zu", range_number);

vector<vector<int64_t>> ranges(range_number);
GE_CHK_BOOL_EXEC((range_number != 0), return PARAM_INVALID, "range_number can't be 0.");
size_t range_number_limit = all_memory_size.size() / range_number;
int64_t range_ceil = all_memory_size[0];
int64_t range_ceil = min_memory_size;
for (size_t i = 1; i <= range_number; i++) {
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(static_cast<uint64_t>(range_ceil), kRangeCeilInterval),
GELOGE(FAILED, "Multiply result is out of range.");
@@ -114,7 +116,7 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) {
range_ceils.push_back(range.back());
}
}
GELOGD("Range ceils: %s", ToString(range_ceils).c_str());
GELOGI("Range ceils: %s", ToString(range_ceils).c_str());

return SUCCESS;
}
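
GetMemoryRanges above clamps the smallest range to the 512-byte alignment unit and derives the range count from ceil(log2(max / min)), then doubles the ceiling once per range. A simplified sketch of that geometric range construction (it keeps only the ceiling computation and omits the per-bucket refinement the assigner performs afterwards; kMemAlignSize is assumed to equal MEM_ALIGN_SIZE = 512):

#include <cmath>
#include <cstdint>
#include <vector>

namespace {
const int64_t kMemAlignSize = 512;      // assumed value of MEM_ALIGN_SIZE
const int64_t kRangeCeilInterval = 2;   // each range ceiling doubles
}  // namespace

// all_memory_size must be sorted ascending and contain only positive values.
std::vector<int64_t> BuildRangeCeilings(const std::vector<int64_t> &all_memory_size) {
  std::vector<int64_t> ceilings;
  if (all_memory_size.empty() || all_memory_size.front() <= 0) {
    return ceilings;
  }
  // Sizes are 512-byte aligned, so no range ever needs to start below 512.
  int64_t min_size = (all_memory_size.back() > kMemAlignSize) ? kMemAlignSize : all_memory_size.front();
  auto range_number = static_cast<size_t>(
      std::ceil(std::log(all_memory_size.back() / static_cast<double>(min_size)) / std::log(2.0)));
  range_number = (range_number == 0) ? 1 : range_number;
  int64_t range_ceil = min_size;
  for (size_t i = 0; i < range_number; ++i) {
    range_ceil *= kRangeCeilInterval;   // 1024, 2048, 4096, ... for min_size = 512
    ceilings.push_back(range_ceil);
  }
  return ceilings;
}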


+ 483
- 264
ge/graph/build/memory/block_mem_assigner.cc
File diff suppressed because it is too large


+ 67
- 16
ge/graph/build/memory/block_mem_assigner.h

@@ -39,14 +39,15 @@ using DependStreamLife = std::map<int64_t, std::map<int64_t, size_t>>;
enum OpMemoryType { kOutput, kWorkspace };

struct NodeTypeIndex {
NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false)
: node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input) {}
NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false, size_t begin = 0)
: node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input), life_time_begin(begin) {}

ge::NodePtr node = nullptr;
OpMemoryType mem_type = kOutput;
uint32_t index = 0;
size_t life_time_end = kMaxLifeTime;
bool ref_input = false;
size_t life_time_begin = 0;
size_t life_time_end = kMaxLifeTime;
const string GetMemType() const {
if (mem_type == kOutput) {
return "output";
@@ -55,6 +56,34 @@ struct NodeTypeIndex {
}
return "unknown";
}

size_t GetLifeBegin() const {
if ((node == nullptr) || (node->GetOpDesc() == nullptr)) {
return 0;
}

if ((life_time_begin > 0) && (life_time_begin < static_cast<size_t>(node->GetOpDesc()->GetId()))) {
return life_time_begin;
} else {
return node->GetOpDesc()->GetId();
}
}

std::string GetLifeBeginDesc() const {
if (node == nullptr) {
return "";
}
auto node_op_desc = node->GetOpDesc();
if (node_op_desc != nullptr) {
auto life_begin = GetLifeBegin();
if (life_begin != static_cast<size_t>(node_op_desc->GetId())) {
return std::to_string(life_begin) + "-" + std::to_string(node_op_desc->GetId());
} else {
return std::to_string(node_op_desc->GetId());
}
}
return "";
}
};

class MemoryBlock {
@@ -65,6 +94,7 @@ class MemoryBlock {
stream_id_(stream_id),
deleted_block_(false),
reuse_mem_(reuse_mem),
same_stream_(true),
input_index_(0),
continuous_block_(false),
first_continuous_block_(false),
@@ -85,13 +115,14 @@ class MemoryBlock {
symbol_list_.clear();
}

void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size) {
real_size_list_.emplace_back(real_size);
no_align_size_list_.emplace_back(no_align_size);
node_type_index_list_.emplace_back(node, type, out_index, false);
}
size_t Size() const { return block_size_; }

void SetSize(size_t size) {
if (size > block_size_) {
block_size_ = size;
}
}

size_t AlignSize() const;

void SetHeadOffset(size_t offset);
@@ -106,6 +137,12 @@ class MemoryBlock {
node_type_index_list_.emplace_back(node_type_index);
real_size_list_.emplace_back(real_size);
no_align_size_list_.emplace_back(no_align_size);
if ((node_type_index.node != nullptr) && (node_type_index.node->GetOpDesc() != nullptr)) {
auto stream_id = node_type_index.node->GetOpDesc()->GetStreamId();
if (stream_id != stream_id_) {
same_stream_ = false;
}
}
}

void AddSymbol(const std::string &symbol) {
@@ -122,7 +159,7 @@ class MemoryBlock {

std::string String();

bool IsSameLabel(std::string &first_batch_label);
bool IsSameBatchLabel();

void AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life);

@@ -132,7 +169,7 @@ class MemoryBlock {

size_t GetLifeBegin();

size_t GetLifeEnd();
size_t GetLifeEnd() const;

void AddDependLifeBegin(DependStreamLife &node_depend_stream_life);

@@ -142,6 +179,7 @@ class MemoryBlock {
int64_t stream_id_;
bool deleted_block_;
bool reuse_mem_;
bool same_stream_;
uint32_t input_index_;
bool continuous_block_;
bool first_continuous_block_;
@@ -149,6 +187,7 @@ class MemoryBlock {
bool is_zero_copy_;
std::map<int64_t, size_t> depend_stream_life_;
int64_t memory_type_;
std::string batch_label_;
private:
size_t block_size_;
std::vector<size_t> real_size_list_;
@@ -199,6 +238,7 @@ class BlockMemAssigner : public MemAssigner {

void SetOpMemOffset(bool is_zero_copy);

std::string GetMaxBatchLabel() const { return max_batch_label_; }
protected:
///
/// @ingroup domi
@@ -209,7 +249,7 @@ class BlockMemAssigner : public MemAssigner {

void GetOutAndWorkSpaceMem(std::vector<int64_t> &all_memory_size);

void GetNodeWorkSpaceSize(const ge::NodePtr &node, std::vector<int64_t> &workspace_memory);
void GetNodeWorkSpaceSize(const ge::NodePtr &node, std::vector<int64_t> &workspace_memory, int64_t &total_size);

///
/// @ingroup GE
@@ -353,7 +393,7 @@ class BlockMemAssigner : public MemAssigner {
/// @return void
/// @author
///
void ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory);
void ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory, bool same_stream = true);

///
/// @ingroup GE
@@ -379,11 +419,11 @@ class BlockMemAssigner : public MemAssigner {

///
/// @ingroup GE
/// @brief Merge memory blocks between different batchs
/// @brief Resize memory blocks for each batch
/// @return merge or not
/// @author
///
bool MergeDynamicBatchBlocks();
void ResizeDynamicBatchBlocks();

void AssignContinuousBlocks();

@@ -392,6 +432,7 @@ class BlockMemAssigner : public MemAssigner {
bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name,
uint32_t &peer_input_index, bool &no_need_assign_memory, bool &reset_zero_copy_flag);

bool IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index);
///
/// @ingroup GE
/// @|+++++++++block1++++++++| |+++++++++block1++++++++|
@@ -411,8 +452,6 @@ class BlockMemAssigner : public MemAssigner {

std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_;

std::map<std::string, uint64_t> reusable_block_counts_;

std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> stream_workspace_blocks_;

std::unordered_map<std::string, std::vector<MemoryBlock *>> node_out_blocks_;
@@ -436,6 +475,18 @@ class BlockMemAssigner : public MemAssigner {

int64_t atomic_addr_clean_id_ = 0;

size_t theory_min_memory_size_ = 0;

size_t theory_memory_size_ = 0;

std::string max_batch_label_;

size_t continuous_life_begin_ = 0;
///
/// @ [stream1][nodeid]
/// @[nodeid] [stream2][nodeid]
/// @ [stream2][nodeid]
///
DependStreamLife total_node_depend_stream_life_;
};
} // namespace ge
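
NodeTypeIndex::GetLifeBegin above prefers an explicitly recorded life_time_begin only when it is non-zero and earlier than the owning op's topological id; otherwise the op id is used. The same selection rule, reduced to plain integers (no GE types):

#include <cstddef>

// life_time_begin == 0 means "not set"; node_id is the op's topological id.
// The effective life begin is the recorded value only if it is set and earlier
// than the node id, mirroring NodeTypeIndex::GetLifeBegin in the header above.
size_t EffectiveLifeBegin(size_t life_time_begin, size_t node_id) {
  if (life_time_begin > 0 && life_time_begin < node_id) {
    return life_time_begin;
  }
  return node_id;
}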


+ 336
- 606
ge/graph/build/memory/graph_mem_assigner.cc
File diff suppressed because it is too large


+ 7
- 23
ge/graph/build/memory/graph_mem_assigner.h

@@ -119,31 +119,15 @@ class GraphMemoryAssigner {
///
ge::Status ReAssignContinuousMemory(bool is_loop_graph);

ge::Status ReAssignReuseAndNoPaddingContinuousInputMemory();

ge::Status ReAssignReuseAndNoPaddingContinuousOutputMemory();

ge::Status ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse);

ge::Status ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse);

ge::Status ReAssignVirtualNodesMemory(map<string, vector<NodePtr>> &mem_reuse_nodes_map, int32_t mem_reuse_model);

ge::Status GetMaxBatchLabel(const map<string, vector<NodePtr>> &mem_reuse_virtual_nodes_map,
int32_t mem_reuse_model, string &max_batch_label);

ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index,
int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size);

ge::Status ReAssignAtomicMemory(bool is_loop_graph);
ge::Status FilterAtomicNodesForMemoryAssign(std::map<NodePtr, vector<NodePtr>> &normal_atomic_nodes_map,
std::vector<NodePtr> &connecting_output_atomic_nodes);

ge::Status FilterAtomicNodesForMemoryAssign(map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map,
map<string, vector<NodePtr>> &connecting_output_atomic_nodes);

ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
int64_t &continuous_mem_size, int64_t memory_type);
int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type);

ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node);
ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, uint32_t continuous_type);

///
/// @brief check the input of node whether support atomic attr
@@ -169,10 +153,10 @@ class GraphMemoryAssigner {
ge::Status AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes);

ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
const std::vector<int64_t> &mem_offset_end);
const std::vector<int64_t> &mem_offset_end, int64_t memory_type);

ge::Status SetAtomicCleanAttr(const ge::NodePtr &node, const std::vector<int64_t> &atomic_mem_start,
const std::vector<int64_t> &atomic_mem_size);
const std::vector<int64_t> &atomic_mem_size, int64_t memory_type);

ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node);



+ 59
- 12
ge/graph/build/model_builder.cc

@@ -55,15 +55,8 @@ using std::vector;
namespace {
const uint32_t kWeightsStartOffset = 512;
const int32_t kWrongIndex = -2;

const float kImgRatioYUV420SP_U8 = 1.5;
const int kImgRatioRGB888_U8 = 3;
const int kImgRatioNC1HWC0DI_FP16 = 12;
const int kInvalidIndexNum = -1;

const uint32_t kInputDimensions2D = 2;
const uint32_t kInputDimensions3D = 3;

const char *const kVectorCore = "VectorCore";
const char *const kCoreType = "ge.engineType";
const std::string kEnableL1Fusion = "ge.l1Fusion";
@@ -224,6 +217,7 @@ Status ModelBuilder::AdjustConstWeightSize(const ge::NodePtr &node, size_t &mem_
GeTensorDesc &tensor_desc = weight->MutableTensorDesc();
size_t output_size = weight->GetData().size();
TensorUtils::SetDataOffset(tensor_desc, mem_offset);
GELOGD("Node: %s, weight size: %zu.", node->GetName().c_str(), output_size);
mem_offset += output_size;
}
return SUCCESS;
@@ -282,7 +276,7 @@ Status ModelBuilder::SetInputOutputDesc() {
void ModelBuilder::AddNodeInputProperty() {
for (const ge::NodePtr &node : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
auto node_op_desc = node->GetOpDesc();
GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return );
GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return);
vector<string> src_name_list;
vector<int64_t> src_index_list;
for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
@@ -309,10 +303,10 @@ void ModelBuilder::AddNodeInputProperty() {

for (const ge::NodePtr &node : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
auto node_op_desc = node->GetOpDesc();
GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return );
GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return);
GE_IF_BOOL_EXEC(node_op_desc->GetType() == NETOUTPUT, continue);
auto out_control_anchor = node->GetOutControlAnchor();
GE_IF_BOOL_EXEC(out_control_anchor == nullptr, GELOGW("out_control_anchor is nullptr"); return );
GE_IF_BOOL_EXEC(out_control_anchor == nullptr, GELOGW("out_control_anchor is nullptr"); return);
vector<string> dst_name_list;
vector<int64_t> dst_index_list;
string dst_name_temp;
@@ -330,7 +324,7 @@ void ModelBuilder::AddNodeInputProperty() {
dst_name_temp = "";
int64_t dst_index = kWrongIndex; // assign an impossible value to dst_index.
for (const auto &in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) {
GE_IF_BOOL_EXEC(in_data_anchor == nullptr, GELOGW("in_data_anchor is nullptr"); return );
GE_IF_BOOL_EXEC(in_data_anchor == nullptr, GELOGW("in_data_anchor is nullptr"); return);
ge::NodePtr dst_node = in_data_anchor->GetOwnerNode();
dst_name_temp = dst_name_temp.empty() ? dst_node->GetName() : dst_name_temp + ":" + dst_node->GetName();
dst_index = in_data_anchor->GetIdx();
@@ -568,7 +562,7 @@ Status ModelBuilder::MergeWeights() {
return FAILED;
}
}
weight_data.clear();
weight->ClearData();
}

return SUCCESS;
@@ -581,9 +575,13 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
// Add TBE Kernels and custom aicpu op bin
std::set<std::string> tbe_name_set;
std::set<std::string> aicpu_name_set;
std::set<std::string> aicpu_op_types;
std::set<std::string> aicpu_tf_op_types;
for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
auto node_op_desc = n->GetOpDesc();
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
// check aicpu op type
CollectCheckAicpuAttr(node_op_desc, aicpu_op_types, aicpu_tf_op_types);
TBEKernelPtr tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
if (tbe_kernel == nullptr) {
std::string kernel_name;
@@ -605,6 +603,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
tbe_kernel_store_.AddTBEKernel(tbe_kernel);
}

SetModelCheckAicpuAttr(model, aicpu_op_types, aicpu_tf_op_types);

for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
auto node_op_desc = n->GetOpDesc();
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
@@ -796,4 +796,51 @@ Status ModelBuilder::CompileSingleOp() {
GE_TIMESTAMP_CALLNUM_END(BatchCompileOp, "GraphBuild::CompileOp");
return ge::SUCCESS;
}

void ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &aicpu_op_types,
std::set<std::string> &aicpu_tf_op_types) {
std::string aicpu_optype;
bool has_attr_check_cpu = ge::AttrUtils::GetStr(op_desc, "needCheckCpu", aicpu_optype);
std::vector<std::string> tf_optypes;
bool has_attr_check_tf = ge::AttrUtils::GetListStr(op_desc, "needCheckTf", tf_optypes);
if (has_attr_check_cpu && !aicpu_optype.empty()) {
aicpu_op_types.insert(aicpu_optype);
}

if (has_attr_check_tf && !tf_optypes.empty()) {
aicpu_tf_op_types.insert(tf_optypes.begin(), tf_optypes.end());
}

return;
}

void ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types,
std::set<std::string> &aicpu_tf_op_types) {
std::vector<std::string> aicpu_optype_list;
std::vector<std::string> aicpu_tf_optype_list;
if (ge::AttrUtils::GetListStr(&model, "needCheckCpu", aicpu_optype_list)) {
GELOGI("Already have aicpu optype size: %zu", aicpu_optype_list.size());
aicpu_op_types.insert(aicpu_optype_list.begin(), aicpu_optype_list.end());
}

if (ge::AttrUtils::GetListStr(&model, "needCheckTf", aicpu_tf_optype_list)) {
GELOGI("Already have aicpu tf optype size: %zu", aicpu_tf_optype_list.size());
aicpu_tf_op_types.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end());
}

// reset list with set
aicpu_optype_list.assign(aicpu_op_types.begin(), aicpu_op_types.end());
aicpu_tf_optype_list.assign(aicpu_tf_op_types.begin(), aicpu_tf_op_types.end());
GELOGI(
"Check Aicpu op types ComputeGraph: %s aicpu_op_types: %zu, aicpu_optype_list: %zu, aicpu_tf_op_types: %zu, "
"aicpu_tf_optype_list:%zu.",
compute_graph_->GetName().c_str(), aicpu_op_types.size(), aicpu_optype_list.size(), aicpu_tf_op_types.size(),
aicpu_tf_optype_list.size());
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", aicpu_optype_list), return,
"Set attr needCheckCpu fail.");

GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckTf", aicpu_tf_optype_list), return,
"Set attr needCheckTf fail.");
return;
}
} // namespace ge
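
CollectCheckAicpuAttr and SetModelCheckAicpuAttr above funnel the per-op type names through std::set before writing them back as model-level list attributes, which is what de-duplicates them and merges them with any list already stored on the model. A reduced sketch of that merge without the AttrUtils plumbing (names are illustrative):

#include <set>
#include <string>
#include <vector>

// Merges the type names already stored on the model with the freshly collected
// ones, returning a duplicate-free list in sorted order.
std::vector<std::string> MergeCheckTypes(const std::vector<std::string> &existing,
                                         const std::set<std::string> &collected) {
  std::set<std::string> merged(existing.begin(), existing.end());
  merged.insert(collected.begin(), collected.end());
  return std::vector<std::string>(merged.begin(), merged.end());
}

For example, MergeCheckTypes({"A", "B"}, {"B", "C"}) yields {"A", "B", "C"}, which is then written back with SetListStr in the hunk above.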

+ 8
- 2
ge/graph/build/model_builder.h

@@ -55,13 +55,13 @@ class ModelBuilder {

ge::Buffer GetWeightBuffer() const;

Status MergeWeights();

protected:
void AddNodeInputProperty();

void ClearOriginalFormat();

Status MergeWeights();

private:
bool SetInputConst(const OpDescPtr &op_desc, const NodePtr &src_node, size_t index, vector<bool> &is_input_const);

@@ -83,6 +83,12 @@ class ModelBuilder {

Status CompileSingleOp();

void CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &aicpu_op_types,
std::set<std::string> &aicpu_tf_op_types);

void SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types,
std::set<std::string> &aicpu_tf_op_types);

uint64_t session_id_;

map<int64_t, size_t> mem_type_to_mem_offset_;


+ 2
- 2
ge/graph/build/stream_allocator.cc

@@ -34,7 +34,6 @@ using std::string;
using std::vector;

namespace {
const uint32_t kMaxSwitchStreamNum = 1;
const int64_t kTaskNumPerNormalNode = 3;
const int64_t kTaskNumPerHcclNode = 200;
const char *const kTrueStr = "true";
@@ -49,7 +48,8 @@ inline bool HasContinuousStreamLabel(const ge::OpDescPtr &op_desc, std::string &
}

bool IsHcclOp(const string &op_type) {
const set<string> hccl_op_types({ge::HCOMBROADCAST, ge::HCOMALLGATHER, ge::HCOMALLREDUCE, ge::HCOMREDUCESCATTER, ge::HCOMREDUCE});
const set<string> hccl_op_types({ge::HCOMBROADCAST, ge::HCOMALLGATHER,
ge::HCOMALLREDUCE, ge::HCOMREDUCESCATTER, ge::HCOMREDUCE});
return hccl_op_types.find(op_type) != hccl_op_types.end();
}
} // namespace


+ 28
- 11
ge/graph/build/stream_graph_optimizer.cc

@@ -38,7 +38,7 @@ void StreamGraphOptimizer::RefreshNodeId(const ComputeGraphPtr &comp_graph, Grap
continue;
}
for (ge::NodePtr &node : subgraph->GetDirectNode()) {
GE_CHECK_NOTNULL_EXEC(node->GetOpDesc(), return );
GE_CHECK_NOTNULL_EXEC(node->GetOpDesc(), return);
if ((node->GetType() == END) || (node->GetType() == PLACEHOLDER)) {
node->GetOpDesc()->SetId(static_cast<int64_t>(node_size));
node_size++;
@@ -48,26 +48,41 @@ void StreamGraphOptimizer::RefreshNodeId(const ComputeGraphPtr &comp_graph, Grap
}
}

bool StreamGraphOptimizer::IsSameStreamId(const ComputeGraphPtr &comp_graph) {
bool StreamGraphOptimizer::IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &comp_graph) {
if (comp_graph == nullptr) {
return false;
}
std::set<int64_t> stream_set;
std::set<std::string> label_set;
for (const ge::NodePtr &cur_node : comp_graph->GetDirectNode()) {
GE_IF_BOOL_EXEC(cur_node->GetOpDesc() == nullptr, continue);
int64_t stream_id = cur_node->GetOpDesc()->GetStreamId();
if (stream_id == kInvalidStream) {
continue;
}
GELOGD("Node %s in subgraph %s stream id is: %ld, node num: %zu", cur_node->GetName().c_str(),
comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize());
stream_set.insert(stream_id);

std::string batch_label;
if (AttrUtils::GetStr(cur_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) {
label_set.insert(batch_label);
} else {
GELOGD("Node %s[%s] has no batch label, subgraph %s, stream id: %ld", cur_node->GetName().c_str(),
cur_node->GetType().c_str(), comp_graph->GetName().c_str(), stream_id);
continue;
}

GELOGD("Node %s in subgraph %s stream id: %ld, node num: %zu", cur_node->GetName().c_str(),
comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize());
}
if (stream_set.size() > 1) {
GELOGI("Nodes of graph: %s have different stream id, node num: %zu, different stream num: %zu.",
if (stream_set.size() > 1 || label_set.size() > 1) {
GELOGI("Nodes of graph: %s have different stream id or batch_label, node num: %zu, different stream num: %zu.",
comp_graph->GetName().c_str(), comp_graph->GetDirectNodesSize(), stream_set.size());
return false;
}

if (!label_set.empty()) {
(void)AttrUtils::SetStr(comp_graph, ATTR_NAME_BATCH_LABEL, *label_set.begin());
}
return true;
}

@@ -99,8 +114,8 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com
continue;
}

if (!IsSameStreamId(subgraph)) {
GELOGI("There are more than one stream in subgraph %s", subgraph->GetName().c_str());
if (!IsSameStreamIdOrBatchLabel(subgraph)) {
GELOGI("There are more than one stream or batch_label in subgraph %s", subgraph->GetName().c_str());
continue;
}
OpDescPtr op_desc = nodes.at(0)->GetOpDesc();
@@ -112,9 +127,11 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com
return FAILED;
}
run_context.stream = run_context.graphStreamList[stream_id];
GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu.",
subgraph->GetName().c_str(), engine_name.c_str(), stream_id,
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(run_context.stream)));
std::string batch_label;
(void)AttrUtils::GetStr(subgraph, ATTR_NAME_BATCH_LABEL, batch_label);
GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu, "
"batch_label: %s", subgraph->GetName().c_str(), engine_name.c_str(), stream_id,
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(run_context.stream)), batch_label.c_str());
for (auto iter = graph_optimizers.begin(); iter != graph_optimizers.end(); ++iter) {
GE_CHECK_NOTNULL(*iter);
Status ret = (*iter)->OptimizeStreamGraph(*subgraph, run_context);
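
IsSameStreamIdOrBatchLabel above collects every valid stream id and every batch label into sets and declares the subgraph homogeneous only when neither set holds more than one element. A stripped-down sketch of the same check over plain (stream id, label) pairs; the types and the kInvalidStream sentinel value here are assumptions, not GE's definitions:

#include <cstdint>
#include <set>
#include <string>
#include <utility>
#include <vector>

const int64_t kInvalidStream = -1;  // assumed sentinel, mirroring the kInvalidStream check above

// Each node is reduced to its (stream id, batch label) pair; an empty label
// means the node carries no batch label and is ignored, as in the hunk above.
bool SameStreamAndBatchLabel(const std::vector<std::pair<int64_t, std::string>> &nodes) {
  std::set<int64_t> stream_set;
  std::set<std::string> label_set;
  for (const auto &node : nodes) {
    if (node.first == kInvalidStream) {
      continue;
    }
    stream_set.insert(node.first);
    if (!node.second.empty()) {
      label_set.insert(node.second);
    }
  }
  return stream_set.size() <= 1 && label_set.size() <= 1;
}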


+ 1
- 1
ge/graph/build/stream_graph_optimizer.h

@@ -41,7 +41,7 @@ class StreamGraphOptimizer {
private:
void RefreshNodeId(const ComputeGraphPtr &comp_graph, Graph2SubGraphInfoList &subgraph_map);

bool IsSameStreamId(const ComputeGraphPtr &comp_graph);
bool IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &comp_graph);
};
} // namespace ge
#endif // GE_GRAPH_BUILD_OPTIMIZE_STREAM_GRAPH_H_

+ 92
- 45
ge/graph/build/task_generator.cc

@@ -49,14 +49,12 @@ const char *const kIsLastNode = "is_last_node";
const char *const kIsInputVar = "INPUT_IS_VAR";
const char *const kIsOutputVar = "OUTPUT_IS_VAR";
const char *const kProfilingMode = "PROFILING_MODE";
const char *const kProfilingFpPoint = "FP_POINT";
const char *const kProfilingBpPoint = "BP_POINT";
const uint32_t kProfilingArStep = 2;
const uint64_t kProfilingFpStartLogid = 1;
const uint64_t kProfilingBpEndLogid = 2;
const uint64_t kProfilingArStartLogid = 3;
const uint64_t kProfilingArEndLogid = 4;
const uint64_t kProfilingIterEndLogid = 255;
const uint64_t kProfilingIterEndLogid = 65535;
const int64_t kHashFactor = 100000;
const int64_t kInvalidGroupId = -1;
} // namespace
@@ -276,6 +274,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
};
GE_MAKE_GUARD(release, callback);

uint64_t all_reduce_node_idx = 0;
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) {
OpDescPtr op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
@@ -294,7 +293,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
// Part2: Call
auto fusion_task_info =
FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib,
ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes};
ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes, all_reduce_node_idx};
GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen),
"Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str());
// continue directly
@@ -318,7 +317,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
type.c_str());
// Profiling task
size_t task_list_size_before = task_def_list.size();
GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list));
GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes,
node_index, task_def_list, all_reduce_node_idx));
int64_t op_id = op_desc->GetId();
// Compatible with dynamic shape scenes, the default is 0
int64_t stream_id = 0;
@@ -338,8 +338,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
return ret;
}
// Profiling task
GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list));
GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes,
node_index, task_def_list, all_reduce_node_idx));
size_t task_list_size_after = task_def_list.size();
// If tasks is reduced
if (task_list_size_after < task_list_size_before) {
@@ -382,6 +382,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info
auto &op_name_map = fusion_task_info.op_name_map;
auto &profiling_point = fusion_task_info.profiling_point;
auto &all_reduce_nodes = fusion_task_info.all_reduce_nodes;
auto &all_reduce_idx = fusion_task_info.all_reduce_node_idx;
// If op_desc have this attr, call nodes with same group key in a stream together
if (ge::AttrUtils::GetInt(fusion_op_desc, ATTR_NAME_FUSION_GROUP_KEY, group_key) &&
(fusion_nodes_seen.count(node.get()) == 0)) {
@@ -428,7 +429,8 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info
return INTERNAL_ERROR;
}
// profiling task
(void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list);
(void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes,
node_index, task_def_list, all_reduce_idx);
run_context.stream = run_context.graphStreamList[stream_id];
GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld] task.",
op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id);
@@ -441,7 +443,8 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info
return ret;
}
// profiling task
(void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list);
(void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes,
node_index, task_def_list, all_reduce_idx);
size_t task_list_size_after = task_def_list.size();
// if tasks is reduced
if (task_list_size_after < task_list_size_before) {
@@ -569,7 +572,7 @@ Status TaskGenerator::MarkFirstAndLastOps(const vector<OpDescPtr> &ops, bool is_
continue;
}
string op_type = op_desc->GetType();
if (!is_single_stream && (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0)) {
if ((!is_single_stream && !op_desc->GetSubgraphInstanceNames().empty()) || separator_types.count(op_type) != 0) {
continuous_op_lists.emplace_back(vector<OpDescPtr>());
} else {
continuous_op_lists.back().emplace_back(op_desc);
@@ -810,40 +813,33 @@ Status TaskGenerator::GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint
vector<uint32_t> &all_reduce_nodes, std::string &fp_point_str,
std::string &bp_point_str) const {

if (ge::GetContext().GetOption(OPTION_EXEC_PROFILING_FPPONIT_OPTIONS, fp_point_str) == SUCCESS &&
ge::GetContext().GetOption(OPTION_EXEC_PROFILING_BPPONIT_OPTIONS, bp_point_str) == SUCCESS &&
!fp_point_str.empty() && !bp_point_str.empty()) {
return SUCCESS;
}
ProfilingManager::Instance().GetFpBpPoint(fp_point_str, bp_point_str);

Status ret = SUCCESS;
const char *fp_point = std::getenv(kProfilingFpPoint);
if (fp_point == nullptr) {
if (fp_point_str.empty()) {
ret = AutoFindFpOpIndex(graph, profiling_point);
if (ret != SUCCESS) {
GELOGW("First forward profiling op_index not set and FindFpOpIndex failed.");
return FAILED;
}
} else {
fp_point_str = string(fp_point);
GELOGI("Get fp_point_str from env %s", fp_point_str.c_str());
}

const char *bp_point = std::getenv(kProfilingBpPoint);
if (bp_point == nullptr) {
if (bp_point_str.empty()) {
ret = AutoFindBpOpIndex(graph, profiling_point, all_reduce_nodes);
if (ret != SUCCESS) {
GELOGW("Last backward profiling op_index not set and FindBpOpIndex failed.");
return FAILED;
}
} else {
bp_point_str = string(bp_point);
GELOGI("Get bp_point_str from env %s", bp_point_str.c_str());
}

return SUCCESS;
}

Status TaskGenerator::FindProfilingNodeIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point,
std::vector<uint32_t> &all_reduce_nodes) {
return FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes);
}

Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point,
vector<uint32_t> &all_reduce_nodes) const {
GE_CHECK_NOTNULL(graph);
@@ -854,7 +850,6 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi
GELOGD("Profiling is not open.");
return SUCCESS;
}

GELOGI("Start get FP/BP index.");
std::string fp_point_str;
std::string bp_point_str;
@@ -892,18 +887,27 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi
return SUCCESS;
}


Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
vector<domi::TaskDef> &task_def_list) {
vector<domi::TaskDef> &task_def_list, uint64_t &all_reduce_node_idx) {
const char *profiling_mode = std::getenv(kProfilingMode);
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() ||
ProfilingManager::Instance().ProfilingTrainingTraceOn();
if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) ||
(profiling_point.end_index.empty())) {
bool is_insert_fp_profiling_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task);
bool is_insert_bp_profiling_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
bool no_insert_profiling_task = ((profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) ||
(profiling_point.end_index.empty())) &&
(!(is_insert_fp_profiling_task || is_insert_bp_profiling_task));
if (!is_profiling || no_insert_profiling_task) {
return SUCCESS;
}
if (profiling_point.fp_index == node_index) {
GELOGD("Insert fp profiling task: %d, insert bp profiling task: %d, fp index: %u, bp index: %u, end index size: %zu",
is_insert_fp_profiling_task, is_insert_bp_profiling_task, profiling_point.fp_index, profiling_point.bp_index,
profiling_point.end_index.size());

if ((profiling_point.fp_index == node_index) || is_insert_fp_profiling_task) {
uint64_t jobid_log_id = ge::GetContext().TraceId();
GELOGI("The first FP operator is %s, idx %u, job_id %lu", op_desc->GetName().c_str(), node_index, jobid_log_id);

@@ -927,22 +931,40 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const
task_def_list.emplace_back(fp_task_def);
}

for (size_t i = 0; i < all_reduce_nodes.size(); i++) {
if (all_reduce_nodes[i] != node_index) {
continue;
bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
uint64_t all_reduce_task_idx = 0;
bool is_insert_all_reduce_task = false;
if (is_all_reduce && is_insert_bp_profiling_task) {
all_reduce_task_idx = all_reduce_node_idx;
is_insert_all_reduce_task = true;
}
if (is_all_reduce) {
all_reduce_node_idx++;
}
if (!is_insert_all_reduce_task) {
for (size_t i = 0; i < all_reduce_nodes.size(); i++) {
if (all_reduce_nodes[i] == node_index) {
all_reduce_task_idx = i;
is_insert_all_reduce_task = true;
break;
}
}
}

if (is_insert_all_reduce_task) {
GELOGI("The start allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
TaskDef ar_task_def;
ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
ar_task_def.set_stream_id(op_desc->GetStreamId());
LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp();
if (ar_log_def != nullptr) {
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep),
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep),
GELOGE(FAILED, "Multiply result is out of range.");
return FAILED);
auto log_id = i * kProfilingArStep + kProfilingArStartLogid;
auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArStartLogid;
ar_log_def->set_logid(log_id);
ar_log_def->set_notify(false);
(void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
}
task_def_list.push_back(ar_task_def);
}
@@ -951,16 +973,27 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const

Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
vector<domi::TaskDef> &task_def_list) {
vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx) {
GE_CHECK_NOTNULL(op_desc);
const char *profiling_mode = std::getenv(kProfilingMode);
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() ||
ProfilingManager::Instance().ProfilingTrainingTraceOn();
if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) ||
(profiling_point.end_index.empty())) {
bool is_insert_bp_profiling_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
bool is_insert_end_profiling_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, is_insert_end_profiling_task);
bool no_insert_profiling_task = ((profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) ||
(profiling_point.end_index.empty())) &&
(!(is_insert_bp_profiling_task || is_insert_end_profiling_task));
if (!is_profiling || no_insert_profiling_task) {
return SUCCESS;
}
if (profiling_point.bp_index == node_index) {
GELOGD("Insert bp profiling task: %d, insert end profiling task: %d, fp index: %u, bp index: %u, end index size: %zu",
is_insert_bp_profiling_task, is_insert_end_profiling_task, profiling_point.fp_index, profiling_point.bp_index,
profiling_point.end_index.size());

bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
if ((profiling_point.bp_index == node_index) || (!is_all_reduce && is_insert_bp_profiling_task)) {
GELOGI("The last BP operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
TaskDef bp_task_def;
bp_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
@@ -971,7 +1004,9 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P
bp_log_def->set_notify(false);
task_def_list.emplace_back(bp_task_def);
}
if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end()) {

if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end() ||
is_insert_end_profiling_task) {
GELOGI("The iteration end operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
TaskDef end_task_def;
end_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
@@ -983,20 +1018,32 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P
task_def_list.emplace_back(end_task_def);
}

uint32_t all_reduce_task_idx = 0;
bool is_insert_all_reduce_task = false;
if (is_all_reduce && is_insert_bp_profiling_task) {
all_reduce_task_idx = all_reduce_node_idx;
is_insert_all_reduce_task = true;
}

for (size_t i = 0; i < all_reduce_nodes.size(); i++) {
if (all_reduce_nodes[i] != node_index) {
continue;
if (all_reduce_nodes[i] == node_index) {
all_reduce_task_idx = i;
is_insert_all_reduce_task = true;
break;
}
}

if (is_insert_all_reduce_task) {
GELOGI("The end allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
TaskDef ar_task_def;
ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
ar_task_def.set_stream_id(op_desc->GetStreamId());
LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp();
GE_CHECK_NOTNULL(ar_log_def);
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep),
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep),
GELOGE(FAILED, "Multiply result is out of range.");
return FAILED);
auto log_id = i * kProfilingArStep + kProfilingArEndLogid;
auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArEndLogid;
ar_log_def->set_logid(log_id);
ar_log_def->set_notify(false);
task_def_list.emplace_back(ar_task_def);
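The end-side hunk computes the all-reduce log id the same way as the start-side one, guarded against multiplication overflow. A small self-contained sketch of that computation follows; the constant values are placeholders, not the real kProfilingArStep/kProfilingArEndLogid.

#include <cstdint>
#include <iostream>
#include <limits>
#include <optional>

constexpr uint64_t kArStep = 2;      // placeholder, not the real kProfilingArStep
constexpr uint64_t kArEndLogId = 4;  // placeholder, not the real kProfilingArEndLogid

// Returns the profiler log id for the given all-reduce task index, or nothing on overflow.
std::optional<uint64_t> MakeArEndLogId(uint64_t task_idx) {
  if (task_idx != 0 && kArStep > std::numeric_limits<uint64_t>::max() / task_idx) {
    return std::nullopt;  // task_idx * kArStep would overflow uint64_t
  }
  return task_idx * kArStep + kArEndLogId;
}

int main() {
  if (auto id = MakeArEndLogId(3)) {
    std::cout << "log id: " << *id << '\n';  // 3 * 2 + 4 = 10 with the placeholder constants
  }
  return 0;
}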


+5 -2 ge/graph/build/task_generator.h

@@ -51,6 +51,7 @@ struct FusionTaskInfo {
std::map<uint32_t, string> &op_name_map;
ProfilingPoint &profiling_point;
vector<uint32_t> all_reduce_nodes;
uint64_t all_reduce_node_idx;
};

class TaskGenerator {
@@ -76,6 +77,8 @@ class TaskGenerator {
///
Status GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t session_id, RunContext &run_context);

Status FindProfilingNodeIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point,
std::vector<uint32_t> &all_reduce_nodes);
private:
Status UpdateAnchorStatus(const NodePtr &node);

@@ -126,10 +129,10 @@ class TaskGenerator {
std::vector<uint32_t> &all_reduce_nodes) const;
Status InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
std::vector<domi::TaskDef> &task_def_list);
std::vector<domi::TaskDef> &task_def_list, uint64_t &all_reduce_node_idx);
Status InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
std::vector<domi::TaskDef> &task_def_list);
std::vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx);

static bool IsProfPoint(const OpDescPtr &op, const std::string &name);
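The header change threads the new all_reduce_node_idx through both hooks, by reference for the "before" call and by value for the "after" call. A toy sketch of that parameter-passing difference (simplified, not the GE call sequence):

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// "Before" advances the shared all-reduce counter when it sees an all-reduce op.
void Before(const std::string &op_type, uint64_t &all_reduce_node_idx) {
  if (op_type == "HcomAllReduce") {
    ++all_reduce_node_idx;
  }
}

// "After" only reads the counter, so it takes a copy.
void After(const std::string &op_type, uint64_t all_reduce_node_idx) {
  std::cout << op_type << ": counter is " << all_reduce_node_idx << '\n';
}

int main() {
  std::vector<std::string> ops = {"MatMul", "HcomAllReduce", "Relu", "HcomAllReduce"};
  uint64_t idx = 0;
  for (const auto &op : ops) {
    Before(op, idx);  // may mutate the shared counter
    After(op, idx);   // sees the current value by copy
  }
  return 0;
}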



+4 -1 ge/graph/common/transop_util.cc

@@ -23,7 +23,10 @@
namespace {
const int kInvalidTransopDataIndex = -1;
const int kTransOpOutIndex = 0;
std::map<ge::DataType, ge::DataType> precision_loss_transfer_map = {{ge::DT_FLOAT, ge::DT_BOOL}};
std::map<ge::DataType, ge::DataType> precision_loss_transfer_map = {
{ge::DT_FLOAT, ge::DT_BOOL},
{ge::DT_INT64, ge::DT_BOOL}
};
} // namespace

namespace ge {
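An illustrative sketch, with hypothetical names rather than the GE API, of how such a precision-loss map is typically consulted — the added {DT_INT64, DT_BOOL} entry simply makes int64-to-bool transfers count as lossy alongside float-to-bool:

#include <cstdint>
#include <iostream>
#include <map>

enum class DataType : uint8_t { kFloat, kInt64, kBool };

// Source -> destination pairs treated as precision-losing.
const std::map<DataType, DataType> kPrecisionLossTransferMap = {
    {DataType::kFloat, DataType::kBool},
    {DataType::kInt64, DataType::kBool},
};

bool IsPrecisionLoss(DataType src, DataType dst) {
  const auto iter = kPrecisionLossTransferMap.find(src);
  return iter != kPrecisionLossTransferMap.end() && iter->second == dst;
}

int main() {
  std::cout << std::boolalpha
            << IsPrecisionLoss(DataType::kInt64, DataType::kBool) << '\n'   // true (new entry)
            << IsPrecisionLoss(DataType::kBool, DataType::kFloat) << '\n';  // false
  return 0;
}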


+2 -26 ge/graph/execute/graph_execute.cc

@@ -560,34 +560,10 @@ Status GraphExecutor::GetModelAttr(uint32_t model_id, std::vector<string> &dynam
return SUCCESS;
}

Status GraphExecutor::GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &out_formats) {
try {
auto model_manager = ge::ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret =
model_manager->GetInputOutputDescInfoForZeroCopy(model_id, input_desc, output_desc, input_formats, out_formats);
if (ret != SUCCESS) {
GELOGE(ret, "GetInputOutputDescInfoForZeroCopy failed.");
return ret;
}
} catch (std::bad_alloc &) {
GELOGE(MEMALLOC_FAILED, "GetInputOutputDescInfoForZeroCopy failed, bad memory allocation occur !");
return MEMALLOC_FAILED;
} catch (...) {
GELOGE(FAILED, "GetInputOutputDescInfoForZeroCopy failed, some exceptions occur !");
return FAILED;
}

return SUCCESS;
}

Status GraphExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
Status GraphExecutor::GetAippInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
auto model_manager = ge::ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->GetAIPPInfo(model_id, index, aipp_info);
Status ret = model_manager->GetAippInfo(model_id, index, aipp_info);
if (ret != SUCCESS) {
GELOGW("GetAIPPInfo is not success.");
return ret;


+1 -5 ge/graph/execute/graph_execute.h

@@ -73,7 +73,7 @@ class GraphExecutor {
vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats, bool new_model_desc = false);

static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
static Status GetAippInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);

static Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index);

@@ -110,10 +110,6 @@ class GraphExecutor {

static Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info);

static Status GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats);
static Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);
static Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims,
std::vector<InputOutputDims> &output_dims);


+0 -1 ge/graph/label/case_label_maker.h

@@ -86,7 +86,6 @@
| Node |
+------------+
*******************************************************************************/

namespace ge {
class CaseOpLabelMaker : public LabelMaker {
public:


+0 -1 ge/graph/label/if_label_maker.h

@@ -70,7 +70,6 @@
| Node |
+------------+
*******************************************************************************/

namespace ge {
class IfOpLabelMaker : public LabelMaker {
public:


+0 -1 ge/graph/label/partitioned_call_label_maker.h

@@ -54,7 +54,6 @@
| c |
+---------------+
*******************************************************************************/

namespace ge {
class PartitionedCallLabelMaker : public LabelMaker {
public:


+0 -1 ge/graph/label/while_label_maker.h

@@ -70,7 +70,6 @@
| Node |
+------------+
*******************************************************************************/

namespace ge {
class WhileOpLabelMaker : public LabelMaker {
public:


+10 -66 ge/graph/load/graph_loader.cc

@@ -122,14 +122,14 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
ModelData &model_data) {
Status ret;
if (!CheckInputPathValid(path)) {
GELOGE(GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
return GE_EXEC_MODEL_PATH_INVALID;
GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
}

GELOGI("Load model begin, model path is: %s", path.c_str());
if (!key_path.empty() && !CheckInputPathValid(key_path)) {
GELOGE(GE_EXEC_MODEL_KEY_PATH_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
return GE_EXEC_MODEL_KEY_PATH_INVALID;
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
return ACL_ERROR_GE_PARAM_INVALID;
}

ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data);
@@ -144,63 +144,6 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
return SUCCESS;
}

Status GraphLoader::LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority,
const std::shared_ptr<ModelListener> &listener, uint32_t &model_id) {
Status ret;
ModelData model_data;
ret = LoadDataFromFile(path, key_path, priority, model_data);
if (ret != SUCCESS) {
GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret);
if (model_data.model_data != nullptr) {
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;
}
return ret;
}

ret = LoadModel(model_data, listener, model_id);
if (ret != SUCCESS) {
GELOGE(ret, "LoadModel: Load failed. ret = %u", ret);
if (model_data.model_data != nullptr) {
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;
}
}

if (model_data.model_data != nullptr) {
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;
}

return ret;
}

Status GraphLoader::LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener,
uint32_t &model_id) {
GELOGI("Load model begin, model_id:%u.", model_id);

// For GeOp, Open Device 0 here.
GE_CHK_RT_RET(rtSetDevice(0));
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->LoadModelOffline(model_id, model_data, listener);
if (ret != SUCCESS) {
GE_CHK_RT(rtDeviceReset(0));
GELOGE(ret, "LoadModel: Load failed.");
return ret;
}
ret = model_manager->Start(model_id);
if (ret != SUCCESS) {
if (model_manager->Unload(model_id) != SUCCESS) {
GELOGE(FAILED, "LoadModel: Unload failed while trying to unload after a failed start.");
}
GELOGE(ret, "LoadModel: Start failed.");
return ret;
}
GELOGI("LoadModel: Start model success, model_id:%u.", model_id);
return SUCCESS;
}

Status GraphLoader::CommandHandle(const Command &command) {
try {
auto model_manager = ModelManager::GetInstance();
@@ -225,13 +168,13 @@ Status GraphLoader::CommandHandle(const Command &command) {
}

Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr,
size_t memsize, void *weight_ptr, size_t weightsize) {
size_t mem_size, void *weight_ptr, size_t weight_size) {
GELOGI("Load model begin, model_id:%u.", model_id);
// For ACL, Open Device from App.
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->LoadModelOffline(
model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize);
model_id, model_data, nullptr, dev_ptr, mem_size, weight_ptr, weight_size);
if (ret != SUCCESS) {
GELOGE(ret, "Load model failed, model_id:%u.", model_id);
return ret;
@@ -283,7 +226,8 @@ Status GraphLoader::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asyn
std::vector<GeTensorDesc> &output_desc) {
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc);
Status ret = model_manager->ExecuteModel(model_id, stream, async_mode,
input_data, input_desc, output_data, output_desc);
if (ret != SUCCESS) {
GELOGE(ret, "Execute model failed, model_id:%u.", model_id);
return ret;
@@ -319,10 +263,10 @@ Status GraphLoader::GetMemoryInfo(int64_t &free) {
return SUCCESS;
}

Status GraphLoader::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) {
Status GraphLoader::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id) {
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->DestroyAicpuKernel(session_id, model_id);
Status ret = model_manager->DestroyAicpuKernel(session_id, model_id, sub_model_id);
if (ret != SUCCESS) {
GELOGE(ret, "Destroy aicpu kernel failed.");
return ret;


+1 -7 ge/graph/load/graph_loader.h

@@ -44,12 +44,6 @@ class GraphLoader {

static Status GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size);

static Status LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener,
uint32_t &model_id);

static Status LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority,
const std::shared_ptr<ModelListener> &listener, uint32_t &model_id);

static Status CommandHandle(const Command &command);

static Status GetMemoryInfo(int64_t &free);
@@ -68,7 +62,7 @@ class GraphLoader {
const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
std::vector<GeTensorDesc> &output_desc);

static Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id);
static Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id);

static Status DestroyAicpuSessionForInfer(uint32_t model_id);



+17 -2 ge/graph/load/new_model_manager/data_dumper.cc

@@ -120,6 +120,7 @@ static int32_t GetIrDataType(ge::DataType data_type) {
{ge::DT_RESOURCE, ge::proto::DT_RESOURCE},
{ge::DT_STRING_REF, ge::proto::DT_STRING_REF},
{ge::DT_STRING, ge::proto::DT_STRING},
{ge::DT_VARIANT, ge::proto::DT_VARIANT},
};

auto iter = data_type_map.find(data_type);
@@ -319,6 +320,9 @@ Status DataDumper::GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vis
for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
output.mutable_shape()->add_dim(dim);
}
for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
output.mutable_origin_shape()->add_dim(dim);
}
int64_t output_size = 0;
if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size filed");
@@ -476,6 +480,9 @@ Status DataDumper::GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor
for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
input.mutable_shape()->add_dim(dim);
}
for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
input.mutable_origin_shape()->add_dim(dim);
}
int64_t input_size = 0;
if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
GELOGI("Get aipp input size according to attr is %ld", input_size);
@@ -823,6 +830,13 @@ Status DataDumper::UnloadDumpInfo() {
return SUCCESS;
}

void DataDumper::DumpShrink() {
compute_graph_.reset();
input_map_.clear();
ref_info_.clear();
op_list_.clear();
}

void DataDumper::PrintCheckLog(string &dump_list_key) {
std::set<std::string> model_list = dump_properties_.GetAllDumpModel();
if (model_list.empty()) {
@@ -891,6 +905,7 @@ Status DataDumper::DumpExceptionInfo(const std::vector<rtExceptionInfo> exceptio
toolkit::dumpdata::DumpData dump_data;
dump_data.set_version("2.0");
dump_data.set_dump_time(GetNowTime());
dump_data.set_op_name(op_desc_info.op_name);
for (size_t i = 0; i < op_desc_info.input_format.size(); ++i) {
toolkit::dumpdata::OpInput input;
input.set_data_type(toolkit::dumpdata::OutputDataType(GetIrDataType(op_desc_info.input_data_type[i])));
@@ -919,11 +934,11 @@ Status DataDumper::DumpExceptionInfo(const std::vector<rtExceptionInfo> exceptio
ReplaceStringElem(op_name);
ReplaceStringElem(op_type);
string dump_file_path =
"./" + op_type + "." + op_name + "." + to_string(op_desc_info.task_id) + "." + to_string(now_time);
"./" + op_type + "." + op_name + "." + std::to_string(op_desc_info.task_id) + "." + std::to_string(now_time);
GELOGI("The exception dump file path is %s", dump_file_path.c_str());

uint64_t proto_size = dump_data.ByteSizeLong();
unique_ptr<char[]> proto_msg(new (std::nothrow) char[proto_size]);
std::unique_ptr<char[]> proto_msg(new (std::nothrow) char[proto_size]);
bool ret = dump_data.SerializeToArray(proto_msg.get(), proto_size);
if (!ret || proto_size == 0) {
GELOGE(PARAM_INVALID, "Dump data proto serialize failed");


+6 -4 ge/graph/load/new_model_manager/data_dumper.h

@@ -83,6 +83,8 @@ class DataDumper {

Status UnloadDumpInfo();

void DumpShrink();

void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; }
const DumpProperties &GetDumpProperties() const { return dump_properties_; }
bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const;
@@ -112,18 +114,18 @@ class DataDumper {
struct InnerInputMapping;

std::vector<OpDescInfo> op_desc_info_;
std::vector<InnerDumpInfo> op_list_;
std::vector<InnerDumpInfo> op_list_; // release after DavinciModel::Init
uint32_t end_graph_task_id_ = 0;
uint32_t end_graph_stream_id_ = 0;
bool is_end_graph_ = false;
std::multimap<std::string, InnerInputMapping> input_map_;
std::multimap<std::string, InnerInputMapping> input_map_; // release after DavinciModel::Init
bool load_flag_;
uint32_t device_id_;
uintptr_t global_step_;
uintptr_t loop_per_iter_;
uintptr_t loop_cond_;
ComputeGraphPtr compute_graph_;
std::map<OpDescPtr, void *> ref_info_;
ComputeGraphPtr compute_graph_; // release after DavinciModel::Init
std::map<OpDescPtr, void *> ref_info_; // release after DavinciModel::Init
void *l1_fusion_addr_ = nullptr;
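A loose, self-contained sketch of the "release after DavinciModel::Init" idea behind the new DumpShrink() and the annotated members above — once loading finishes, drop the graph reference and the bookkeeping maps that are only needed during initialization. Names and members here are illustrative only.

#include <iostream>
#include <map>
#include <memory>
#include <string>

struct ComputeGraph { std::string name; };

class DumpState {
 public:
  explicit DumpState(std::shared_ptr<ComputeGraph> graph) : compute_graph_(std::move(graph)) {}

  void RecordRef(const std::string &op, int addr) { ref_info_[op] = addr; }

  // Mirror of the shrink idea: clear init-only state once the model is built.
  void Shrink() {
    compute_graph_.reset();
    ref_info_.clear();
  }

  bool HasGraph() const { return compute_graph_ != nullptr; }

 private:
  std::shared_ptr<ComputeGraph> compute_graph_;  // only needed while initializing
  std::map<std::string, int> ref_info_;          // only needed while initializing
};

int main() {
  DumpState state(std::make_shared<ComputeGraph>(ComputeGraph{"g"}));
  state.RecordRef("conv0", 0x1000);
  state.Shrink();
  std::cout << std::boolalpha << state.HasGraph() << '\n';  // false after shrink
  return 0;
}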




+872 -918 ge/graph/load/new_model_manager/davinci_model.cc (file diff suppressed because it is too large)


+219 -180 ge/graph/load/new_model_manager/davinci_model.h

@@ -49,6 +49,10 @@
#include "task_info/task_info.h"
#include "graph/common/local_context.h"

using std::mutex;
using std::thread;
using std::multimap;

namespace ge {
// op debug need 2048 bits buffer
const size_t kOpDebugMemorySize = 2048UL;
@@ -76,6 +80,39 @@ struct timeInfo {
int64_t dumpEndTime;
};

// For super kernel
struct SuperKernelTaskInfo {
uint32_t last_block_dim;
uint32_t last_args_size;
uint32_t last_task_id;
uint32_t last_stream_id;
void *last_stream;
void *last_sm_desc;
vector<void *> kernel_list;
vector<void *> arg_list;
vector<uint32_t> dump_flag_list;
vector<OpDescPtr> op_desc_list;
vector<uintptr_t> dump_args_list;
uint32_t last_dump_flag;
int64_t last_group_key;
uintptr_t last_dump_args;
OpDescPtr last_op;
};

struct TaskMemInfo {
int64_t input_size{0};
int64_t output_size{0};
int64_t weight_size{0};
int64_t workspace_size{0};
int64_t total_size{0};
};

struct ProfileInfo {
FusionOpInfo fusion_info;
TaskMemInfo memory_info;
uint32_t task_count{0};
};

enum ExecuteMode {
INITIALIZATION,
SYNCHRONIZATION,
@@ -90,7 +127,7 @@ class DavinciModel {
/// @brief DavinciModel constructor
/// @author
///
DavinciModel(int32_t priority, const std::shared_ptr<ModelListener> &listener);
DavinciModel(int32_t priority, const shared_ptr<ModelListener> &listener);

///
/// @ingroup ge
@@ -120,7 +157,7 @@ class DavinciModel {
/// @param [in] output_que_ids: input queue ids from user, nums equal NetOutput Op.
/// @return: 0 for success / others for fail
///
Status SetQueIds(const std::vector<uint32_t> &input_queue_ids, const std::vector<uint32_t> &output_queue_ids);
Status SetQueIds(const vector<uint32_t> &input_queue_ids, const vector<uint32_t> &output_queue_ids);

///
/// @ingroup ge
@@ -136,6 +173,20 @@ class DavinciModel {
///
void SetId(uint32_t model_id) { model_id_ = model_id; }

///
/// @ingroup ge
/// @brief Get SubModelId
/// @return sub model ID
///
uint32_t SubModelId() const { return sub_model_id_; }

///
/// @ingroup ge
/// @brief Set SubModelId
/// @param [in] sub_model_id: sub model ID
///
void SetSubModelId(uint32_t sub_model_id) { sub_model_id_ = sub_model_id; }

static void *Run(DavinciModel *model_pointer);

///
@@ -190,13 +241,14 @@ class DavinciModel {
// get total mem size
size_t TotalMemSize() const { return runtime_param_.mem_size; }

const std::map<uint32_t, MemInfo> &P2PMemInfos() const {return runtime_param_.memory_infos;}
const map<uint32_t, MemInfo> &P2PMemInfos() const { return runtime_param_.memory_infos; }

// model name
string Name() const { return name_; }

// om_name
string OmName() const { return om_name_; }

// version
uint32_t Version() const { return version_; }

@@ -222,12 +274,7 @@ class DavinciModel {

Status DestroyThread();

// Get Data Op.
const vector<OpDescPtr> &GetDataList() const { return data_op_list_; }

// get Op
const map<uint32_t, OpDescPtr> &GetOpList() const { return op_list_; }

OpDescPtr GetOpByIndex(uint32_t index) const {
if (op_list_.find(index) == op_list_.end()) {
return nullptr;
@@ -235,26 +282,16 @@ class DavinciModel {
return op_list_.at(index);
}

OpDescPtr GetVariableOp(const string &name) {
for (auto op_desc : variable_op_list_) {
if (op_desc != nullptr && op_desc->GetName() == name) {
return op_desc;
}
}
return nullptr;
}
void *GetGlobalStep() const { return global_step_addr_; }

// get task info for profiling
const std::vector<TaskDescInfo> &GetTaskDescInfo() const { return task_desc_info_; }
const vector<TaskDescInfo> &GetTaskDescInfo() const { return task_desc_info_; }

// get updated task info list
std::vector<TaskInfoPtr> GetTaskList() { return task_list_; }
vector<TaskInfoPtr> GetTaskList() { return task_list_; }

///
/// @ingroup ge
/// @brief get model input and output format
/// @return ccTensorFormat_t current model input and output format
///
Format GetFormat();
// Modified from KernelTaskInfo.
SuperKernelTaskInfo &GetSuperKernelTaskInfo() { return skt_info_; }

rtModel_t GetRtModelHandle() const { return rt_model_handle_; }

@@ -289,7 +326,7 @@ class DavinciModel {
Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc);

Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &output_formats);
vector<uint32_t> &input_formats, vector<uint32_t> &output_formats, bool by_dims);

///
/// @ingroup ge
@@ -298,7 +335,7 @@ class DavinciModel {
/// @param [out] dynamic_type
/// @return execute result
///
Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) const;
Status GetDynamicBatchInfo(vector<vector<int64_t>> &batch_info, int32_t &dynamic_type) const;

///
/// @ingroup ge
@@ -306,13 +343,13 @@ class DavinciModel {
/// @param [out] batch_info
/// @return None
///
void GetCombinedDynamicDims(std::vector<std::vector<int64_t>> &batch_info) const;
void GetCombinedDynamicDims(vector<vector<int64_t>> &batch_info) const;

void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) const;
void GetUserDesignateShapeOrder(vector<string> &user_input_shape_order) const;

void GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynamic_type);
void GetCurShape(vector<int64_t> &batch_info, int32_t &dynamic_type) const;

void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info);
void GetModelAttr(vector<string> &dynamic_output_shape_info) const;

///
/// @ingroup ge
@@ -321,9 +358,9 @@ class DavinciModel {
/// @param [out] aipp_info
/// @return execute result
///
Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info);
Status GetAippInfo(uint32_t index, AippConfigInfo &aipp_info) const;

Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index);
Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index) const;

///
/// @ingroup ge
@@ -339,18 +376,7 @@ class DavinciModel {
/// @param [in] string identification: unique identification for current op.
/// @return None
///
void GetUniqueId(const OpDescPtr &op_desc, std::string &unique_identification);

///
/// @ingroup ge
/// @brief get model input and output desc for zero copy
/// @param [out] input_shape model input size
/// @param [out] output_shape model output size
/// @return execute result
///
Status GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &output_formats);
void GetUniqueId(const OpDescPtr &op_desc, string &unique_identification);

Status ReturnResult(uint32_t data_id, const bool rslt_flg, const bool seq_end_flg, OutputData *output_data);

@@ -372,8 +398,6 @@ class DavinciModel {
///
bool RunFlag() const { return run_flg_; }

Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats);

///
/// @ingroup ge
/// @brief Set Session Id
@@ -419,14 +443,14 @@ class DavinciModel {
/// @ingroup ge
/// @brief Save outside address of Data or NetOutput used info for ZeroCopy.
/// @param [in] const OpDescPtr &op_desc: current op desc
/// @param [in] const std::vector<void *> &outside_addrs: address of task
/// @param [in] const vector<void *> &outside_addrs: address of task
/// @param [in] const void *args_offset: arguments address save the address.
/// @return None.
///
void SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<void *> &outside_addrs, const void *info, void *args,
void SetZeroCopyAddr(const OpDescPtr &op_desc, const vector<void *> &outside_addrs, const void *info, void *args,
size_t size, size_t offset);

void SetDynamicSize(const std::vector<uint64_t> &batch_num, int32_t dynamic_type);
void SetDynamicSize(const vector<uint64_t> &batch_num, int32_t dynamic_type);

bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; }

@@ -436,26 +460,26 @@ class DavinciModel {

int64_t GetLoadEndTime() { return load_end_time_; }

Status SinkModelProfile();

Status SinkTimeProfile(const InputData &current_data);

Status ReportProfilingData(bool check_device = true);
Status ReportProfilingData();

void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) {
data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id);
}

void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, uintptr_t args) {
void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const shared_ptr<OpDesc> &op_desc, uintptr_t args) {
data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args);
}

void DumperShrink() {
data_dumper_.DumpShrink();
}

void SetEndGraphId(uint32_t task_id, uint32_t stream_id);
DavinciModel &operator=(const DavinciModel &model) = delete;

DavinciModel(const DavinciModel &model) = delete;

const map<int64_t, std::vector<rtStream_t>> &GetHcclFolowStream() {
const map<int64_t, vector<rtStream_t>> &GetHcclFolowStream() {
return main_follow_stream_mapping_;
}
void SaveHcclFollowStream(int64_t main_stream_id, rtStream_t stream);
@@ -473,8 +497,14 @@ class DavinciModel {
void *cur_args = static_cast<char *>(args_) + offset;
return cur_args;
}
void SetTotalIOAddrs(vector<void *> &io_addrs) {
total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end());
void SetTotalIOAddrs(const vector<void *> &io_addrs);
void SetHybridArgsSize(uint32_t args_size) { total_hybrid_args_size_ += args_size; }
uint32_t GetHybridArgsSize() {
return total_hybrid_args_size_;
}
void *GetCurrentHybridArgsAddr(uint32_t offset) {
void *cur_args = static_cast<char *>(hybrid_addrs_) + offset;
return cur_args;
}
void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size);
int64_t GetFixedAddrsSize(string tensor_name);
@@ -494,13 +524,13 @@ class DavinciModel {
Status MallocKnownArgs();
Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs);
Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
Status UpdateKnownZeroCopyAddr();
Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs, bool update_args = true);
void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }

Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
Status GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims,
std::vector<InputOutputDims> &output_dims);
void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; }
Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const;
Status GetAllAippInputOutputDims(uint32_t index, vector<InputOutputDims> &input_dims,
vector<InputOutputDims> &output_dims) const;
// om file name
void SetOmName(string om_name) { om_name_ = om_name; }

@@ -510,13 +540,13 @@ class DavinciModel {
bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const {
return data_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info);
}
Status InitInputOutputForDynamic(const ComputeGraphPtr &compute_graph);

private:
// memory address of weights
uint8_t *weights_mem_base_;
uint8_t *var_mem_base_;
// memory address of model
uintptr_t fixed_mem_base_; // Initial of mem_base_, keep forever.
uint8_t *mem_base_;
uint8_t *p2p_mem_base_;
bool is_inner_mem_base_;
@@ -529,14 +559,7 @@ class DavinciModel {
struct timeInfo time_info_;
int32_t dataInputTid;

///
/// @ingroup ge
/// @brief Save Batch label Info.
/// @param [in] const OpDescPtr &op_desc
/// @param [in] uintptr_t addr: address value in args block.
/// @return None.
///
void SetBatchLabelAddr(const OpDescPtr &op_desc, uintptr_t addr);
void *GetRunAddress(void *addr) const;

///
/// @ingroup ge
@@ -575,7 +598,7 @@ class DavinciModel {
/// @param [in] batch_label: batch label for multi-batch scenes
/// @return SUCCESS handle successfully / others handle failed
///
Status UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &data_info, bool is_input,
Status UpdateIoTaskArgs(const map<uint32_t, ZeroCopyOffset> &data_info, bool is_input,
const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label);

Status CopyInputData(const InputData &input_data, bool device_data = false);
@@ -587,11 +610,12 @@ class DavinciModel {
Status InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size);
Status InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size);

void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input);
void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, ShapeDescription &shape1, ShapeDescription &shape2);

void SetInputDimsInfo(const vector<int64_t> &model_input_dims, Format &format, InputOutputDescInfo &input);
void SetInputDimsInfo(const vector<int64_t> &input_dims, Format &format, ShapeDescription &shape_info);

Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats);
Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<uint32_t> &input_formats, bool by_dims) const;
Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, vector<uint32_t> &output_formats) const;

Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo);

@@ -603,7 +627,7 @@ class DavinciModel {

uint8_t *MallocWeightsMem(size_t weights_size);

uint8_t* MallocP2PMem(size_t p2p_data_size);
uint8_t *MallocP2PMem(size_t p2p_data_size);

void FreeFeatureMapMem();

@@ -613,6 +637,8 @@ class DavinciModel {

void ReleaseTask();

void ClearTaskAddrs();

void UnbindTaskSinkStream();

bool IsAicpuKernelConnectSpecifiedLayer();
@@ -635,59 +661,33 @@ class DavinciModel {
///
/// @ingroup ge
/// @brief Data Op Initialize.
/// @param [in] ComputeGraphPtr: root graph of the model.
/// @param [in] NodePtr: Data Op.
/// @param [in/out] data_op_index: NetOutput addr size info.
/// @param [in/out] data_op_index: index of courrent count.
/// @param [in/out] data_by_index: Data ordered by index.
/// @return Status
///
Status InitDataOp(const NodePtr &node, uint32_t &data_op_index, map<uint32_t, OpDescPtr> &data_by_index);
Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index,
map<uint32_t, OpDescPtr> &data_by_index);

///
/// @ingroup ge
/// @brief Sort Data op list by index.
/// @param [in] data_by_index: map of Data Op.
/// @return
///
void AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_index);

///
/// @ingroup ge
/// @brief input zero copy node Initialize.
/// @param [in] NodePtr: Data Op.
/// @param [in] output_op_list: list of NetOutput op.
/// @return Status
///
Status InitInputZeroCopyNodes(const NodePtr &node);
Status GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index, const vector<OpDescPtr> &output_op_list);

///
/// @ingroup ge
/// @brief NetOutput Op Initialize.
/// @param [in] ComputeGraphPtr: root graph of the model.
/// @param [in] NodePtr: NetOutput Op.
/// @param [in/out] vector<OpDescPtr>: All NetOutput node in model.
/// @return Status
///
Status InitNetOutput(const NodePtr &node);

///
/// @ingroup ge
/// @brief output zero copy node Initialize.
/// @param [in] NodePtr: Data Op.
/// @return Status
///
Status InitOutputZeroCopyNodes(const NodePtr &node);

///
/// @ingroup ge
/// @brief input zero copy node Initialize for Case.
/// @param [in] NodePtr: Data Op.
/// @return Status
///
Status InitInputBatchLabel(const NodePtr &node);

///
/// @ingroup ge
/// @brief output zero copy node Initialize for Case.
/// @param [in] NodePtr: netoutput Op.
/// @return Status
///
Status InitOutputBatchLabel(const NodePtr &node);
Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list);

///
/// @ingroup ge
@@ -696,7 +696,7 @@ class DavinciModel {
///
Status InitConstant(const OpDescPtr &op_desc);

Status InitVariable(const OpDescPtr &op_desc);
Status InitVariable(const OpDescPtr &op_desc, map<string, OpDescPtr> &variable_by_name);

/// @ingroup ge
/// @brief LabelSet Op Initialize.
@@ -726,7 +726,7 @@ class DavinciModel {
///
Status InitTbeHandle(const OpDescPtr &op_desc);

void StoreTbeHandle(const std::string &handle_key);
void StoreTbeHandle(const string &handle_key);
void CleanTbeHandle();

///
@@ -757,7 +757,7 @@ class DavinciModel {
///
Status BindInputQueue();

Status CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs);
Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, map<const void *, ZeroCopyOffset> &outside_addrs);

///
/// @ingroup ge
@@ -828,98 +828,108 @@ class DavinciModel {

Status DoTaskSink();

void CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result);
void CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result);

Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id);

// get desc info of graph for profiling
Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info);

void SetDataDumperArgs(const ComputeGraphPtr &compute_graph);
void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name);

Status InitL1DataDumperArgs();

Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data,
std::vector<ge::OutputTensorInfo> &outputs);
Status InitModelProfile();
Status SinkModelProfile();

Status SinkTimeProfile(const InputData &current_data);

void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info);
Status InitOutputTensorInfo(const OpDescPtr &op_desc);
Status GenOutputTensorInfo(OutputData *output_data, vector<OutputTensorInfo> &outputs);

Status InitInputDescInfo(const map<uint32_t, OpDescPtr> &data_by_index);
Status InitOutputDescInfo(const vector<OpDescPtr> &output_op_list);

Status InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc);
Status InitAippInfo(uint32_t index, const OpDescPtr &op_desc);
Status InitAippType(uint32_t index, const OpDescPtr &op_desc, const map<uint32_t, OpDescPtr> &data_list);
Status InitAippInputOutputDims(uint32_t index, const OpDescPtr &op_desc);

void ParseAIPPInfo(string in_out_info, InputOutputDims &dims_info);
void SetLabelForDynamic(const NodePtr &node);

void ParseDynamicOutShape(const std::vector<std::string> &str_info, std::vector<vector<int64_t>> &vec_info);
void ParseDynamicOutShape(const vector<string> &str_info, vector<vector<int64_t>> &vec_info);
bool IsGetNextSinkDynamic(const OpDescPtr &op_desc);

Status InitRealSizeAndShapeInfo(const ComputeGraphPtr &compute_graph, const NodePtr &node);
void GetAllGearsInfo(const NodePtr &node);
Status GetGetDynamicDimsNodeInfo(const NodePtr &node);
Status GetGearAndRealOutSizeInfo(size_t input_count, const NodePtr &node);
Status GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node);
Status GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc);
Status GetGearAndRealOutSizeInfo(const ComputeGraphPtr &graph, const NodePtr &node);
Status GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, const NodePtr &case_node);
Status GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node);

bool is_weight_mem_has_inited_;
bool is_feature_map_mem_has_inited_;

uint32_t model_id_;
uint32_t runtime_model_id_;
uint32_t sub_model_id_ = 0;
string name_;

// used for inference data dump
string om_name_;

uint32_t version_;
GeModelPtr ge_model_;
GeModelPtr ge_model_; // release after DavinciModel::Init

bool need_destroy_aicpu_kernel_{false};
vector<std::string> out_node_name_;

map<uint32_t, OpDescPtr> op_list_;
vector<string> out_node_name_;

// data op_desc
vector<OpDescPtr> data_op_list_;
map<uint32_t, OpDescPtr> op_list_; // release after DavinciModel::Init

vector<OpDescPtr> output_op_list_;
map<string, GeTensorDesc> broadcast_variable_;
void *global_step_addr_{nullptr};
uint64_t global_step_size_{0};

vector<OpDescPtr> variable_op_list_;
map<uint32_t, ZeroCopyOffset> new_input_data_info_;
map<uint32_t, ZeroCopyOffset> new_output_data_info_;
map<const void *, ZeroCopyOffset> new_input_outside_addrs_;
map<const void *, ZeroCopyOffset> new_output_outside_addrs_;

std::map<uint32_t, ZeroCopyOffset> new_input_data_info_;
std::map<uint32_t, ZeroCopyOffset> new_output_data_info_;
std::map<const void *, ZeroCopyOffset> new_input_outside_addrs_;
std::map<const void *, ZeroCopyOffset> new_output_outside_addrs_;

std::set<const void *> real_virtual_addrs_;
set<const void *> real_virtual_addrs_;

// output op: save cce op actual needed memory size
vector<int64_t> output_memory_size_list_;

std::thread thread_id_;
thread thread_id_;

std::shared_ptr<ModelListener> listener_;
shared_ptr<ModelListener> listener_;

bool run_flg_;

std::mutex mux_run_flg_;
mutex mux_run_flg_;

int32_t priority_;

vector<rtStream_t> stream_list_;

std::mutex all_hccl_stream_list_mutex_;
mutex all_hccl_stream_list_mutex_;
vector<rtStream_t> all_hccl_stream_list_;

// for reuse hccl_follow_stream
std::mutex capacity_of_stream_mutex_;
std::map<int64_t, std::vector<rtStream_t>> main_follow_stream_mapping_;
mutex capacity_of_stream_mutex_;
map<int64_t, vector<rtStream_t>> main_follow_stream_mapping_;

vector<rtEvent_t> event_list_;

vector<rtLabel_t> label_list_;
set<uint32_t> label_id_indication_;

std::mutex outside_addrs_mutex_;
std::vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr.
std::set<const void *> copy_only_addrs_; // Address need copy to original place.

// {op_id, batch_label}
std::map<int64_t, std::string> zero_copy_op_id_batch_label_;
// {batch_label, addrs}
std::map<std::string, std::set<uintptr_t>> zero_copy_batch_label_addrs_;
mutex outside_addrs_mutex_;
vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr.
set<const void *> copy_only_addrs_; // Address need copy to original place.

std::vector<TaskInfoPtr> task_list_;
vector<TaskInfoPtr> task_list_;
// rt_model_handle
rtModel_t rt_model_handle_;

@@ -937,39 +947,41 @@ class DavinciModel {
rtAicpuDeployType_t deploy_type_{AICPU_DEPLOY_RESERVED};

// ACL queue schedule, save queue ids for Init.
std::vector<TaskInfoPtr> cpu_task_list_;
std::vector<uint32_t> input_queue_ids_; // input queue ids created by caller.
std::vector<uint32_t> output_queue_ids_; // output queue ids created by caller.
std::vector<uintptr_t> input_mbuf_list_; // input mbuf created by dequeue task.
std::vector<uintptr_t> output_mbuf_list_; // output mbuf created by dequeue task.
vector<TaskInfoPtr> cpu_task_list_;
vector<uint32_t> input_queue_ids_; // input queue ids created by caller.
vector<uint32_t> output_queue_ids_; // output queue ids created by caller.
vector<uintptr_t> input_mbuf_list_; // input mbuf created by dequeue task.
vector<uintptr_t> output_mbuf_list_; // output mbuf created by dequeue task.

uint64_t session_id_;

uint32_t device_id_;

std::mutex flowctrl_op_index_internal_map_mutex_;
std::map<uint32_t, uint32_t> flowctrl_op_index_internal_map_;
mutex flowctrl_op_index_internal_map_mutex_;
map<uint32_t, uint32_t> flowctrl_op_index_internal_map_;

std::vector<rtStream_t> active_stream_list_;
std::set<uint32_t> active_stream_indication_;
vector<rtStream_t> active_stream_list_;
set<uint32_t> active_stream_indication_;

std::set<uint32_t> hcom_streams_;
set<uint32_t> hcom_streams_;
RuntimeParam runtime_param_;

static std::mutex tvm_bin_mutex_;
std::set<std::string> tvm_bin_kernel_;
static mutex tvm_bin_mutex_;
set<string> tvm_bin_kernel_;

std::map<std::string, uint32_t> used_tbe_handle_map_;
map<string, uint32_t> used_tbe_handle_map_;

// for profiling task and graph info
std::vector<TaskDescInfo> task_desc_info_;
vector<TaskDescInfo> task_desc_info_;

std::map<std::string, std::pair<uint32_t, uint32_t>> profiler_report_op_info_;

int64_t maxDumpOpNum_;
// for data dump
DataDumper data_dumper_;
uint64_t iterator_count_;
bool is_l1_fusion_enable_;
std::map<OpDescPtr, void *> saved_task_addrs_;
map<OpDescPtr, void *> saved_task_addrs_; // release after DavinciModel::Init
void *l1_fusion_addr_ = nullptr;

bool known_node_ = false;
@@ -977,15 +989,17 @@ class DavinciModel {
void *args_ = nullptr;
void *args_host_ = nullptr;
void *fixed_addrs_ = nullptr;
void *hybrid_addrs_ = nullptr;
uint32_t total_hybrid_args_size_ = 0;
int64_t total_fixed_addr_size_ = 0;
std::map<const void *, void *> knonw_input_data_info_;
std::map<const void *, void *> knonw_output_data_info_;
map<const void *, void *> known_input_data_info_;
map<const void *, void *> known_output_data_info_;
vector<void *> total_io_addrs_;
vector<void *> orig_total_io_addrs_;
bool base_addr_not_changed_ = false;

vector<vector<int64_t>> batch_info_;
std::vector<std::vector<int64_t>> combined_batch_info_;
vector<vector<int64_t>> combined_batch_info_;
vector<string> user_designate_shape_order_;
int32_t dynamic_type_ = 0;
bool is_dynamic_ = false;
@@ -993,29 +1007,54 @@ class DavinciModel {
vector<uint64_t> batch_size_;
// key: input tensor name, generally rts op;
// value: the fixed addr of input anchor, same as the peer output anchor addr of the peer op
std::map<string, int64_t> tensor_name_to_fixed_addr_size_;
map<string, int64_t> tensor_name_to_fixed_addr_size_;

// key: input tensor name, generally rts op; value: the peer output anchor of the peer op
std::map<string, int64_t> tensor_name_to_peer_output_index_;
map<string, int64_t> tensor_name_to_peer_output_index_;
// if model is first execute
bool is_first_execute_;
// for op debug
std::mutex debug_reg_mutex_;
mutex debug_reg_mutex_;
bool is_op_debug_reg_ = false;
void *op_debug_addr_ = nullptr;
void *p2p_debug_addr_ = nullptr;
bool is_new_model_desc_{false};
bool is_online_infer_dynamic_ = false;
bool is_getnext_sink_dynamic_ = false;
std::vector<int64_t> cur_dynamic_dims_;
vector<int32_t> cur_dynamic_dims_;
void *netoutput_last_input_addr_ = nullptr;
int64_t netoutput_last_input_size_ = 0;
size_t shape_of_cur_dynamic_dims_ = 0;
// key: input_index: input is merge node; value: each gear info and each output size
std::map<size_t, std::map<vector<int64_t>, int64_t>> merge_nodes_gear_and_real_out_size_info_;
map<size_t, map<vector<int32_t>, int64_t>> merge_nodes_gear_and_real_out_size_info_;
// key: input_index: input is merge node; value: each gear info and each output shape
std::map<size_t, std::map<vector<int64_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_;
std::vector<std::vector<int64_t>> all_gears_info_;
map<size_t, map<vector<int32_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_;
vector<vector<int32_t>> all_gears_info_;

multimap<uint32_t, uint32_t> op_id_map_;
vector<ProfileInfo> profile_list_;

// For super kernel.
SuperKernelTaskInfo skt_info_;

bool is_dynamic_aipp_ = false;
vector<string> dynamic_output_shape_info_;

vector<vector<void *>> input_addrs_list_;
vector<vector<void *>> output_addrs_list_;

vector<int64_t> output_buffer_size_;
vector<GeShape> output_shape_info_;

map<uint32_t, OriginInputInfo> orig_input_info_;
map<uint32_t, AippConfigInfo> aipp_info_list_;
map<uint32_t, pair<InputAippType, size_t>> aipp_type_list_;
map<uint32_t, pair<vector<InputOutputDims>, vector<InputOutputDims>>> aipp_dims_info_;

vector<InputOutputDescInfo> input_descs_;
vector<InputOutputDescInfo> input_descs_dims_;
vector<uint32_t> input_formats_;
vector<InputOutputDescInfo> output_descs_;
vector<uint32_t> output_formats_;
};
} // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_
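A rough sketch (standard C++ only; the field usage is assumed, not taken from the real profiling path) of how the new TaskMemInfo/ProfileInfo structs declared above could aggregate per-task memory into a per-op profile record; fusion_info is omitted for brevity.

#include <cstdint>
#include <iostream>
#include <vector>

struct TaskMemInfo {
  int64_t input_size{0};
  int64_t output_size{0};
  int64_t weight_size{0};
  int64_t workspace_size{0};
  int64_t total_size{0};
};

struct ProfileInfo {
  TaskMemInfo memory_info;
  uint32_t task_count{0};
};

ProfileInfo Accumulate(const std::vector<TaskMemInfo> &tasks) {
  ProfileInfo profile;
  for (const auto &mem : tasks) {
    profile.memory_info.input_size += mem.input_size;
    profile.memory_info.output_size += mem.output_size;
    profile.memory_info.weight_size += mem.weight_size;
    profile.memory_info.workspace_size += mem.workspace_size;
    profile.memory_info.total_size += mem.total_size;
    ++profile.task_count;
  }
  return profile;
}

int main() {
  const ProfileInfo p = Accumulate({{16, 32, 0, 64, 112}, {8, 8, 4, 0, 20}});
  std::cout << p.task_count << " tasks, total " << p.memory_info.total_size << " bytes\n";
  return 0;
}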

+0 -75 ge/graph/load/new_model_manager/davinci_model_parser.cc

@@ -16,82 +16,7 @@

#include "graph/load/new_model_manager/davinci_model_parser.h"

#include <fstream>
#include <memory>
#include <vector>
#include "securec.h"

#include "common/debug/log.h"
#include "graph/load/new_model_manager/davinci_model.h"

namespace ge {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelInfoParser(const ModelData &model, ModelInfo &model_info) {
GE_CHK_RT_RET(rtSetDevice(0));
try {
uint32_t model_len = 0;
uint8_t *model_data = nullptr;

Status ret = DavinciModelParser::ParseModelContent(model, model_data, model_len);

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0)); return ret, "Parse model failed");

auto *file_header = reinterpret_cast<ModelFileHeader *>(model.model_data);

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_header == nullptr, GE_CHK_RT(rtDeviceReset(0));
return PARAM_INVALID, "file_header is null.");

model_info.version = file_header->version;
model_info.is_encrypt = false;
GE_IF_BOOL_EXEC(ENCRYPTED == file_header->is_encrypt, model_info.is_encrypt = true);

std::shared_ptr<DavinciModel> davinci_model =
std::shared_ptr<DavinciModel>(new (std::nothrow) DavinciModel(model.priority, nullptr));

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(davinci_model == nullptr, GE_CHK_RT(rtDeviceReset(0));
return PARAM_INVALID, "davinci_model is null.");

GE_MAKE_GUARD(davinci_model, [&] { davinci_model = nullptr; });

ModelHelper model_helper;
ret = model_helper.LoadModel(model);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((ret != SUCCESS), GE_CHK_RT(rtDeviceReset(0)); return FAILED, "load model failed");

ret = davinci_model->Assign(model_helper.GetGeModel());
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0));
return ret, "Parse davinci model data failed");

ret = davinci_model->Init();

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0));
return ret, "Davinci model init failed");

vector<InputOutputDescInfo> input_list;
vector<InputOutputDescInfo> output_list;

ret = davinci_model->GetInputOutputDescInfo(input_list, output_list);

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0));
return ret, "Davinci model GetInputOutputDescInfo failed");

for (const auto &desc : input_list) {
model_info.input_desc.push_back(desc.shape_info);
}
for (const auto &desc : output_list) {
model_info.output_desc.push_back(desc.shape_info);
}

model_info.name = davinci_model->Name();
} catch (...) {
DOMI_LOGE("OM model parser failed, some exceptions occur !");
GE_CHK_RT(rtDeviceReset(0));
return FAILED;
}

GE_CHK_RT(rtDeviceReset(0));

return SUCCESS;
}

DavinciModelParser::DavinciModelParser() {}

DavinciModelParser::~DavinciModelParser() {}


+303 -122 ge/graph/load/new_model_manager/model_manager.cc

@@ -18,6 +18,8 @@

#include <string>

#include "mmpa/mmpa_api.h"
#include "aicpu/aicpu_schedule/aicpu_op_type_list.h"
#include "common/dump/dump_manager.h"
#include "common/l2_cache_optimize.h"
#include "common/profiling/profiling_manager.h"
@@ -30,6 +32,7 @@
#include "graph/load/new_model_manager/davinci_model_parser.h"
#include "model/ge_root_model.h"
#include "graph/common/local_context.h"
#include "graph/utils/attr_utils.h"
#include "common/formats/utils/formats_trans_utils.h"
#include "hybrid/hybrid_davinci_model.h"

@@ -40,9 +43,7 @@ const int kCmdParSize = 2;
const int kDumpCmdPairSize = 2;
const std::size_t kProfCmdParaMaxSize = 1000;
const std::size_t kProfStartCmdParaSize = 2;
const std::string kCmdTypeProfile = "profile";
const std::string kCmdTypeDump = "dump";
const std::string kCmdTypeProfiling = "profiling";
const std::string kCmdTypeProfInit = "prof_init";
const std::string kCmdTypeProfFinalize = "prof_finalize";
const std::string kCmdTypeProfStart = "prof_start";
@@ -51,6 +52,9 @@ const std::string kCmdTypeProfModelSubscribe = "prof_model_subscribe";
const std::string kCmdTypeProfModelUnsubscribe = "prof_model_cancel_subscribe";
const char *const kBatchLoadBuf = "batchLoadsoFrombuf";
const char *const kDeleteCustOp = "deleteCustOp";
const int kTimeSpecNano = 1000000000;
const int kTimeSpecMiro = 1000000;
const int kOpNameMaxSize = 100;
struct CustAicpuSoBuf {
uint64_t kernelSoBuf;
uint32_t kernelSoBufLen;
@@ -77,7 +81,8 @@ ModelManager::ModelManager() {
session_id_bias_ = 0;
}

Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id) {
Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id,
uint32_t sub_model_id) {
STR_FWK_OP_KERNEL param_base = {};
void *devicebase = nullptr;
void *aicpu_kernel_addr = nullptr;
@@ -87,10 +92,12 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
param_base.fwkKernelBase.fwk_kernel.sessionID = session_id;
if (op_type == aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY) {
std::vector<uint64_t> v_aicpu_kernel;
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id) + "_" +
std::to_string(sub_model_id);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
auto iter = model_aicpu_kernel_.find(model_key);
if (iter != model_aicpu_kernel_.end()) {
GELOGD("kernel destroy session_id %lu, model_id %u.", session_id, model_id);
GELOGD("kernel destroy session_id %lu, model_id %u, sub_model_id %u..", session_id, model_id, sub_model_id);
v_aicpu_kernel = model_aicpu_kernel_.at(model_key);
// Insert size of aicpu kernel vector in the first element
v_aicpu_kernel.insert(v_aicpu_kernel.begin(), v_aicpu_kernel.size());
@@ -175,7 +182,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
}

void ModelManager::DestroyAicpuSession(uint64_t session_id) {
std::lock_guard<std::mutex> lock(sess_ids_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
auto it = sess_ids_.find(session_id);
if (it == sess_ids_.end()) {
GELOGI("The session: %lu not created.", session_id);
@@ -188,7 +195,7 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) {
GE_CHK_RT(rtSetDevice(static_cast<int32_t>(GetContext().DeviceId())));
}

Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_DESTROY, session_id, 0);
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_DESTROY, session_id, 0, 0);
if (ret != SUCCESS) {
GELOGW("The session: %lu destroy failed.", session_id);
} else {
@@ -204,7 +211,7 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) {
}

ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
auto hybrid_davinci_model = hybrid_model_map_.find(model_id);
if (hybrid_davinci_model != hybrid_model_map_.end()) {
uint64_t session_id = hybrid_davinci_model->second->GetSessionId();
@@ -214,20 +221,22 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {

auto it = model_map_.find(model_id);
if (it == model_map_.end()) {
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id);
return GE_EXEC_MODEL_ID_INVALID;
GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exist.", model_id);
return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID;
}
uint64_t session_id = it->second->GetSessionId();
DestroyAicpuSession(session_id);
return SUCCESS;
}

ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) {
ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id) {
GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id);
std::lock_guard<std::mutex> lock(sess_ids_mutex_);
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id) + "_" +
std::to_string(sub_model_id);
if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) {
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id);
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id,
sub_model_id);
if (ret != SUCCESS) {
GELOGE(FAILED, "Destroy aicpu kernel failed.");
return FAILED;
@@ -236,10 +245,12 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_
return SUCCESS;
}

ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) {
std::lock_guard<std::mutex> lock(sess_ids_mutex_);
ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id,
uint64_t kernel_id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::vector<uint64_t> v_aicpu_kernel;
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id) + "_" +
std::to_string(sub_model_id);
if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) {
v_aicpu_kernel = model_aicpu_kernel_.at(model_key);
}
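Sketch only, with a hypothetical helper name: the aicpu-kernel bookkeeping key built in the hunks above, now also qualified by the sub-model id so sub-models under the same session and model get distinct entries.

#include <cstdint>
#include <iostream>
#include <string>

std::string MakeModelKey(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id) {
  return std::to_string(session_id) + "_" + std::to_string(model_id) + "_" +
         std::to_string(sub_model_id);
}

int main() {
  // Two sub-models under the same session/model now map to different keys.
  std::cout << MakeModelKey(7, 42, 0) << '\n';  // "7_42_0"
  std::cout << MakeModelKey(7, 42, 1) << '\n';  // "7_42_1"
  return 0;
}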
@@ -249,7 +260,7 @@ ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_i
}

ModelManager::~ModelManager() {
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
model_map_.clear();
model_aicpu_kernel_.clear();
cust_aicpu_so_.clear();
@@ -345,7 +356,7 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge

GELOGI("Parse model %u success.", model_id);

davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 +
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * kTimeSpecNano +
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond
davinci_model->SetProfileTime(MODEL_LOAD_END);
} while (0);
@@ -357,24 +368,25 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge

void ModelManager::InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model) {
GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", id);
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
model_map_[id] = davinci_model;
}

void ModelManager::InsertModel(uint32_t id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) {
GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", id);
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
hybrid_model_map_[id] = hybrid_model;
}

Status ModelManager::DeleteModel(uint32_t id) {
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);

auto it = model_map_.find(id);
auto hybrid_model_it = hybrid_model_map_.find(id);
if (it != model_map_.end()) {
uint64_t session_id = it->second->GetSessionId();
std::string model_key = std::to_string(session_id) + "_" + std::to_string(id);
std::string model_key = std::to_string(session_id) + "_" + std::to_string(id) + "_" +
std::to_string(it->second->SubModelId());
auto iter_aicpu_kernel = model_aicpu_kernel_.find(model_key);
if (iter_aicpu_kernel != model_aicpu_kernel_.end()) {
(void)model_aicpu_kernel_.erase(iter_aicpu_kernel);
@@ -383,22 +395,22 @@ Status ModelManager::DeleteModel(uint32_t id) {
} else if (hybrid_model_it != hybrid_model_map_.end()) {
(void)hybrid_model_map_.erase(hybrid_model_it);
} else {
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id);
return GE_EXEC_MODEL_ID_INVALID;
GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exist.", id);
return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID;
}

return SUCCESS;
}

std::shared_ptr<DavinciModel> ModelManager::GetModel(uint32_t id) {
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);

auto it = model_map_.find(id);
return (it == model_map_.end()) ? nullptr : it->second;
}

std::shared_ptr<hybrid::HybridDavinciModel> ModelManager::GetHybridModel(uint32_t id) {
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);

auto it = hybrid_model_map_.find(id);
return (it == hybrid_model_map_.end()) ? nullptr : it->second;
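The map lock is switched from std::mutex to std::recursive_mutex throughout, so a thread that already holds map_mutex_ can re-enter accessors such as GetModel without self-deadlocking. A standalone illustration of that re-entrancy (generic names, not GE code):

#include <mutex>

std::recursive_mutex map_lock;

int Lookup() {                          // analogous to GetModel: acquires the same lock
  std::lock_guard<std::recursive_mutex> guard(map_lock);
  return 1;
}

int RemoveThenLookup() {                // analogous to a caller that already holds the lock
  std::lock_guard<std::recursive_mutex> guard(map_lock);
  return Lookup();                      // fine with recursive_mutex; std::mutex would deadlock here
}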
@@ -455,8 +467,8 @@ Status ModelManager::DataInput(const InputData &input_data, OutputData &output_d

Status ModelManager::GetCurDynamicDims(const vector<vector<int64_t>> &user_real_input_dims,
const vector<pair<string, vector<int64_t>>> &user_input_dims,
vector<int64_t> &cur_dynamic_dims) {
GELOGD(" Start get cur dynamic dims.");
vector<int32_t> &cur_dynamic_dims) {
GELOGD("Start get cur dynamic dims.");
if (user_real_input_dims.size() != user_input_dims.size()) {
GELOGE(INTERNAL_ERROR,
"The input count of user: %zu should be equal to the data count of graph: %zu",
@@ -473,7 +485,7 @@ Status ModelManager::GetCurDynamicDims(const vector<vector<int64_t>> &user_real_
}
for (size_t j = 0; j < user_input_dims.at(i).second.size(); ++j) {
if (user_input_dims.at(i).second.at(j) < 0) {
cur_dynamic_dims.emplace_back(user_real_input_dims[i][j]);
cur_dynamic_dims.emplace_back(static_cast<int32_t>(user_real_input_dims[i][j]));
}
}
}
@@ -518,7 +530,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT
input_data.blobs.push_back(data);
}
if (!GetLocalOmgContext().user_input_dims.empty() && GetLocalOmgContext().need_multi_batch) {
std::vector<int64_t> cur_dynamic_dims;
std::vector<int32_t> cur_dynamic_dims;
if (!GetLocalOmgContext().user_real_input_dims.empty()) {
if (GetCurDynamicDims(GetLocalOmgContext().user_real_input_dims, GetLocalOmgContext().user_input_dims,
cur_dynamic_dims) != SUCCESS) {
@@ -526,9 +538,9 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT
return INTERNAL_ERROR;
}
DataBuffer data;
data.data = new(std::nothrow) int64_t[cur_dynamic_dims.size()];
data.data = new(std::nothrow) int32_t[cur_dynamic_dims.size()];
GE_CHECK_NOTNULL(data.data);
uint64_t length = static_cast<uint64_t>(cur_dynamic_dims.size() * sizeof(int64_t));
uint32_t length = static_cast<uint32_t>(cur_dynamic_dims.size() * sizeof(int32_t));
GE_CHK_BOOL_EXEC(memcpy_s(data.data, length, cur_dynamic_dims.data(), length) == EOK, return INTERNAL_ERROR,
"Failed to memcpy data.");
data.length = length;
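The dynamic-dims blob is now filled with int32_t values, so its byte length is count * sizeof(int32_t) and fits in a uint32_t. A standalone sketch of that packing (plain memcpy stands in for the checked memcpy_s used above):

#include <cstdint>
#include <cstring>
#include <new>
#include <vector>

// Returns a caller-owned blob holding the dims; length receives the byte count copied.
static int32_t *PackDynamicDims(const std::vector<int32_t> &dims, uint32_t &length) {
  length = static_cast<uint32_t>(dims.size() * sizeof(int32_t));
  int32_t *blob = new (std::nothrow) int32_t[dims.size()];
  if (blob != nullptr && length > 0) {
    std::memcpy(blob, dims.data(), length);
  }
  return blob;
}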
@@ -629,8 +641,7 @@ Status ModelManager::Stop(uint32_t model_id) {
///
Status ModelManager::HandleCommand(const Command &command) {
static const std::map<std::string, std::function<uint32_t(const Command &)>> cmds = {
{kCmdTypeProfile, HandleProfileCommand}, {kCmdTypeDump, HandleDumpCommand},
{kCmdTypeProfiling, HandleAclProfilingCommand}, {kCmdTypeProfInit, HandleProfInitCommand},
{kCmdTypeDump, HandleDumpCommand}, {kCmdTypeProfInit, HandleProfInitCommand},
{kCmdTypeProfFinalize, HandleProfFinalizeCommand}, {kCmdTypeProfStart, HandleProfStartCommand},
{kCmdTypeProfStop, HandleProfStopCommand},
{kCmdTypeProfModelSubscribe, HandleProfModelSubscribeCommand},
@@ -645,21 +656,6 @@ Status ModelManager::HandleCommand(const Command &command) {
}
}

Status ModelManager::HandleAclProfilingCommand(const Command &command) {
if (command.cmd_params.size() < kCmdParSize) {
GELOGE(PARAM_INVALID, "When the cmd_type is 'profiling', the size of cmd_params must larger than 2.");
return PARAM_INVALID;
}

std::string map_key = command.cmd_params[0];
std::string value = command.cmd_params[1];
if (map_key == PROFILE_CONFIG) {
ProfilingManager::Instance().SetProfilingConfig(value);
}

return SUCCESS;
}

Status ModelManager::GetModelByCmd(const Command &command,
std::shared_ptr<DavinciModel> &davinci_model) {
if (command.cmd_params.size() < kCmdParSize) {
@@ -806,29 +802,6 @@ Status ModelManager::HandleProfStopCommand(const Command &command) {
return SUCCESS;
}

Status ModelManager::HandleProfileCommand(const Command &command) {
if (command.cmd_params.size() < kCmdParSize) {
GELOGE(PARAM_INVALID, "When the cmd_type is 'profile', the size of cmd_params must larger than 2.");
return PARAM_INVALID;
}

std::string map_key = command.cmd_params[0];
std::string value = command.cmd_params[1];

GELOGI("Profiling mode, Command key:%s , value:%s ", map_key.c_str(), value.c_str());

auto iter = PROFILE_COMPONENT_MAP.find(map_key);
if (iter != PROFILE_COMPONENT_MAP.end()) {
std::string property_value = (value == "on") ? "1" : "0";
PropertiesManager::Instance().SetPropertyValue(iter->second, property_value);
}

if ((map_key == PROFILER_JOBCTX || map_key == PROFILER_TARGET_PATH || map_key == RTS_PROFILE_PATH)) {
PropertiesManager::Instance().SetPropertyValue(map_key, value);
}
return SUCCESS;
}

static Status ParserPara(const Command &command, const string &dump_key, string &dump_value) {
auto iter = std::find(command.cmd_params.begin(), command.cmd_params.end(), dump_key);
if (iter != command.cmd_params.end()) {
@@ -940,12 +913,10 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu
}

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID,
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetInputOutputDescInfo Failed, Invalid model id %u!", model_id);

davinci_model->SetModelDescVersion(new_model_desc);

return davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats);
return davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats, new_model_desc);
}

///
@@ -1008,8 +979,9 @@ Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id,
}

Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHECK_NOTNULL(davinci_model);
auto davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetCurShape Failed, Invalid Model ID %u!", model_id);
davinci_model->GetCurShape(batch_info, dynamic_type);
return SUCCESS;
}
@@ -1022,22 +994,12 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynami
}

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHECK_NOTNULL(davinci_model);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetModelAttr Failed, Invalid Model ID %u!", model_id);
davinci_model->GetModelAttr(dynamic_output_shape_info);
return SUCCESS;
}

Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &inputFormats,
std::vector<uint32_t> &outputFormats) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!",
model_id);

return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats);
}

///
/// @ingroup ge
/// @brief Get AIPP info
@@ -1046,25 +1008,27 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
/// @param [out] aipp_info
/// @return execute result
///
Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
Status ModelManager::GetAippInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetAIPPInfo failed, invalid model_id is %u.",
model_id);

return davinci_model->GetAIPPInfo(index, aipp_info);
"GetAIPPInfo failed, invalid model_id is %u.", model_id);
return davinci_model->GetAippInfo(index, aipp_info);
}

Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetAIPPInfo failed, invalid model_id is %u.",
model_id);

"GetAIPPInfo failed, invalid model_id is %u.", model_id);
return davinci_model->GetAippType(index, type, aipp_index);
}

Status ModelManager::GenSessionId(uint64_t &session_id) {
const uint64_t kSessionTimeMask = 0xffffffffffff0000;
const uint64_t kSessionPidMask = 0x000000000000ff00;
const uint64_t kSessionBiasMask = 0x00000000000000ff;

const uint64_t kMaskPerOffset = 8;

std::lock_guard<std::mutex> lock(session_id_create_mutex_);

mmTimeval tv;
@@ -1072,12 +1036,14 @@ Status ModelManager::GenSessionId(uint64_t &session_id) {
GELOGE(INTERNAL_ERROR, "Failed to get current time.");
return INTERNAL_ERROR;
}
session_id = static_cast<uint64_t>(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us
uint64_t timestamp = static_cast<uint64_t>(tv.tv_sec * kTimeSpecMiro + tv.tv_usec); // 1000000us

static uint32_t pid = mmGetPid();

session_id_bias_++;
// max bias 100.
session_id_bias_ = session_id_bias_ % 100;
session_id = session_id * 100 + session_id_bias_;
session_id = ((timestamp<<kMaskPerOffset<<kMaskPerOffset) & kSessionTimeMask) +
((pid<<kMaskPerOffset) & kSessionPidMask) + (session_id_bias_ & kSessionBiasMask);

GELOGD("Generate new session id: %lu.", session_id);
return SUCCESS;
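With the new masks, a session id packs the low 48 bits of the microsecond timestamp into bits 16..63, the process id into bits 8..15, and the rolling bias (kept below 100) into bits 0..7. A standalone sketch of that layout (OR is used instead of + since the fields do not overlap):

#include <cstdint>

static uint64_t PackSessionId(uint64_t timestamp_us, uint32_t pid, uint32_t bias) {
  const uint64_t kTimeMask = 0xffffffffffff0000ULL;   // bits 16..63
  const uint64_t kPidMask  = 0x000000000000ff00ULL;   // bits 8..15
  const uint64_t kBiasMask = 0x00000000000000ffULL;   // bits 0..7
  const uint64_t kShift = 8;
  return ((timestamp_us << kShift << kShift) & kTimeMask) |
         ((static_cast<uint64_t>(pid) << kShift) & kPidMask) |
         (static_cast<uint64_t>(bias) & kBiasMask);
}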
@@ -1086,15 +1052,22 @@ Status ModelManager::GenSessionId(uint64_t &session_id) {
Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener,
void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
GE_CHK_BOOL_RET_STATUS(model.key.empty() || mmAccess2(model.key.c_str(), M_F_OK) == EN_OK,
ACL_ERROR_GE_PARAM_INVALID,
"input key file path %s is invalid, %s", model.key.c_str(), strerror(errno));
ACL_ERROR_GE_PARAM_INVALID, "input key file path %s is invalid, %s", model.key.c_str(), strerror(errno));
GenModelId(&model_id);

shared_ptr<DavinciModel> davinci_model = nullptr;
mmTimespec timespec = mmGetTickCount();

ModelHelper model_helper;
Status ret = model_helper.LoadModel(model);
Status ret = model_helper.LoadRootModel(model);
if (model_helper.GetModelType()) {
bool is_shape_unknown = false;
GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown),
"CheckIsUnknownShape failed, model id:%u", model_id);
if (is_shape_unknown || GetContext().GetHostExecFlag()) {
return DoLoadHybridModelOnline(model_id, model_helper.GetGeRootModel(), listener);
}
}
if (ret != SUCCESS) {
GELOGE(ret, "load model failed.");
return ret;
@@ -1108,8 +1081,8 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
} catch (...) {
GELOGE(INTERNAL_ERROR, "Make shared failed since other exception raise");
return INTERNAL_ERROR;
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed since other exception raise");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
ret = davinci_model->Assign(ge_model);
if (ret != SUCCESS) {
@@ -1121,7 +1094,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
int32_t device_id = 0;
rtError_t rt_ret = rtGetDevice(&device_id);
if (rt_ret != RT_ERROR_NONE || device_id < 0) {
GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
davinci_model->SetDeviceId(device_id);
@@ -1148,7 +1121,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model

GELOGI("Parse model %u success.", model_id);

davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 +
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * kTimeSpecNano +
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond
davinci_model->SetProfileTime(MODEL_LOAD_END);

@@ -1252,14 +1225,16 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy
}

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u.", model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"Invalid model id %u, check whether model has been loaded or not.", model_id);

if (davinci_model->NeedDestroyAicpuKernel()) {
GELOGI("Start to destroy specified aicpu kernel.");
// Zero copy is enabled by default, no need to judge.
uint64_t session_id_davinci = davinci_model->GetSessionId();
uint32_t model_id_davinci = davinci_model->GetModelId();
Status status = DestroyAicpuKernel(session_id_davinci, model_id_davinci);
uint32_t sub_model_id = davinci_model->SubModelId();
Status status = DestroyAicpuKernel(session_id_davinci, model_id_davinci, sub_model_id);
if (status != SUCCESS) {
GELOGW("Destroy specified aicpu kernel failed, session id is %lu, model id is %u.", session_id_davinci,
model_id_davinci);
@@ -1275,11 +1250,11 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy
}

Status ModelManager::CreateAicpuSession(uint64_t session_id) {
std::lock_guard<std::mutex> lock(sess_ids_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
auto it = sess_ids_.find(session_id);
// never been created by any model
if (it == sess_ids_.end()) {
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_CREATE, session_id, 0);
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_CREATE, session_id, 0, 0);
if (ret == SUCCESS) {
(void)sess_ids_.insert(session_id);
GELOGI("The session: %lu create success.", session_id);
@@ -1289,13 +1264,13 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) {
return SUCCESS;
}

Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name) {
GELOGI("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str());
Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name, bool &loaded) {
GELOGD("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str());
std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr());
if (aicpu_kernel == nullptr) {
GELOGE(INTERNAL_ERROR, "cust aicpu op %s can't find kernel!", op_desc->GetName().c_str());
return INTERNAL_ERROR;
GELOGI("cust aicpu op %s has no corresponding kernel!", op_desc->GetName().c_str());
return SUCCESS;
}

// get current context
@@ -1313,18 +1288,24 @@ Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_
std::map<string, CustAICPUKernelPtr> new_so_name;
new_so_name.insert({so_name, aicpu_kernel});
cust_aicpu_so_[resource_id] = new_so_name;
GELOGI("LoadCustAicpuSo new aicpu so resource id %lu", resource_id);
loaded = false;
GELOGD("LoadCustAicpuSo new aicpu so name %s, resource id %lu", so_name.c_str(), resource_id);
return SUCCESS;
}
auto it_so_name = it->second.find(so_name);
if (it_so_name == it->second.end()) {
it->second.insert({so_name, aicpu_kernel});
GELOGI("LoadCustAicpuSo add aicpu so resource id %lu", resource_id);
loaded = false;
GELOGD("LoadCustAicpuSo add aicpu so name %s, resource id %lu", so_name.c_str(), resource_id);
return SUCCESS;
}
loaded = true;
GELOGD("LoadCustAicpuSo so name %s has been loaded.", so_name.c_str());
return SUCCESS;
}
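LoadCustAicpuSo now reports through the new loaded out-parameter whether the so was already recorded for the current resource, so callers can skip duplicate work. A standalone illustration of that out-flag pattern (generic names, not GE code):

#include <cstdint>
#include <map>
#include <set>
#include <string>

// resource id -> so names already registered
static std::map<uint64_t, std::set<std::string>> g_registered_so;

static void RegisterSo(uint64_t resource_id, const std::string &so_name, bool &loaded) {
  auto &names = g_registered_so[resource_id];
  loaded = !names.insert(so_name).second;   // insert fails -> the so was already known
}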

Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) {
GELOGD("Aicpu kernel launch task in, kernel name %s.", kernel_name.c_str());
std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
if (cust_aicpu_so_.size() == 0) return SUCCESS;
// get current context
@@ -1488,8 +1469,7 @@ void ModelManager::GenModelId(uint32_t *id) {
if (id == nullptr) {
return;
}

std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
*id = ++max_model_id_;
}

@@ -1561,4 +1541,205 @@ Status ModelManager::EnableExceptionDump(const std::map<string, string> &options
return SUCCESS;
}

Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_optype_list,
std::vector<std::string> &aicpu_tf_optype_list) {
std::string kernel_name = "checkOpType";
GELOGI("LaunchKernelCheckAicpuOpType in, kernel name %s", kernel_name.c_str());
std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
std::vector<SysOpInfo> req_aicpu_op_info_list;
std::vector<SysOpInfo> res_aicpu_op_info_list;
std::vector<ReturnCode> res_ret_code_list;

if (aicpu_optype_list.empty() && aicpu_tf_optype_list.empty()) {
GELOGI("No need to check aicpu op type.");
return SUCCESS;
}

vector<void *> allocated_mem;
rtError_t status;
rtStream_t stream = nullptr;
void *args = nullptr;

void *d_req_op_list = nullptr;
void *d_res_op_list = nullptr;
void *d_ret_code_list = nullptr;

size_t aicpu_op_nums = aicpu_optype_list.size();
size_t tf_op_nums = aicpu_tf_optype_list.size();
size_t op_nums = aicpu_op_nums + tf_op_nums;
std::function<void()> callback = [&]() {
for (auto mem : allocated_mem) {
GE_CHK_RT(rtFree(mem));
}
};
GE_MAKE_GUARD(release, callback);
// malloc sysOpInfoList in SysOpCheckInfo
status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
allocated_mem.push_back(d_req_op_list);

// malloc sysOpInfoList in SysOpCheckResp
status = rtMalloc(&d_res_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
allocated_mem.push_back(d_res_op_list);

// malloc returnCodeList in SysOpCheckResp
status = rtMalloc(&d_ret_code_list, op_nums * sizeof(ReturnCode), RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
allocated_mem.push_back(d_ret_code_list);

for (const auto &op_type : aicpu_optype_list) {
SysOpInfo op_info;
// malloc op_type name in SysOpInfo
void *d_op_type_name = nullptr;
status = rtMalloc(&d_op_type_name, op_type.length(), RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
allocated_mem.push_back(d_op_type_name);
GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.length(), op_type.c_str(), op_type.length(), RT_MEMCPY_HOST_TO_DEVICE));
op_info.opType = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_op_type_name));
op_info.opLen = op_type.length();
op_info.kernelsType = CPU_KERNEL;
req_aicpu_op_info_list.emplace_back(op_info);
}

for (const auto &op_type : aicpu_tf_optype_list) {
SysOpInfo op_info;
// malloc op_type name in SysOpInfo
void *d_op_type_name = nullptr;
status = rtMalloc(&d_op_type_name, op_type.size(), RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
allocated_mem.push_back(d_op_type_name);
GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.size(), op_type.c_str(), op_type.size(), RT_MEMCPY_HOST_TO_DEVICE));
op_info.opType = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_op_type_name));
op_info.opLen = op_type.size();
op_info.kernelsType = TF_KERNEL;
req_aicpu_op_info_list.emplace_back(op_info);
}
GELOGI("Check aicpu op all attr size: %zu, real attr size: %zu.", op_nums, req_aicpu_op_info_list.size());
GE_CHK_RT(rtMemcpy(d_req_op_list, sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), req_aicpu_op_info_list.data(),
sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), RT_MEMCPY_HOST_TO_DEVICE));

SysOpCheckInfo op_check_info_req = { 0 };
SysOpCheckResp op_check_info_res = { 0 };
op_check_info_req.opListNum = op_nums;
op_check_info_req.offSetLen = sizeof(SysOpCheckInfo);
op_check_info_req.sysOpInfoList = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_req_op_list));

op_check_info_res.opListNum = 0;
op_check_info_res.isWithoutJson = 0;
op_check_info_res.returnCodeList = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_ret_code_list));
op_check_info_res.sysOpInfoList = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_res_op_list));

uint32_t args_size = sizeof(SysOpCheckInfo) + sizeof(SysOpCheckResp);
status = rtMalloc(&args, args_size, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
allocated_mem.push_back(args);
GE_CHK_RT(rtMemcpy(args, sizeof(SysOpCheckInfo), reinterpret_cast<void *>(&op_check_info_req), sizeof(SysOpCheckInfo),
RT_MEMCPY_HOST_TO_DEVICE));
GE_CHK_RT(rtMemcpy(
reinterpret_cast<void *>(static_cast<uintptr_t>(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) +
op_check_info_req.offSetLen)), sizeof(SysOpCheckResp), reinterpret_cast<void *>(&op_check_info_res),
sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE));
GE_CHK_RT(rtStreamCreate(&stream, 0));
GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream));

status = rtStreamSynchronize(stream);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status);
GE_CHK_RT(rtStreamDestroy(stream));
return RT_ERROR_TO_GE_STATUS(status);
}

// Check the response
SysOpCheckResp *d_op_check_info_res =
reinterpret_cast<SysOpCheckResp *>(reinterpret_cast<void *>(static_cast<uintptr_t>(static_cast<uint64_t>(
reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen)));
(void)memset_s(&op_check_info_res, sizeof(SysOpCheckResp), 0, sizeof(SysOpCheckResp));
GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp),
RT_MEMCPY_DEVICE_TO_HOST));

if (op_check_info_res.isWithoutJson) {
GELOGI("No need to check aicpu in this scenoria.");
GE_CHK_RT(rtStreamDestroy(stream));
return SUCCESS;
}
uint64_t res_op_nums = op_check_info_res.opListNum;
GELOGI("Check aicpu type, is without json: %d, res op num: %lu.", op_check_info_res.isWithoutJson, res_op_nums);
if (res_op_nums != 0) {
res_ret_code_list.clear();
res_ret_code_list.resize(res_op_nums);
res_aicpu_op_info_list.clear();
res_aicpu_op_info_list.resize(res_op_nums);
GE_CHK_RT(rtMemcpy(res_ret_code_list.data(), sizeof(ReturnCode) * res_op_nums,
reinterpret_cast<void *>(static_cast<uintptr_t>(op_check_info_res.returnCodeList)),
sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(), sizeof(SysOpInfo) * res_op_nums,
reinterpret_cast<void *>(static_cast<uintptr_t>(op_check_info_res.sysOpInfoList)),
sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) {
GELOGE(FAILED, "Number of retcode is not equal to number of op type.");
GE_CHK_RT(rtStreamDestroy(stream));
return FAILED;
}
std::string fail_reason;
for (uint32_t i = 0; i < res_op_nums; i++) {
ReturnCode ret_code = res_ret_code_list.at(i);
SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i);
GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType,
aicpu_info.kernelsType, aicpu_info.opLen, ret_code);
std::vector<char> op_name;
op_name.clear();
op_name.resize(kOpNameMaxSize);
GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast<void *>(aicpu_info.opType),
aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST));
std::string kernel_type =
(static_cast<OpKernelType>(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL";
string op_name_str(op_name.data());
fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type +
" ret code:" + std::to_string(static_cast<int>(ret_code)) +
"<0: op_type, 1: format, 2: datatype> \n";
}
fail_reason += "not support.";
GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str());
GE_CHK_RT(rtStreamDestroy(stream));
return FAILED;
}

GE_CHK_RT(rtStreamDestroy(stream));
GELOGI("Cpu kernel launch check optype task success.");
return SUCCESS;
}
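The single args allocation above packs a SysOpCheckInfo header immediately followed by a SysOpCheckResp at offset offSetLen, and the response is read back from that same offset after the kernel finishes. A compact sketch of the layout arithmetic (plain stand-in structs, not the real aicpu headers):

#include <cstdint>

struct CheckInfoStub { uint64_t opListNum; uint64_t offSetLen; uint64_t sysOpInfoList; };
struct CheckRespStub { uint64_t opListNum; uint64_t isWithoutJson; uint64_t returnCodeList; uint64_t sysOpInfoList; };

// Device-side address of the response region, mirroring args + op_check_info_req.offSetLen above.
static uint64_t RespAddr(uint64_t args_base) {
  return args_base + sizeof(CheckInfoStub);
}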

Status ModelManager::CheckAicpuOpList(GeModelPtr ge_model) {
std::vector<std::string> aicpu_optype_list;
std::vector<std::string> aicpu_tf_optype_list;
bool aicpu_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list);
bool tf_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list);
if (!aicpu_need_check && !tf_need_check) {
GELOGI("Graph:%s No need to check aicpu optype.", ge_model->GetGraph().GetName().c_str());
return SUCCESS;
}
GE_CHK_STATUS_RET(LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list),
"Launch check aicpu op type failed.");
return SUCCESS;
}

} // namespace ge
