Browse Source

!1096 update

From: @shenwei41
Reviewed-by: @lilongfei15,@xsmq
Signed-off-by: @xsmq
tags/v1.2.0
mindspore-ci-bot Gitee 3 years ago
parent
commit
8dc712ca01
100 changed files with 893 additions and 361 deletions
  1. +1
    -0
      .gitignore
  2. +8
    -15
      CMakeLists.txt
  3. +30
    -30
      build.sh
  4. +14
    -9
      ge/CMakeLists.txt
  5. +12
    -1
      ge/common/CMakeLists.txt
  6. +9
    -9
      ge/common/helper/model_cache_helper.cc
  7. +3
    -3
      ge/common/helper/model_cache_helper.h
  8. +1
    -1
      ge/common/profiling/ge_profiling.cc
  9. +9
    -1
      ge/executor/CMakeLists.txt
  10. +26
    -1
      ge/ge_local_engine/CMakeLists.txt
  11. +19
    -5
      ge/ge_local_engine/engine/ge_local_engine.h
  12. +15
    -1
      ge/ge_local_engine/engine/host_cpu_engine.h
  13. +1
    -1
      ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h
  14. +15
    -1
      ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h
  15. +1
    -1
      ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h
  16. +1
    -1
      ge/ge_local_engine/ops_kernel_store/op/no_op.h
  17. +1
    -1
      ge/ge_local_engine/ops_kernel_store/op/op.h
  18. +2
    -2
      ge/ge_local_engine/ops_kernel_store/op/op_factory.h
  19. +5
    -1
      ge/ge_runtime/CMakeLists.txt
  20. +40
    -31
      ge/graph/build/memory/block_mem_assigner.cc
  21. +3
    -1
      ge/graph/build/memory/block_mem_assigner.h
  22. +2
    -2
      ge/graph/build/memory/graph_mem_assigner.cc
  23. +1
    -1
      ge/graph/build/run_context.cc
  24. +1
    -17
      ge/graph/build/stream_allocator.cc
  25. +0
    -1
      ge/graph/build/stream_allocator.h
  26. +2
    -9
      ge/graph/build/task_generator.cc
  27. +1
    -1
      ge/graph/common/transop_util.h
  28. +6
    -16
      ge/graph/load/model_manager/cpu_queue_schedule.cc
  29. +1
    -1
      ge/graph/load/model_manager/cpu_queue_schedule.h
  30. +54
    -53
      ge/graph/load/model_manager/davinci_model.cc
  31. +6
    -7
      ge/graph/load/model_manager/davinci_model.h
  32. +2
    -2
      ge/graph/load/model_manager/ts_mem_mall.h
  33. +7
    -3
      ge/graph/load/model_manager/zero_copy_offset.cc
  34. +4
    -3
      ge/graph/load/model_manager/zero_copy_offset.h
  35. +1
    -1
      ge/graph/manager/graph_manager.cc
  36. +2
    -2
      ge/graph/manager/graph_var_manager.h
  37. +1
    -1
      ge/graph/partition/graph_partition.cc
  38. +1
    -1
      ge/graph/partition/graph_partition.h
  39. +2
    -2
      ge/graph/passes/constant_folding_pass.cc
  40. +4
    -4
      ge/graph/passes/constant_folding_pass.h
  41. +5
    -0
      ge/graph/passes/hccl_continuous_memcpy_pass.cc
  42. +1
    -1
      ge/graph/passes/hccl_continuous_memcpy_pass.h
  43. +1
    -1
      ge/graph/passes/hccl_memcpy_pass.h
  44. +53
    -2
      ge/graph/passes/multi_batch_clone_pass.cc
  45. +2
    -0
      ge/graph/passes/multi_batch_clone_pass.h
  46. +1
    -1
      ge/graph/passes/switch_to_stream_switch_pass.h
  47. +1
    -1
      ge/graph/preprocess/multi_batch_copy_graph.cc
  48. +1
    -1
      ge/graph/preprocess/multi_batch_options.cc
  49. +3
    -3
      ge/graph/preprocess/multi_batch_options.h
  50. +26
    -1
      ge/host_cpu_engine/CMakeLists.txt
  51. +19
    -5
      ge/host_cpu_engine/engine/host_cpu_engine.h
  52. +15
    -1
      ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h
  53. +15
    -1
      ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h
  54. +1
    -1
      ge/host_cpu_engine/ops_kernel_store/op/host_op.h
  55. +1
    -1
      ge/host_cpu_engine/ops_kernel_store/op/op.h
  56. +2
    -2
      ge/host_cpu_engine/ops_kernel_store/op/op_factory.h
  57. +1
    -1
      ge/hybrid/common/tensor_value.cc
  58. +3
    -1
      ge/hybrid/executor/hybrid_model_executor.cc
  59. +43
    -2
      ge/hybrid/executor/subgraph_executor.cc
  60. +14
    -1
      ge/hybrid/executor/subgraph_executor.h
  61. +7
    -2
      ge/hybrid/model/hybrid_model.cc
  62. +7
    -1
      ge/hybrid/model/hybrid_model.h
  63. +111
    -24
      ge/hybrid/model/hybrid_model_builder.cc
  64. +2
    -0
      ge/hybrid/model/hybrid_model_builder.h
  65. +3
    -1
      ge/hybrid/node_executor/aicore/aicore_node_executor.cc
  66. +1
    -1
      ge/hybrid/node_executor/aicore/aicore_op_task.cc
  67. +3
    -0
      ge/hybrid/node_executor/aicore/aicore_op_task.h
  68. +5
    -1
      ge/hybrid/node_executor/aicore/aicore_task_builder.cc
  69. +1
    -1
      ge/hybrid/node_executor/aicore/aicore_task_builder.h
  70. +2
    -2
      ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc
  71. +2
    -2
      ge/hybrid/node_executor/ge_local/ge_local_node_executor.h
  72. +1
    -1
      ge/init/gelib.h
  73. +5
    -5
      ge/ir_build/atc_ir_common.cc
  74. +4
    -4
      ge/ir_build/atc_ir_common.h
  75. +1
    -1
      ge/ir_build/ge_ir_build.cc
  76. +12
    -0
      ge/offline/CMakeLists.txt
  77. +1
    -1
      ge/opskernel_manager/ops_kernel_builder_manager.h
  78. +1
    -1
      ge/opskernel_manager/ops_kernel_manager.h
  79. +6
    -0
      ge/plugin/engine/CMakeLists.txt
  80. +8
    -8
      ge/plugin/engine/dnnengines.h
  81. +16
    -2
      ge/plugin/engine/engine_manage.h
  82. +17
    -0
      ge/session/inner_session.cc
  83. +2
    -2
      ge/session/omg.cc
  84. +19
    -1
      ge/single_op/single_op.cc
  85. +4
    -1
      ge/single_op/single_op.h
  86. +40
    -0
      ge/single_op/single_op_model.cc
  87. +4
    -0
      ge/single_op/stream_resource.cc
  88. +1
    -0
      ge/single_op/stream_resource.h
  89. +1
    -1
      ge/stub/gen_stubapi.py
  90. +5
    -5
      inc/external/ge/ge_api.h
  91. +2
    -2
      inc/external/ge/ge_api_error_codes.h
  92. +14
    -0
      inc/external/ge/ge_error_codes.h
  93. +27
    -13
      inc/external/ge/ge_ir_build.h
  94. +1
    -1
      inc/framework/common/debug/ge_log.h
  95. +1
    -1
      inc/framework/common/debug/log.h
  96. +16
    -2
      inc/framework/common/fmk_error_codes.h
  97. +1
    -1
      inc/framework/common/ge_format_util.h
  98. +1
    -1
      inc/framework/common/ge_types.h
  99. +15
    -1
      inc/framework/common/gflags_util.h
  100. +1
    -1
      inc/framework/common/helper/model_helper.h

+ 1
- 0
.gitignore View File

@@ -2,6 +2,7 @@
/build
/output
/prebuilts
/cov
*.ir
*.out



+ 8
- 15
CMakeLists.txt View File

@@ -88,10 +88,8 @@ if (ENABLE_OPEN_SRC)
find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR})
if(PLATFORM STREQUAL "train")
find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
find_module(resource libresource.so ${ASCEND_RUNTIME_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
@@ -101,12 +99,10 @@ if (ENABLE_OPEN_SRC)
elseif(PLATFORM STREQUAL "inference")
find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
find_module(resource libresource.so ${ASCEND_ATC_DIR})
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
if(PRODUCT STREQUAL "flr3")
elseif(PRODUCT STREQUAL "flr1")
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
@@ -116,17 +112,14 @@ if (ENABLE_OPEN_SRC)
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR})
endif()
elseif(PLATFORM STREQUAL "all")
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
find_module(resource libresource.so ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR})
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
else()
message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!")
endif()


+ 30
- 30
build.sh View File

@@ -166,14 +166,14 @@ build_graphengine()
echo "execute command: cmake ${CMAKE_ARGS} .. failed."
return 1
fi
COMMON_TARGET="ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt "
COMMON_TARGET="ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt "
TARGET=${COMMON_TARGET}
if [ "x${PLATFORM}" = "xtrain" ]
then
TARGET="ge_runner ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder fwk_atc.bin ${TARGET}"
TARGET="ge_runner fwk_atc.bin ${TARGET}"
elif [ "x${PLATFORM}" = "xinference" ]
then
TARGET="ge_compiler atc_ge_local_engine atc_ge_local_opskernel_builder atc_host_cpu_engine atc_host_cpu_opskernel_builder atc_atc.bin opensrc_ascendcl ${TARGET}"
TARGET="ge_compiler atc_atc.bin opensrc_ascendcl ${TARGET}"
elif [ "X$ENABLE_GE_UT" = "Xon" ]
then
TARGET="ut_libgraph ut_libge_multiparts_utest ut_libge_others_utest ut_libge_kernel_utest ut_libge_distinct_load_utest"
@@ -183,7 +183,7 @@ build_graphengine()
elif [ "x${PLATFORM}" = "xall" ]
then
# build all the targets
TARGET=""
TARGET="ge_runner ge_compiler fwk_atc.bin atc_atc.bin opensrc_ascendcl ${TARGET}"
fi
make ${VERBOSE} ${TARGET} -j${THREAD_NUM} && make install
@@ -198,8 +198,6 @@ g++ -v
mk_dir ${OUTPUT_PATH}
build_graphengine || { echo "GraphEngine build failed."; return; }
echo "---------------- GraphEngine build finished ----------------"
#cp -rf "${BUILD_PATH}/graphengine/"*.so "${OUTPUT_PATH}"
#rm -rf "${OUTPUT_PATH}/"libproto*
rm -f ${OUTPUT_PATH}/libgmock*.so
rm -f ${OUTPUT_PATH}/libgtest*.so
rm -f ${OUTPUT_PATH}/lib*_stub.so
@@ -209,10 +207,6 @@ find ${OUTPUT_PATH} -name "*.so*" -print0 | xargs -0 chmod 500

echo "---------------- GraphEngine output generated ----------------"

# if [[ "X$ENABLE_GE_ST" = "Xon" ]]; then
# cp ${BUILD_PATH}/graphengine/tests/st/st_resnet50_train ${OUTPUT_PATH}
# fi

if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
cp ${BUILD_PATH}/tests/ut/common/graph/ut_libgraph ${OUTPUT_PATH}
cp ${BUILD_PATH}/tests/ut/ge/ut_libge_multiparts_utest ${OUTPUT_PATH}
@@ -220,9 +214,6 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
cp ${BUILD_PATH}/tests/ut/ge/ut_libge_others_utest ${OUTPUT_PATH}
cp ${BUILD_PATH}/tests/ut/ge/ut_libge_kernel_utest ${OUTPUT_PATH}

# if [[ "X${ENABLE_GE_UT_ONLY_COMPILE}" != "Xon" ]]; then
# export LD_LIBRARY_PATH=${D_LINK_PATH}/x86_64/:${BUILD_PATH}../third_party/prebuild/x86_64/:${BUILD_PATH}/graphengine/:/usr/local/HiAI/driver/lib64:/usr/local/HiAI/runtime/lib64:${LD_LIBRARY_PATH}
# echo ${LD_LIBRARY_PATH}
${OUTPUT_PATH}/ut_libgraph &&
${OUTPUT_PATH}/ut_libge_multiparts_utest &&
${OUTPUT_PATH}/ut_libge_distinct_load_utest &&
@@ -232,17 +223,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
echo "!!! UT FAILED, PLEASE CHECK YOUR CHANGES !!!"
exit 1;
fi
# fi

# if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then
echo "Generating coverage statistics, please wait..."
cd ${BASEPATH}
rm -rf ${BASEPATH}/cov
mkdir ${BASEPATH}/cov
lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
cd ${BASEPATH}/cov
genhtml coverage.info
echo "Generating coverage statistics, please wait..."
cd ${BASEPATH}
rm -rf ${BASEPATH}/cov
mkdir ${BASEPATH}/cov
lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
cd ${BASEPATH}/cov
genhtml coverage.info
fi

# generate output package in tar form, including ut/st libraries/executables
@@ -256,6 +244,8 @@ generate_package()
ATC_PATH="atc/lib64"
ATC_BIN_PATH="atc/bin"
FWK_BIN_PATH="fwkacllib/bin"
FWK_INCLUDE_PATH="fwkacllib/include"
ATC_INCLUDE_PATH="atc/include"
NNENGINE_PATH="plugin/nnengine/ge_config"
OPSKERNEL_PATH="plugin/opskernel"

@@ -277,6 +267,8 @@ generate_package()
mk_dir "${OUTPUT_PATH}/${ACL_PATH}"
mk_dir "${OUTPUT_PATH}/${ATC_BIN_PATH}"
mk_dir "${OUTPUT_PATH}/${FWK_BIN_PATH}"
mk_dir "${OUTPUT_PATH}/${FWK_INCLUDE_PATH}"
mk_dir "${OUTPUT_PATH}/${ATC_INCLUDE_PATH}"
cd "${OUTPUT_PATH}"

@@ -289,10 +281,10 @@ generate_package()
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name libengine.so -exec cp -f {} ${OUTPUT_PATH}/${ATC_PATH}/${NNENGINE_PATH}/../ \;

MAX_DEPTH=1
if [ "x${PLATFORM}" = "xall" ] || [ "x${PLATFORM}" = "xinference" ]
then
MAX_DEPTH=2
fi
# if [ "x${PLATFORM}" = "xall" ] || [ "x${PLATFORM}" = "xinference" ]
# then
# MAX_DEPTH=2
# fi
for lib in "${PLUGIN_OPSKERNEL[@]}";
do
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth ${MAX_DEPTH} -name "$lib" -exec cp -f {} ${OUTPUT_PATH}/${FWK_PATH}/${OPSKERNEL_PATH} \;
@@ -318,7 +310,15 @@ generate_package()
find ./lib/atclib -name atc.bin -exec cp {} "${OUTPUT_PATH}/${ATC_BIN_PATH}" \;
find ./lib/fwkacl -name atc.bin -exec cp {} "${OUTPUT_PATH}/${FWK_BIN_PATH}" \;
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name "libascendcl.so" -exec cp -f {} ${OUTPUT_PATH}/${ACL_PATH} \;

cp -r ${OUTPUT_PATH}/../metadef/inc/external/* ${ATC_INCLUDE_PATH}
cp -r ${OUTPUT_PATH}/../parser/inc/external/* ${ATC_INCLUDE_PATH}
cp -r ${OUTPUT_PATH}/../inc/external/* ${ATC_INCLUDE_PATH}

cp -r ${OUTPUT_PATH}/../metadef/inc/external/* ${FWK_INCLUDE_PATH}
cp -r ${OUTPUT_PATH}/../parser/inc/external/* ${FWK_INCLUDE_PATH}
cp -r ${OUTPUT_PATH}/../inc/external/* ${FWK_INCLUDE_PATH}

if [ "x${PLATFORM}" = "xtrain" ]
then
tar -cf graphengine_lib.tar fwkacllib
@@ -339,4 +339,4 @@ then
find ./ -name graphengine_lib.tar -exec rm {} \;
tar -cf graphengine_lib.tar lib
fi
echo "---------------- GraphEngine package archive generated ----------------"
echo "---------------- GraphEngine package archive generated ----------------"

+ 14
- 9
ge/CMakeLists.txt View File

@@ -639,15 +639,6 @@ set(INFER_SRC_LIST
"graph/load/model_manager/task_info/model_exit_task_info.cc"
"graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc"
"graph/load/model_manager/task_info/super_kernel/super_kernel.cc"
"single_op/task/op_task.cc"
"single_op/task/build_task_utils.cc"
"single_op/task/tbe_task_builder.cc"
"single_op/task/aicpu_task_builder.cc"
"single_op/task/aicpu_kernel_task_builder.cc"
"single_op/single_op.cc"
"single_op/single_op_model.cc"
"single_op/stream_resource.cc"
"single_op/single_op_manager.cc"
"hybrid/hybrid_davinci_model_stub.cc"
"ir_build/ge_ir_build.cc"
"ir_build/atc_ir_common.cc"
@@ -703,11 +694,13 @@ target_compile_definitions(ge_runner PRIVATE
FMK_SUPPORT_DUMP
DAVINCI_CLOUD
google=ascend_private
FUNC_VISIBILITY
)

target_compile_options(ge_runner PRIVATE
-O2
-fno-common
-fvisibility=hidden
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable>
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format>
)
@@ -738,6 +731,10 @@ target_include_directories(ge_runner SYSTEM PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_options(ge_runner PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(ge_runner PRIVATE
$<BUILD_INTERFACE:intf_pub>
adump_server
@@ -772,11 +769,13 @@ target_compile_definitions(ge_compiler PRIVATE
FMK_HOST_INFER
COMPILE_OMG_PACKAGE
google=ascend_private
FUNC_VISIBILITY
)

target_compile_options(ge_compiler PRIVATE
-O2
-fno-common
-fvisibility=hidden
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable>
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format>
)
@@ -807,6 +806,10 @@ target_include_directories(ge_compiler SYSTEM PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_options(ge_compiler PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(ge_compiler PRIVATE
$<BUILD_INTERFACE:intf_pub>
static_mmpa
@@ -868,6 +871,7 @@ target_compile_options(opensrc_ascendcl PRIVATE
-O2
-fvisibility=hidden
)

target_link_options(opensrc_ascendcl PRIVATE
-rdynamic
-Wl,--allow-multiple-definition
@@ -875,6 +879,7 @@ target_link_options(opensrc_ascendcl PRIVATE
-Wl,-Bsymbolic
-Wl,--exclude-libs,ALL
)

target_link_libraries(opensrc_ascendcl PRIVATE
-Wl,--whole-archive
ge_executor


+ 12
- 1
ge/common/CMakeLists.txt View File

@@ -12,7 +12,7 @@ set(PROTO_LIST
"${METADEF_DIR}/proto/tensorflow/tensor.proto"
"${METADEF_DIR}/proto/tensorflow/tensor_shape.proto"
"${METADEF_DIR}/proto/tensorflow/types.proto"
"${METADEF_DIR}/proto/tensorflow/versions.proto"
"${METADEF_DIR}/proto/tensorflow/versions.proto"
)

protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
@@ -73,6 +73,7 @@ target_compile_definitions(ge_common PRIVATE
FMK_SUPPORT_DUMP
OS_CENTOS
google=ascend_private
FUNC_VISIBILITY
)

target_compile_options(ge_common PRIVATE
@@ -105,6 +106,10 @@ target_include_directories(ge_common PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_options(ge_common PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(ge_common PRIVATE
$<BUILD_INTERFACE:intf_pub>
static_mmpa
@@ -132,6 +137,7 @@ target_compile_definitions(ge_common_static PRIVATE
$<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0>
$<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX>
LOG_CPP
FUNC_VISIBILITY
)

target_compile_options(ge_common_static PRIVATE
@@ -181,6 +187,7 @@ target_compile_definitions(ge_common PRIVATE
OS_CENTOS
google=ascend_private
LOG_CPP
FUNC_VISIBILITY
)

target_compile_options(ge_common PRIVATE
@@ -208,6 +215,10 @@ target_include_directories(ge_common PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_options(ge_common PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(ge_common PRIVATE
$<BUILD_INTERFACE:intf_pub>
ascend_protobuf_static


+ 9
- 9
ge/common/helper/model_cache_helper.cc View File

@@ -598,7 +598,7 @@ bool ModelCacheHelper::IsAllocatedGraphIdSameAsCache(Json &json) const {
return false;
}
// Compare allocated graph id info between json and VarManager
std::unordered_map<std::string, uint32_t> allocated_graph_id;
std::map<std::string, uint32_t> allocated_graph_id;
auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id);
if (ret != SUCCESS) {
GELOGW("Fail to parse AllocatedGraphId from Json.");
@@ -667,7 +667,7 @@ bool ModelCacheHelper::IsChangedGraphIdSameAsCache(Json &json) const {
return false;
}
// Compare variable changed graph id info between json and VarManager
std::unordered_map<std::string, uint32_t> changed_graph_id;
std::map<std::string, uint32_t> changed_graph_id;
auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id);
if (ret != SUCCESS) {
GELOGW("Fail to parse ChangedGraphId from Json.");
@@ -732,7 +732,7 @@ bool ModelCacheHelper::IsVarAddrMgrMapSameAsCache(Json &json) const {
}
// Compare variable address info between json and VarManager
std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector;
std::unordered_set<uint64_t> var_offset_set;
std::set<uint64_t> var_offset_set;
auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set);
if (ret != SUCCESS) {
GELOGW("Fail to parse VarAddrMgrMap from Json.");
@@ -942,7 +942,7 @@ Status ModelCacheHelper::RecoverAllocatedGraphId(const Json &json) const {
GELOGW("Input param json type should be null or array.");
return PARAM_INVALID;
}
std::unordered_map<std::string, uint32_t> allocated_graph_id;
std::map<std::string, uint32_t> allocated_graph_id;
auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id);
if (ret != SUCCESS) {
GELOGW("Fail to parse AllocatedGraphId from Json.");
@@ -963,7 +963,7 @@ Status ModelCacheHelper::RecoverChangedGraphId(const Json &json) const {
GELOGW("Input param json type should be null or array.");
return PARAM_INVALID;
}
std::unordered_map<std::string, uint32_t> changed_graph_id;
std::map<std::string, uint32_t> changed_graph_id;
auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id);
if (ret != SUCCESS) {
GELOGW("Fail to parse AllocatedGraphId from Json.");
@@ -985,7 +985,7 @@ Status ModelCacheHelper::RecoverVarAddrAndTensorDesc(const Json &json) const {
return PARAM_INVALID;
}
std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector;
std::unordered_set<uint64_t> var_offset_set;
std::set<uint64_t> var_offset_set;
auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set);
if (ret != SUCCESS) {
GELOGW("Fail to parse VarAddrMgrMap from Json.");
@@ -1508,7 +1508,7 @@ Status ModelCacheHelper::ParseMemResourceFromJson(const Json &json, map<rtMemTyp

Status ModelCacheHelper::ParseVarAddrMgrMapFromJson(
const Json &json, std::vector<std::pair<std::string, VarAddrMgr>> &var_addr_mgr_vector,
std::unordered_set<uint64_t> &var_offset_set) {
std::set<uint64_t> &var_offset_set) {
if (!(json.is_array() || json.is_null())) {
GELOGW("Input param json type should be null or array.");
return PARAM_INVALID;
@@ -1606,7 +1606,7 @@ Status ModelCacheHelper::ParseTransRoadsFromJson(
}

Status ModelCacheHelper::ParseChangedGraphIdFromJson(const Json &json,
std::unordered_map<std::string, uint32_t> &changed_graph_id) {
std::map<std::string, uint32_t> &changed_graph_id) {
if (!(json.is_array() || json.is_null())) {
GELOGW("Input param json type should be null or array.");
return PARAM_INVALID;
@@ -1624,7 +1624,7 @@ Status ModelCacheHelper::ParseChangedGraphIdFromJson(const Json &json,
}

Status ModelCacheHelper::ParseAllocatedGraphIdFromJson(const Json &json,
std::unordered_map<std::string, uint32_t> &allocated_graph_id) {
std::map<std::string, uint32_t> &allocated_graph_id) {
if (!(json.is_array() || json.is_null())) {
GELOGW("Input param json type should be null or array.");
return PARAM_INVALID;


+ 3
- 3
ge/common/helper/model_cache_helper.h View File

@@ -95,15 +95,15 @@ class ModelCacheHelper {
static Status ParseMemResourceFromJson(const Json &json, map<rtMemType_t, int64_t> &mem_resource);
static Status ParseVarAddrMgrMapFromJson(const Json &json,
std::vector<std::pair<std::string, VarAddrMgr>> &var_addr_mgr_vector,
std::unordered_set<uint64_t> &var_offset_set);
std::set<uint64_t> &var_offset_set);
static Status ParseCurVarTensorDescMapFromJson(
const Json &json, std::unordered_map<std::string, ge::GeTensorDesc> &cur_var_tensor_desc_map);
static Status ParseTransRoadsFromJson(const Json &json,
std::unordered_map<std::string, std::vector<TransNodeInfo>> &trans_roads);
static Status ParseChangedGraphIdFromJson(const Json &json,
std::unordered_map<std::string, uint32_t> &changed_graph_id);
std::map<std::string, uint32_t> &changed_graph_id);
static Status ParseAllocatedGraphIdFromJson(const Json &json,
std::unordered_map<std::string, uint32_t> &allocated_graph_id);
std::map<std::string, uint32_t> &allocated_graph_id);
static Status ParseBroadcastInfoFromJson(const Json &json,
std::unordered_map<std::string, VarBroadCastInfo> &var_broadcast_info);
static Status GetVarNameFromVarKey(const string &var_key, const GeTensorDesc &tensor_desc, string &var_name);


+ 1
- 1
ge/common/profiling/ge_profiling.cc View File

@@ -88,7 +88,7 @@ bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) {
return false;
}

std::unordered_set<uint32_t> record;
std::set<uint32_t> record;
for (size_t i = 0; i < device_nums; ++i) {
uint32_t dev_id = deviceid_list[i];
if (dev_id >= static_cast<uint32_t>(dev_count)) {


+ 9
- 1
ge/executor/CMakeLists.txt View File

@@ -167,6 +167,8 @@ target_compile_options(ge_executor PRIVATE
$<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common>
$<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Debug>>:/MTd>
$<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Release>>:/MT>
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable>
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format>
)

target_compile_definitions(ge_executor PRIVATE
@@ -178,7 +180,7 @@ target_compile_definitions(ge_executor PRIVATE
LOG_CPP
)

target_include_directories(ge_executor PRIVATE
target_include_directories(ge_executor SYSTEM PRIVATE
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
@@ -212,12 +214,14 @@ target_compile_options(ge_executor_shared PRIVATE
-Werror
-O2
-Wno-deprecated-declarations
-fvisibility=hidden
)

target_compile_definitions(ge_executor_shared PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
DAVINCI_SUPPORT_PROFILING
google=ascend_private
FUNC_VISIBILITY
)

target_include_directories(ge_executor_shared PRIVATE
@@ -238,6 +242,10 @@ target_include_directories(ge_executor_shared PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_options(ge_executor_shared PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(ge_executor_shared PRIVATE
$<BUILD_INTERFACE:intf_pub>
msprofiler


+ 26
- 1
ge/ge_local_engine/CMakeLists.txt View File

@@ -27,10 +27,12 @@ add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
target_compile_options(ge_local_engine PRIVATE
-Werror
-fno-common
-fvisibility=hidden
)

target_compile_definitions(ge_local_engine PRIVATE
google=ascend_private
FUNC_VISIBILITY
)

target_include_directories(ge_local_engine PRIVATE
@@ -51,6 +53,10 @@ target_include_directories(ge_local_engine PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_options(ge_local_engine PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(ge_local_engine PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
@@ -67,11 +73,12 @@ add_library(atc_ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
target_compile_options(atc_ge_local_engine PRIVATE
-Werror
-fno-common
-fvisibility=hidden
)

target_compile_definitions(atc_ge_local_engine PRIVATE
COMPILE_OMG_PACKAGE
google=ascend_private
FUNC_VISIBILITY
)

target_include_directories(atc_ge_local_engine PRIVATE
@@ -92,6 +99,10 @@ target_include_directories(atc_ge_local_engine PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_options(atc_ge_local_engine PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(atc_ge_local_engine PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
@@ -113,10 +124,12 @@ add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDR
target_compile_options(ge_local_opskernel_builder PRIVATE
-Werror
-fno-common
-fvisibility=hidden
)

target_compile_definitions(ge_local_opskernel_builder PRIVATE
google=ascend_private
FUNC_VISIBILITY
)

target_include_directories(ge_local_opskernel_builder PRIVATE
@@ -137,6 +150,10 @@ target_include_directories(ge_local_opskernel_builder PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_options(ge_local_opskernel_builder PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(ge_local_opskernel_builder PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
@@ -154,10 +171,12 @@ add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO
target_compile_options(atc_ge_local_opskernel_builder PRIVATE
-Werror
-fno-common
-fvisibility=hidden
)

target_compile_definitions(atc_ge_local_opskernel_builder PRIVATE
google=ascend_private
FUNC_VISIBILITY
)

target_include_directories(atc_ge_local_opskernel_builder PRIVATE
@@ -178,6 +197,10 @@ target_include_directories(atc_ge_local_opskernel_builder PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_options(atc_ge_local_opskernel_builder PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(atc_ge_local_opskernel_builder PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
@@ -200,11 +223,13 @@ add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PR
target_compile_options(ge_local_opskernel_builder_static PRIVATE
-Werror
-fno-common
-fvisibility=hidden
)

target_compile_definitions(ge_local_opskernel_builder_static PRIVATE
google=ascend_private
LOG_CPP
FUNC_VISIBILITY
)

target_include_directories(ge_local_opskernel_builder_static PRIVATE


+ 19
- 5
ge/ge_local_engine/engine/ge_local_engine.h View File

@@ -17,6 +17,20 @@
#ifndef GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_
#define GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_

// Symbol-export annotation applied to the classes and functions declared in this header.
// GE_FUNC_VISIBILITY expands to an export attribute only when FUNC_VISIBILITY is defined
// (presumably supplied by the build as a compile definition); otherwise it expands to
// nothing and has no effect on the declaration it decorates.
#if defined(_MSC_VER)
#ifdef FUNC_VISIBILITY
// MSVC: export the symbol from the DLL.
#define GE_FUNC_VISIBILITY _declspec(dllexport)
#else
#define GE_FUNC_VISIBILITY
#endif
#else
#ifdef FUNC_VISIBILITY
// Non-MSVC compilers: give the symbol default visibility explicitly, so it stays
// exported even when the translation unit is compiled with -fvisibility=hidden.
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#else
#define GE_FUNC_VISIBILITY
#endif
#endif

#include <map>
#include <memory>
#include <string>
@@ -32,7 +46,7 @@ namespace ge_local {
* GE local engine.
* Used for ops that do not belong to any other engine, e.g. netoutput.
*/
class GeLocalEngine {
class GE_FUNC_VISIBILITY GeLocalEngine {
public:
/**
* get GeLocalEngine instance.
@@ -94,25 +108,25 @@ extern "C" {
* When GE starts, GE will invoke this interface
* @return The status indicating whether initialization succeeded
*/
ge::Status Initialize(const map<string, string> &options);
GE_FUNC_VISIBILITY ge::Status Initialize(const map<string, string> &options);

/**
* After initialization, GE will invoke this interface to get the ops kernel info stores
* @param ops_kernel_map The GE local engine's ops kernel info stores
*/
void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map);
GE_FUNC_VISIBILITY void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map);

/**
* After initialization, GE will invoke this interface to get the graph optimizers
* @param graph_optimizers The GE local engine's graph optimizer objects
*/
void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers);
GE_FUNC_VISIBILITY void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers);

/**
* When the graph has finished, GE will invoke this interface
* @return The status indicating whether finalization succeeded
*/
ge::Status Finalize();
GE_FUNC_VISIBILITY ge::Status Finalize();
}

#endif // GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_

+ 15
- 1
ge/ge_local_engine/engine/host_cpu_engine.h View File

@@ -16,6 +16,20 @@
#ifndef GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_
#define GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_

// Symbol-export annotation applied to the classes declared in this header.
// GE_FUNC_VISIBILITY expands to an export attribute only when FUNC_VISIBILITY is defined
// (presumably supplied by the build as a compile definition); otherwise it expands to
// nothing and has no effect on the declaration it decorates.
#if defined(_MSC_VER)
#ifdef FUNC_VISIBILITY
// MSVC: export the symbol from the DLL.
#define GE_FUNC_VISIBILITY _declspec(dllexport)
#else
#define GE_FUNC_VISIBILITY
#endif
#else
#ifdef FUNC_VISIBILITY
// Non-MSVC compilers: give the symbol default visibility explicitly, so it stays
// exported even when the translation unit is compiled with -fvisibility=hidden.
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#else
#define GE_FUNC_VISIBILITY
#endif
#endif

#include <mutex>
#include "framework/common/ge_inner_error_codes.h"
#include "graph/node.h"
@@ -23,7 +37,7 @@
#include "external/../register/register.h"

namespace ge {
class HostCpuEngine {
class GE_FUNC_VISIBILITY HostCpuEngine {
public:
~HostCpuEngine() = default;



+ 1
- 1
ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h View File

@@ -22,7 +22,7 @@

namespace ge {
namespace ge_local {
class GeLocalOpsKernelBuilder : public OpsKernelBuilder {
class GE_FUNC_VISIBILITY GeLocalOpsKernelBuilder : public OpsKernelBuilder {
public:
~GeLocalOpsKernelBuilder() override;
Status Initialize(const map<std::string, std::string> &options) override;


+ 15
- 1
ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h View File

@@ -17,6 +17,20 @@
#ifndef GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_GE_LOCAL_OPS_KERNEL_INFO_H_
#define GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_GE_LOCAL_OPS_KERNEL_INFO_H_

// Symbol-export annotation for GE classes and functions.
// It expands to a compiler-specific export attribute only when FUNC_VISIBILITY
// is defined; otherwise it expands to nothing.
#ifdef FUNC_VISIBILITY
#if defined(_MSC_VER)
#define GE_FUNC_VISIBILITY _declspec(dllexport)
#else
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#endif
#else
#define GE_FUNC_VISIBILITY
#endif

#include <map>
#include <string>
#include <vector>
@@ -25,7 +39,7 @@

namespace ge {
namespace ge_local {
class GeLocalOpsKernelInfoStore : public OpsKernelInfoStore {
class GE_FUNC_VISIBILITY GeLocalOpsKernelInfoStore : public OpsKernelInfoStore {
public:
GeLocalOpsKernelInfoStore() = default;



+ 1
- 1
ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h View File

@@ -21,7 +21,7 @@

namespace ge {
namespace ge_local {
class GeDeletedOp : public Op {
class GE_FUNC_VISIBILITY GeDeletedOp : public Op {
public:
GeDeletedOp(const Node &node, RunContext &run_context);



+ 1
- 1
ge/ge_local_engine/ops_kernel_store/op/no_op.h View File

@@ -21,7 +21,7 @@

namespace ge {
namespace ge_local {
class NoOp : public Op {
class GE_FUNC_VISIBILITY NoOp : public Op {
public:
NoOp(const Node &node, RunContext &run_context);



+ 1
- 1
ge/ge_local_engine/ops_kernel_store/op/op.h View File

@@ -29,7 +29,7 @@ namespace ge_local {
/**
* The base class for all op.
*/
class Op {
class GE_FUNC_VISIBILITY Op {
public:
Op(const Node &node, RunContext &run_context);



+ 2
- 2
ge/ge_local_engine/ops_kernel_store/op/op_factory.h View File

@@ -32,7 +32,7 @@ using OP_CREATOR_FUNC = std::function<std::shared_ptr<Op>(const Node &, RunConte
/**
* manage all the op, support create op.
*/
class OpFactory {
class GE_FUNC_VISIBILITY OpFactory {
public:
static OpFactory &Instance();

@@ -72,7 +72,7 @@ class OpFactory {
std::vector<std::string> all_ops_;
};

class OpRegistrar {
class GE_FUNC_VISIBILITY OpRegistrar {
public:
OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) {
OpFactory::Instance().RegisterCreator(type, func);


+ 5
- 1
ge/ge_runtime/CMakeLists.txt View File

@@ -27,7 +27,7 @@ target_compile_options(ge_runtime PRIVATE
-fno-common
)

target_compile_definitions(ge_runtime PRIVATE
target_compile_definitions(ge_runtime PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
LOG_CPP
)
@@ -53,6 +53,10 @@ target_include_directories(ge_runtime PRIVATE
${CMAKE_BINARY_DIR}/proto/ge
)

target_link_options(ge_runtime PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(ge_runtime PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed


+ 40
- 31
ge/graph/build/memory/block_mem_assigner.cc View File

@@ -1121,7 +1121,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
}
}
reusable_block->continuous_block_ = continuous;
reusable_block->ref_count_++;
reusable_blocks_[memory_type][stream_id].erase((++it).base());
return reusable_block;
}
@@ -1136,7 +1135,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
block->is_zero_copy_ = IsZeroCopyBlock(n, continuous);
block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size);
block->stream_id_ = node_op_desc->GetStreamId();
block->ref_count_++;
block->continuous_block_ = continuous;
block->batch_label_ = batch_label;
if (mem_type == kOutput) {
@@ -1266,6 +1264,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in
// hccl task need align header and tail
block->first_continuous_block_ = true;
block->last_continuous_block_ = true;
++(block->ref_count_);
} else {
GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str());
return INTERNAL_ERROR;
@@ -1289,6 +1288,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
return nullptr, "Get no align size failed");

std::string symbol;
bool reuse_input = false;
if (IsSymbolExist(node_index_io, symbol)) {
block = symbol_blocks_[symbol];
GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str());
@@ -1303,6 +1303,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
block->SetLifeTimeEnd(life_time_);
block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size);
block->ref_count_++;
reuse_input = true;

// add new size
align_size = block_size;
@@ -1336,7 +1337,6 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type);
}
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "Block is nullptr.");
int out_count_reuse_input = block->ref_count_;
int out_count = 0;
GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(), GELOGE(FAILED, "index is out of range."); return nullptr);
auto out_data_anchor = n->GetOutDataAnchor(index);
@@ -1351,28 +1351,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
out_count++;
}
}
bool reuse_input = false;
for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) {
auto owner_node = in_anchor->GetOwnerNode();
GE_IF_BOOL_EXEC(owner_node == nullptr, continue);
auto op_desc = owner_node->GetOpDesc();
GE_IF_BOOL_EXEC(op_desc == nullptr, continue);
for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) {
bool dst_reuse_input = false;
uint32_t dst_reuse_input_index = 0;
auto owner_node_op_desc = op_desc->GetOutputDescPtr(i);
GE_IF_BOOL_EXEC(owner_node_op_desc == nullptr, continue);
GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInput(*owner_node_op_desc, dst_reuse_input) != SUCCESS,
GELOGI("Get dst_reuse_input failed"));
GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS,
GELOGI("Get dst_reuse_input_index failed"));
if (dst_reuse_input && (dst_reuse_input_index == static_cast<uint32_t>(in_anchor->GetIdx()))) {
out_count_reuse_input += 1;
reuse_input = true;
}
}
}
block->ref_count_ = reuse_input ? out_count_reuse_input + out_count - 1 : out_count;
block->ref_count_ = (reuse_input && out_count != 0) ? (block->ref_count_ + out_count - 1)
: (block->ref_count_ + out_count);
return block;
}

@@ -1484,12 +1464,25 @@ void BlockMemAssigner::ReleaseInputNodeOutMemory(const unordered_map<string, vec
GELOGD("node_type_indexs: %d, %s", node_type_indexs.back().index,
node_type_indexs.back().node->GetName().c_str());

if ((node_type_indexs.back().node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) &&
(node_type_indexs.back().index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx()))) {
bool is_block_matched = false;
for (auto &node_type_index : node_type_indexs) {
is_block_matched = (node_type_index.node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) &&
(node_type_index.index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx()));
if (is_block_matched) {
GELOGI("Block of peer out is matched. Peer node:%s, output index:%u, "
"current node:%s, input index:%d, block ref_count:%d.",
node_type_index.node->GetName().c_str(), node_type_index.index,
node->GetName().c_str(), in_anchor->GetIdx(), block->ref_count_);
break;
}
}

if (is_block_matched) {
ReleaseMemory(block, reusable_memory, (node->GetOpDesc()->GetStreamId() == block->stream_id_));
if (block->ref_count_ == 0 && block->same_stream_) {
SetLastUsedInputMemAttr(node, in_anchor->GetIdx());
}
break;
}
}
}
@@ -1530,6 +1523,21 @@ void CheckAndGetOpReuseEnv(const string &env, vector<string> &env_vec, bool &op_
return;
}

///
/// @brief Queue the block of a "suspended" output (an output anchor with no peer input nodes)
///        in stream_workspace_blocks_ under the node's stream id.
/// @param [in] node: node that owns the output; ignored when it or its op desc is null.
/// @param [in] idx: index of the output anchor to inspect.
/// @param [in/out] block: memory block assigned to that output; ignored when null.
///
void BlockMemAssigner::CheckAndReleaseSuspendedBlock(const NodePtr &node, uint32_t idx, MemoryBlock *block) {
  if (node == nullptr) {
    return;
  }
  const auto op_desc = node->GetOpDesc();
  if ((op_desc == nullptr) || (block == nullptr)) {
    return;
  }
  const int64_t stream_id = op_desc->GetStreamId();

  // Only an existing output anchor without any consumer counts as suspended.
  const auto anchor = node->GetOutDataAnchor(static_cast<int>(idx));
  if ((anchor == nullptr) || (anchor->GetPeerInDataNodesSize() != 0)) {
    return;
  }

  // Make sure the reference count is at least one before the block is queued.
  if (block->ref_count_ == 0) {
    block->ref_count_ = 1;
  }
  stream_workspace_blocks_[block->memory_type_][stream_id].emplace_back(block);
  GELOGI("The output is suspended, and will be released in allocation of next node. Name:%s, index:%u, "
         "size:%zu, ref_count:%d.", node->GetName().c_str(), idx, block->Size(), block->ref_count_);
}

Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector<int64_t> &ranges) {
auto op_desc = node->GetOpDesc();
int64_t stream_id = op_desc->GetStreamId();
@@ -1560,7 +1568,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
// Allocate memory for the current node and release node memory of the same size in the workspace
GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1",
for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end();
++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); });
++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]);
iter->second[stream_id].clear();});
if (IsContinuousOutput(node)) {
return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_);
}
@@ -1621,6 +1630,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
continue;
}
symbol_blocks_[iter->second] = mem_block;
// The output is suspended, and will be released in allocation of next node.
CheckAndReleaseSuspendedBlock(node, i, mem_block);
}
}
return SUCCESS;
@@ -1648,9 +1659,6 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
if (AssignOutputMemoryWithReuse(n, ranges) != SUCCESS) {
return;
}
for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); ++iter) {
iter->second[stream_id].clear();
}
vector<int64_t> temp;
int64_t tatal_size = 0;
GetNodeWorkSpaceSize(n, temp, tatal_size);
@@ -1692,6 +1700,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
kWorkspace, n, static_cast<uint32_t>(i), workspace_reuse_flag,
is_op_reuse_mem_, false, memory_type);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mem_block == nullptr, continue, "failed to apply memory block.");
++(mem_block->ref_count_);
CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block, memory_type);
}
for (auto it = reusable_blocks_.begin(); it != reusable_blocks_.end(); ++it) {


+ 3
- 1
ge/graph/build/memory/block_mem_assigner.h View File

@@ -454,6 +454,8 @@ class BlockMemAssigner : public MemAssigner {

void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node);

void CheckAndReleaseSuspendedBlock(const NodePtr &node, uint32_t idx, MemoryBlock *block);

std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_;

std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> stream_workspace_blocks_;
@@ -464,7 +466,7 @@ class BlockMemAssigner : public MemAssigner {

std::unordered_map<std::string, std::unordered_map<uint32_t, MemoryBlock *>> node_continuous_input_blocks_;

std::unordered_map<std::string, uint32_t> node_continuous_input_counts_;
std::map<std::string, uint32_t> node_continuous_input_counts_;

// reuse memory
vector<string> op_no_reuse_mem_vec_;


+ 2
- 2
ge/graph/build/memory/graph_mem_assigner.cc View File

@@ -528,7 +528,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,

GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] "
"size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
node->GetType().c_str(), peer_op_desc->GetName().c_str(),peer_out_data_anchor->GetIdx(),
peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(),
output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type,
is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding);
}
@@ -618,7 +618,7 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node
}
GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]"
" size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
node->GetType().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
out_op_desc->GetName().c_str(), node->GetType().c_str(), out_data_anchor->GetIdx(),
output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL,
is_nopadding ? nopadding_size : tensor_desc_size, is_nopadding);
}


+ 1
- 1
ge/graph/build/run_context.cc View File

@@ -90,7 +90,7 @@ Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t even
// Create rt label
for (uint32_t i = 0; i < label_num; ++i) {
rtLabel_t label = nullptr;
rt_ret = rtLabelCreate(&label);
rt_ret = rtLabelCreateV2(&label, rt_model_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtLabelCreate failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i);
return RT_FAILED;


+ 1
- 17
ge/graph/build/stream_allocator.cc View File

@@ -1226,7 +1226,7 @@ Status StreamAllocator::InsertSyncEventNodes() {
}
}

Status status = ReorderEventNodes();
Status status = whole_graph_->InsertGraphEvents();
if (status != SUCCESS) {
GELOGE(status, "Graph ReorderEventNodes failed");
return status;
@@ -1235,22 +1235,6 @@ Status StreamAllocator::InsertSyncEventNodes() {
return SUCCESS;
}

///
/// @brief Call InsertEventNodes on the whole graph first and then on each of its subgraphs,
///        stopping at the first failure.
/// @return SUCCESS when every call succeeds; otherwise the status of the first failing call.
///
Status StreamAllocator::ReorderEventNodes() const {
  const Status root_ret = whole_graph_->InsertEventNodes();
  if (root_ret != SUCCESS) {
    GELOGE(root_ret, "Whole graph InsertEventNodes failed");
    return root_ret;
  }

  for (const auto &sub_graph : whole_graph_->GetAllSubgraphs()) {
    const Status sub_ret = sub_graph->InsertEventNodes();
    if (sub_ret == SUCCESS) {
      continue;
    }
    GELOGE(sub_ret, "Subgraph %s InsertEventNodes failed", sub_graph->GetName().c_str());
    return sub_ret;
  }
  return SUCCESS;
}

void StreamAllocator::DumpEvents() {
map<int64_t, vector<NodePtr>> after_refresh_stream_nodes;
for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) {


+ 0
- 1
ge/graph/build/stream_allocator.h View File

@@ -74,7 +74,6 @@ class StreamAllocator {
Status RefreshContinuousEvents();

Status InsertSyncEventNodes();
Status ReorderEventNodes() const;

void DumpEvents();



+ 2
- 9
ge/graph/build/task_generator.cc View File

@@ -211,7 +211,7 @@ Status TaskGenerator::SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion
// and it have no attr or group attr different
// which means bad case, return error
bool call_check = true;
std::unordered_set<int64_t> input_group_ids;
std::set<int64_t> input_group_ids;
for (const auto &input_node : node->GetInNodes()) {
auto iter = nodes_with_group_attr.find(input_node);
if (iter == nodes_with_group_attr.end()) {
@@ -533,13 +533,6 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) {
return GE_GRAPH_GRAPH_NODE_NULL;
}

int64_t node_index = 0;
for (auto &node : all_nodes) {
OpDescPtr op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
op_desc->SetId(node_index++);
}

map<int64_t, vector<OpDescPtr>> all_stream_ops;
for (auto &node : all_nodes) {
OpDescPtr op_desc = node->GetOpDesc();
@@ -784,7 +777,7 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin
}

if (graph->GetNeedIteration()) {
if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") {
if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD) {
profiling_point.end_index.insert(current_idx);
GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive",
op_desc->GetName().c_str(), current_idx);


+ 1
- 1
ge/graph/common/transop_util.h View File

@@ -44,7 +44,7 @@ class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY TransOpUtil {

static TransOpUtil &Instance();

typedef std::unordered_map<std::string, int> transop_index_op;
typedef std::map<std::string, int> transop_index_op;
transop_index_op transop_index_map_;
};
} // namespace ge


+ 6
- 16
ge/graph/load/model_manager/cpu_queue_schedule.cc View File

@@ -99,7 +99,7 @@ Status CpuTaskModelDequeue::Distribute() {
/// @param [in] outside_addrs: model input/output memory addr
/// @return: 0 for success / others for failed
///
Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs) {
Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs) {
if ((args_ != nullptr) || (args_size_ > 0)) {
GELOGE(FAILED, "Task already initialized, size: %u", args_size_);
return FAILED;
@@ -110,32 +110,22 @@ Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const v
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)

AddrMapInfo addr_map_info;
for (auto &addrs : outside_addrs) {
auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs");
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0];
for (const auto &virtual_args_addr : virtual_args_addrs) {
addr_map_info.addr_num += virtual_args_addr.second.size();
}
}
GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num);

// init src_addrs/dst_addrs
size_t index = 0;
vector<uint64_t> src_addrs;
vector<uint64_t> dst_addrs;
for (auto &addrs : outside_addrs) {
auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
for (const auto &addrs : outside_addrs) {
const auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs");
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0];
for (const auto &virtual_args_addr : virtual_args_addrs) {
addr_map_info.addr_num += virtual_args_addr.second.size();
for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) {
src_addrs.push_back(mbuf_list.at(index));
src_addrs.emplace_back(mbuf_list.at(addrs.first));
dst_addrs.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i))));
}
}
index++;
}
GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num);

// malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs
GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM));


+ 1
- 1
ge/graph/load/model_manager/cpu_queue_schedule.h View File

@@ -93,7 +93,7 @@ class CpuTaskZeroCopy : public CpuTaskInfo {
~CpuTaskZeroCopy() override;

Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; }
Status Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs);
Status Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs);

Status Distribute() override;
private:


+ 54
- 53
ge/graph/load/model_manager/davinci_model.cc View File

@@ -842,6 +842,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
};

vector<OpDescPtr> output_op_list;
set<const void *> input_outside_addrs;
set<const void *> output_outside_addrs;
map<uint32_t, OpDescPtr> data_by_index;
map<string, OpDescPtr> variable_by_name;
auto nodes = compute_graph->GetAllNodes();
@@ -858,7 +860,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc);

if (IsDataOp(op_desc->GetType())) {
if (InitDataOp(compute_graph, node, data_op_index, data_by_index) != SUCCESS) {
if (InitDataOp(compute_graph, node, data_op_index, data_by_index, input_outside_addrs) != SUCCESS) {
GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str());
return PARAM_INVALID;
}
@@ -867,7 +869,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
}

if (op_desc->GetType() == NETOUTPUT) {
if (InitNetOutput(compute_graph, node, output_op_list) != SUCCESS) {
if (InitNetOutput(compute_graph, node, output_op_list, output_outside_addrs) != SUCCESS) {
GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str());
return PARAM_INVALID;
}
@@ -961,7 +963,7 @@ void DavinciModel::SetLabelForDynamic(const NodePtr &node) {
/// @return Status
///
Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index,
map<uint32_t, OpDescPtr> &data_by_index) {
map<uint32_t, OpDescPtr> &data_by_index, set<const void *> &input_outside_addrs) {
// op_desc Checked by Init: Data, valid.
auto op_desc = node->GetOpDesc();
if (node->GetOwnerComputeGraph() != graph) {
@@ -1000,16 +1002,12 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod
GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str());
return PARAM_INVALID;
}
new_input_data_info_[data_index] = zero_copy_offset;

for (size_t index = 0; index < virtual_addr_list.size(); ++index) {
void *addr = virtual_addr_list.at(index);
if (new_input_outside_addrs_.find(addr) != new_input_outside_addrs_.end()) {
continue;
}
zero_copy_offset.SetInputOutsideAddrs(output_offset_list, addr, index, fusion_flag, real_virtual_addrs_);
new_input_outside_addrs_[addr] = zero_copy_offset;
if (input_outside_addrs.count(virtual_addr) == 0) {
int64_t output_offset = output_offset_list.at(kDataIndex);
zero_copy_offset.SetInputOutsideAddrs(output_offset, virtual_addr, fusion_flag, real_virtual_addrs_);
input_outside_addrs.insert(virtual_addr);
}
input_data_info_[data_index] = zero_copy_offset;

return SUCCESS;
}
@@ -1085,7 +1083,7 @@ bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) {
/// @param [in/out] vector<OpDescPtr>: All NetOutput node in model.
/// @return Status
Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node,
vector<OpDescPtr> &output_op_list) {
vector<OpDescPtr> &output_op_list, set<const void *> &output_outside_addrs) {
// node->GetOpDesc Checked by Init: NetOutput, valid.
auto op_desc = node->GetOpDesc();
// excludes the function op sub graph, e.g. case,if
@@ -1117,7 +1115,7 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &
return PARAM_INVALID;
}

size_t num = new_output_data_info_.size();
size_t num = output_data_info_.size();
bool fusion_flag = false;

size_t input_count = input_size_list.size();
@@ -1131,22 +1129,22 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &
Status ret = zero_copy_offset.InitOutputDataInfo(input_size_list, virtual_addr_list, op_desc, idx, fusion_flag);
GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.",
op_desc->GetName().c_str()); return PARAM_INVALID;);
new_output_data_info_[num + idx] = zero_copy_offset;
void *addr = virtual_addr_list.at(idx);
int64_t input_offset = input_offset_list.at(idx);
vector<void *> tensor_addrs;
zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs);
auto rslt = new_output_outside_addrs_.insert(std::pair<void *, ZeroCopyOffset>(addr, zero_copy_offset));
if (!rslt.second) {
if (output_outside_addrs.count(addr) == 0) {
vector<void *> tensor_addrs;
zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs);
output_outside_addrs.insert(addr);
for (size_t i = 0; i < tensor_addrs.size(); ++i) {
void *real_addr = tensor_addrs.at(i);
DisableZeroCopy(real_addr);
real_virtual_addrs_.insert(real_addr);
}
} else {
GELOGI("same output_tensor_addr %p to different input_tensor of %s", addr, op_desc->GetName().c_str());
DisableZeroCopy(addr);
}

for (size_t i = 0; i < tensor_addrs.size(); ++i) {
void *real_addr = tensor_addrs.at(i);
DisableZeroCopy(real_addr);
real_virtual_addrs_.insert(real_addr);
}
output_data_info_[num + idx] = zero_copy_offset;
}
return SUCCESS;
}
@@ -1402,7 +1400,7 @@ Status DavinciModel::InitLabelSet(const OpDescPtr &op_desc) {
}

rtLabel_t rt_label = nullptr;
rtError_t rt_error = rtLabelCreateEx(&rt_label, stream);
rtError_t rt_error = rtLabelCreateExV2(&rt_label, rt_model_handle_, stream);
if (rt_error != RT_ERROR_NONE || rt_label == nullptr) {
GELOGE(INTERNAL_ERROR, "InitLabelSet: %s create label failed, error=0x%x.", op_desc->GetName().c_str(), rt_error);
return INTERNAL_ERROR;
@@ -1463,27 +1461,27 @@ Status DavinciModel::LoadWithQueue() {
return SUCCESS;
}

if (input_queue_ids_.size() != new_input_data_info_.size()) {
if (input_queue_ids_.size() != input_data_info_.size()) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Input queue ids not match model: input_queue=%zu input_data=%zu",
input_queue_ids_.size(), new_input_data_info_.size());
input_queue_ids_.size(), input_data_info_.size());
return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID;
}

if (output_queue_ids_.size() != new_output_data_info_.size()) {
if (output_queue_ids_.size() != output_data_info_.size()) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID,
"Output queue ids not match model: output_queue=%zu output_data=%zu",
output_queue_ids_.size(), new_output_data_info_.size());
output_queue_ids_.size(), output_data_info_.size());
return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID;
}

GE_CHK_STATUS_RET(AddHeadStream(), "Add head stream failed.");
// Binding input_queue and Data Op.
GE_CHK_STATUS_RET(BindInputQueue(), "Launch bind input queue failed.");
GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, new_input_outside_addrs_), "Launch zero copy failed.");
GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, input_data_info_), "Launch zero copy failed.");

// Binding output_queue and NetOutput Op.
GE_CHK_STATUS_RET(BindOutputQueue(), "Launch bind output queue failed.");
GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, new_output_outside_addrs_), "Launch zero copy failed.");
GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, output_data_info_), "Launch zero copy failed.");

GE_CHK_STATUS_RET(CpuActiveStream(), "Launch active entry stream failed.");
GE_CHK_STATUS_RET(CpuWaitEndGraph(), "Launch wait end graph failed.");
@@ -1499,9 +1497,9 @@ Status DavinciModel::LoadWithQueue() {
Status DavinciModel::BindInputQueue() {
// Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size()
for (size_t i = 0; i < input_queue_ids_.size(); ++i) {
auto it = new_input_data_info_.find(i);
if (it == new_input_data_info_.end()) {
GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", new_input_data_info_.size(), i);
auto it = input_data_info_.find(i);
if (it == input_data_info_.end()) {
GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", input_data_info_.size(), i);
return FAILED;
}

@@ -1555,7 +1553,7 @@ Status DavinciModel::CpuModelDequeue(uint32_t queue_id) {
}

Status DavinciModel::CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list,
std::map<const void *, ZeroCopyOffset> &outside_addrs) {
const map<uint32_t, ZeroCopyOffset> &outside_addrs) {
GELOGI("Set CpuKernel model zero_copy task enter.");
std::shared_ptr<CpuTaskZeroCopy> zero_copy = MakeShared<CpuTaskZeroCopy>(rt_entry_stream_);
if (zero_copy == nullptr) {
@@ -1579,9 +1577,9 @@ Status DavinciModel::CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list,
Status DavinciModel::BindOutputQueue() {
// Caller checked: output_queue_ids_.size() == output_data_info_.size()
for (size_t i = 0; i < output_queue_ids_.size(); ++i) {
auto it = new_output_data_info_.find(i);
if (it == new_output_data_info_.end()) {
GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i);
auto it = output_data_info_.find(i);
if (it == output_data_info_.end()) {
GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i);
return FAILED;
}

@@ -1685,9 +1683,9 @@ Status DavinciModel::CpuWaitEndGraph() {

Status DavinciModel::BindEnqueue() {
for (size_t i = 0; i < output_queue_ids_.size(); ++i) {
auto it = new_output_data_info_.find(i);
if (it == new_output_data_info_.end()) {
GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i);
auto it = output_data_info_.find(i);
if (it == output_data_info_.end()) {
GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i);
return FAILED;
}

@@ -2103,10 +2101,10 @@ Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_descs
Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) {
rtMemcpyKind_t kind = device_data ? RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE;
const std::vector<DataBuffer> &blobs = input_data.blobs;
for (const auto &data : new_input_data_info_) {
for (const auto &data : input_data_info_) {
if (data.first >= blobs.size()) {
GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld, op_name(%s)", blobs.size(),
new_input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first,
input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first,
data.second.GetOpName().c_str());
return FAILED;
}
@@ -2427,18 +2425,18 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r

output_data.index = data_id;
output_data.model_id = model_id_;
if (output_data.blobs.size() != new_output_data_info_.size()) {
if (output_data.blobs.size() != output_data_info_.size()) {
GELOGE(FAILED, "Output data buffer num=%zu not equal model data num=%zu", output_data.blobs.size(),
new_output_data_info_.size());
output_data_info_.size());
return FAILED;
}

std::vector<DataBuffer> &blobs = output_data.blobs;
size_t idx = 0;
for (const auto &output : new_output_data_info_) {
for (const auto &output : output_data_info_) {
if (output.first >= blobs.size()) {
GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", blobs.size(),
new_input_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first);
input_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first);
return FAILED;
}

@@ -3166,8 +3164,11 @@ void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) {
/// @return None.
///
void DavinciModel::SetCopyOnlyOutput() {
for (const auto &output_outside_addrs : new_output_outside_addrs_) {
for (const auto &output_outside_addrs : output_data_info_) {
ZeroCopyOffset output_outside = output_outside_addrs.second;
if (!output_outside.IsRelativeOffsetValid()) {
return;
}
for (uint32_t out_count = 0; out_count < output_outside.GetAddrCount(); ++out_count) {
auto &addrs_mapping_list = output_outside.GetOutsideAddrs();
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[out_count];
@@ -3219,12 +3220,12 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<v
for (size_t i = 0; i < nums; ++i) {
std::lock_guard<std::mutex> lock(outside_addrs_mutex_);

for (auto &input_outside_addrs : new_input_outside_addrs_) {
for (auto &input_outside_addrs : input_data_info_) {
ZeroCopyOffset &input_outside = input_outside_addrs.second;
input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
}

for (auto &output_outside_addrs : new_output_outside_addrs_) {
for (auto &output_outside_addrs : output_data_info_) {
ZeroCopyOffset &output_outside = output_outside_addrs.second;
output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
}
@@ -3293,12 +3294,12 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64
/// @return SUCCESS handle successfully / PARAM_INVALID for failed
///
Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &output_data, bool is_dynamic) {
if (UpdateIoTaskArgs(new_input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) {
if (UpdateIoTaskArgs(input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update input data to model failed.");
return ACL_ERROR_GE_PARAM_INVALID;
}

if (UpdateIoTaskArgs(new_output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) !=
if (UpdateIoTaskArgs(output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) !=
SUCCESS) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update output data to model failed.");
return ACL_ERROR_GE_PARAM_INVALID;


+ 6
- 7
ge/graph/load/model_manager/davinci_model.h View File

@@ -675,7 +675,7 @@ class DavinciModel {
/// @return Status
///
Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index,
map<uint32_t, OpDescPtr> &data_by_index);
map<uint32_t, OpDescPtr> &data_by_index, set<const void *> &input_outside_addrs);

///
/// @ingroup ge
@@ -694,7 +694,8 @@ class DavinciModel {
/// @param [in/out] vector<OpDescPtr>: All NetOutput node in model.
/// @return Status
///
Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list);
Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list,
set<const void *> &output_outside_addrs);

///
/// @ingroup ge
@@ -764,7 +765,7 @@ class DavinciModel {
///
Status BindInputQueue();

Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, map<const void *, ZeroCopyOffset> &outside_addrs);
Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs);

///
/// @ingroup ge
@@ -897,10 +898,8 @@ class DavinciModel {
void *global_step_addr_{nullptr};
uint64_t global_step_size_{0};

map<uint32_t, ZeroCopyOffset> new_input_data_info_;
map<uint32_t, ZeroCopyOffset> new_output_data_info_;
map<const void *, ZeroCopyOffset> new_input_outside_addrs_;
map<const void *, ZeroCopyOffset> new_output_outside_addrs_;
map<uint32_t, ZeroCopyOffset> input_data_info_;
map<uint32_t, ZeroCopyOffset> output_data_info_;

set<const void *> real_virtual_addrs_;



+ 2
- 2
ge/graph/load/model_manager/ts_mem_mall.h View File

@@ -100,8 +100,8 @@ class TsMemMall {

private:
std::mutex mem_mutex_;
std::unordered_map<int64_t, void *> mem_store_size_;
std::unordered_map<void *, int64_t> mem_store_addr_;
std::map<int64_t, void *> mem_store_size_;
std::map<void *, int64_t> mem_store_addr_;
rtMemType_t mem_type_;
};
} // namespace ge


+ 7
- 3
ge/graph/load/model_manager/zero_copy_offset.cc View File

@@ -127,8 +127,8 @@ void ZeroCopyOffset::IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const
}
}

void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index,
bool fusion_flag, std::set<const void *> &real_virtual_addrs) {
void ZeroCopyOffset::SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag,
set<const void *> &real_virtual_addrs) {
uint32_t out_count = 0;
if (!fusion_flag) {
out_count++;
@@ -138,7 +138,6 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_l
real_virtual_addrs.insert(addr);
} else {
GELOGI("[ZCPY] set l2-fusion for virtual_addr %p.", addr);
int64_t output_offset = output_offset_list.at(index);
for (size_t i = 0; i < zero_copy_basic_offset_.size(); ++i) {
if (zero_copy_basic_offset_.at(i) == output_offset) {
out_count++;
@@ -153,6 +152,7 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_l
}
}
addr_count_ = out_count;
valid_relative_offset_ = true;
}

void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr,
@@ -181,9 +181,13 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo
}
}
addr_count_ = out_count;
valid_relative_offset_ = true;
}

void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) {
if (!valid_relative_offset_) {
return;
}
const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr);
for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) {
auto args_addrs = outside_addrs_[out_count].find(outside_addr);


+ 4
- 3
ge/graph/load/model_manager/zero_copy_offset.h View File

@@ -43,8 +43,7 @@ class ZeroCopyOffset {
~ZeroCopyOffset();

Status InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, bool &fusion_flag);
void SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index,
bool fusion_flag, std::set<const void *> &real_virtual_addrs);
void SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag, set<const void *> &real_virtual_addrs);

void IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const int64_t &tensor_addr, bool &fusion_flag);
Status InitOutputDataInfo(const vector<int64_t> &input_size_list, const vector<void *> &virtual_addr_list,
@@ -65,9 +64,10 @@ class ZeroCopyOffset {
// data_size of Data/Netoutput
int64_t GetDataSize() const { return data_size_; }
// value of *outside_addrs_ from davinci_model
const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() { return outside_addrs_; }
const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() const { return outside_addrs_; }
// name of op
std::string GetOpName() const { return op_name_; }
const bool IsRelativeOffsetValid() const { return valid_relative_offset_; }

private:
void *basic_addr_ = nullptr;
@@ -81,6 +81,7 @@ class ZeroCopyOffset {

std::vector<int64_t> zero_copy_basic_offset_;
std::vector<int64_t> zero_copy_relative_offset_;
bool valid_relative_offset_ = false;
};
} // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_

+ 1
- 1
ge/graph/manager/graph_manager.cc View File

@@ -131,7 +131,7 @@ bool IsTailingOptimization() {
}

ge::Status CheckFpCeilingMode() {
static const std::unordered_set<std::string> kValidFpCeilingMode = {"0", "1", "2"};
static const std::set<std::string> kValidFpCeilingMode = {"0", "1", "2"};
string mode;
auto ret = ge::GetContext().GetOption("ge.fpCeilingMode", mode);
if (ret == ge::GRAPH_SUCCESS) {


+ 2
- 2
ge/graph/manager/graph_var_manager.h View File

@@ -170,8 +170,8 @@ class VarResource {
std::unordered_map<std::string, VarAddrMgr> var_addr_mgr_map_;
std::unordered_map<std::string, ge::GeTensorDesc> cur_var_tensor_desc_map_;
std::unordered_map<std::string, std::vector<TransNodeInfo>> var_to_trans_road_;
std::unordered_map<std::string, uint32_t> var_names_to_changed_graph_id_;
std::unordered_map<std::string, uint32_t> var_names_to_allocated_graph_id_;
std::map<std::string, uint32_t> var_names_to_changed_graph_id_;
std::map<std::string, uint32_t> var_names_to_allocated_graph_id_;
std::map<uint32_t, std::unordered_map<std::string, VarBroadCastInfo>> var_broad_cast_info_;
};



+ 1
- 1
ge/graph/partition/graph_partition.cc View File

@@ -843,7 +843,7 @@ bool ge::GraphPartitioner::HasSecondPath(size_t src, size_t dst, size_t upper_bo
/// Avoid recursion since stack space might be limited.
/// We instead keep a stack of nodes to visit.
std::vector<size_t> temp_stack;
std::unordered_set<size_t> visited;
std::set<size_t> visited;
temp_stack.push_back(src);
while (!temp_stack.empty()) {
size_t cluster = temp_stack.back();


+ 1
- 1
ge/graph/partition/graph_partition.h View File

@@ -36,7 +36,7 @@ using PartitionMap = std::unordered_map<ComputeGraphPtr, std::string>;
using NodetoNodeMap = std::unordered_map<NodePtr, NodePtr>;
using EnginetoGraphMap = std::unordered_map<std::string, ComputeGraphPtr>;
using EdgeMap = std::set<std::pair<AnchorPtr, AnchorPtr>>;
using ClusterSet = std::unordered_set<size_t>;
using ClusterSet = std::set<size_t>;
class Cluster {
public:
size_t index_; // corresponding to rank of node


+ 2
- 2
ge/graph/passes/constant_folding_pass.cc View File

@@ -50,12 +50,12 @@ Status RunOpKernelWithCheck(NodePtr &node,
return FoldingPass::RunOpKernel(node, inputs, outputs);
}

const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>>
const std::map<std::string, std::pair<std::uint64_t, uint64_t>>
&ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const {
return statistic_of_ge_constant_folding_;
}

const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>>
const std::map<std::string, std::pair<std::uint64_t, uint64_t>>
&ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const {
return statistic_of_op_constant_folding_;
}


+ 4
- 4
ge/graph/passes/constant_folding_pass.h View File

@@ -26,11 +26,11 @@ namespace ge {
class ConstantFoldingPass : public FoldingPass {
public:
Status Run(ge::NodePtr &node) override;
const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> &GetGeConstantFoldingPerfStatistic() const;
const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> &GetOpConstantFoldingPerfStatistic() const;
const std::map<std::string, std::pair<std::uint64_t, uint64_t>> &GetGeConstantFoldingPerfStatistic() const;
const std::map<std::string, std::pair<std::uint64_t, uint64_t>> &GetOpConstantFoldingPerfStatistic() const;
private:
std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_op_constant_folding_;
std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_ge_constant_folding_;
std::map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_op_constant_folding_;
std::map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_ge_constant_folding_;
};
} // namespace ge



+ 5
- 0
ge/graph/passes/hccl_continuous_memcpy_pass.cc View File

@@ -372,6 +372,11 @@ NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph,
}
GELOGI("Create Assign op:%s.", op_desc->GetName().c_str());

if (!AttrUtils::SetBool(op_desc, ATTR_NEED_COMPILE, true)) {
GELOGE(INTERNAL_ERROR, "Set ATTR_NEED_COMPILE Attr for node:%s fail.", op_desc->GetName().c_str());
return nullptr;
}

graphStatus ret = op_desc->AddInputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx()));
if (ret != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Create Assign op: add ref input desc fail.");


+ 1
- 1
ge/graph/passes/hccl_continuous_memcpy_pass.h View File

@@ -52,7 +52,7 @@ class HcclContinuousMemcpyPass : public GraphPass {

bool IsDataNode(const std::string& node_type);

std::unordered_map<std::string, uint32_t> node_num_map_;
std::map<std::string, uint32_t> node_num_map_;
};
} // namespace ge



+ 1
- 1
ge/graph/passes/hccl_memcpy_pass.h View File

@@ -50,7 +50,7 @@ class HcclMemcpyPass : public GraphPass {

bool IsDataNode(const std::string& node_type);

std::unordered_map<std::string, uint32_t> node_num_map_;
std::map<std::string, uint32_t> node_num_map_;
};
} // namespace ge



+ 53
- 2
ge/graph/passes/multi_batch_clone_pass.cc View File

@@ -92,8 +92,7 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) {
}

// parser data dynamic info from atc parameter --input_shape
if (multibatch::ParserDataToDynmaicInfo(batch_shapes_, GetLocalOmgContext().user_input_dims,
data_to_dynamic_info_) != SUCCESS) {
if (CheckAndParseDynamicData() != SUCCESS) {
GELOGE(PARAM_INVALID, "Parse each data's own dynamic info failed");
return PARAM_INVALID;
}
@@ -177,6 +176,58 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) {
return SUCCESS;
}

///
/// @brief Find the Data nodes whose output shape contains an unknown (negative) dim, validate the ones
///        that are not listed in --input_shape against the configured dynamic option, then parse the
///        per-data dynamic info into data_to_dynamic_info_.
/// @return SUCCESS / PARAM_INVALID / INTERNAL_ERROR
///         (NOTE(review): a ParserDataToDynamicInfo failure is presumably propagated by
///          GE_CHK_STATUS_RET - confirm against the macro definition)
///
Status MultiBatchClonePass::CheckAndParseDynamicData() {
size_t unknown_shape_count = 0;
// Local copy of the user supplied name/shape pairs; dynamic data missing from it is appended below.
auto data_name_and_shape = GetLocalOmgContext().user_input_dims;
// Names that the user listed explicitly, used for the lookup inside the loop.
std::vector<std::string> data_name_order;
for (auto &item : data_name_and_shape) {
data_name_order.push_back(item.first);
}
// Data nodes are only inspected when getnext_sink_dynamic_dims_ is not set.
if (!getnext_sink_dynamic_dims_) {
for (const auto &node : all_data_nodes_) {
auto data_desc = NodeUtils::GetOutputDesc(*node, kDataOutIndex);
auto data_shape = data_desc.GetShape();
// Only NCHW and NHWC are named; every other format is passed on as "Others".
auto data_format = data_desc.GetFormat() == Format::FORMAT_NCHW ? "NCHW" :
data_desc.GetFormat() == Format::FORMAT_NHWC ? "NHWC" : "Others";
auto data_name = node->GetName();

const auto &data_shape_dims = data_shape.GetDims();
// No negative dim means the shape is fully known: this data is not dynamic, skip it.
if (std::all_of(data_shape_dims.begin(), data_shape_dims.end(), [](int64_t val) { return val >= 0; })) {
continue;
}
++unknown_shape_count;
auto iter = find(data_name_order.begin(), data_name_order.end(), data_name);
// Dynamic data that the user did not list: check it against whichever dynamic option is set.
if (iter == data_name_order.end()) {
if (!GetLocalOmgContext().dynamic_batch_size.empty()) {
auto ret = multibatch::CheckDynamicBatchShape(data_shape_dims, data_name);
GE_IF_BOOL_EXEC(ret == false, GELOGE(PARAM_INVALID, "Failed to check dynamic batch shape of %s.",
data_name.c_str()); return PARAM_INVALID);
} else if (!GetLocalOmgContext().dynamic_image_size.empty()) {
auto ret = multibatch::CheckDynamicImageSizeShape(data_shape_dims, data_name, data_format);
GE_IF_BOOL_EXEC(ret == false, GELOGE(PARAM_INVALID, "Failed to check dynamic image size shape of %s.",
data_name.c_str()); return PARAM_INVALID);
} else if (!GetLocalOmgContext().dynamic_dims.empty()) {
// With dynamic_dims every dynamic data has to be listed explicitly, so a missing one is an error.
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "reason"},
{"--input_shape", "all dynamic data must be set in --input_shape"});
GELOGE(INTERNAL_ERROR, "data: %s shape:%s must be set int --input_shape",
node->GetName().c_str(), data_shape.ToString().c_str());
return INTERNAL_ERROR;
}
// Add the data with the shape taken from the graph so that it takes part in the parsing below.
data_name_and_shape.emplace_back(data_name, data_shape_dims);
}
}
}
auto ret = multibatch::ParserDataToDynamicInfo(batch_shapes_, data_name_and_shape, data_to_dynamic_info_);
GE_CHK_STATUS_RET(ret, "Failed to parse data to dynamic info.");
// This check runs after the parsing: a dynamic option without any unknown-shape data is rejected.
if (!getnext_sink_dynamic_dims_ && unknown_shape_count == 0) {
ErrorManager::GetInstance().ATCReportErrMessage("E10040");
GELOGE(PARAM_INVALID,
"Need unknow shape data when user set --dynamic_batch_size, --dynamic_image_size or --dynamic_dims");
return PARAM_INVALID;
}
return SUCCESS;
}

Status MultiBatchClonePass::InitParamsOfGetNext(const NodePtr &node) {
data_count_from_getnext_ = 0;
getnext_sink_dynamic_dims_ = false;


+ 2
- 0
ge/graph/passes/multi_batch_clone_pass.h View File

@@ -175,6 +175,8 @@ class MultiBatchClonePass : public GraphPass {
/// @return 0: SUCCESS / others: FAILED
///
Status UpdateOutputTensor(uint32_t parent_index, uint32_t unused_num);
Status CheckAndParseDynamicData();

std::string session_graph_id_;
std::vector<std::vector<int64_t>> batch_shapes_;


+ 1
- 1
ge/graph/passes/switch_to_stream_switch_pass.h View File

@@ -235,7 +235,7 @@ class SwitchToStreamSwitchPass : public GraphPass {
std::vector<NodePtr> stream_switch_nodes_;
std::unordered_map<OutDataAnchorPtr, std::map<int64_t, std::vector<std::list<NodePtr>>>> cond_node_map_;
std::unordered_map<NodePtr, std::set<std::string>> switch_node_map_;
std::unordered_map<std::string, uint32_t> node_num_map_;
std::map<std::string, uint32_t> node_num_map_;
};
} // namespace ge
#endif // GE_GRAPH_PASSES_SWITCH_TO_STREAM_SWITCH_PASS_H_

+ 1
- 1
ge/graph/preprocess/multi_batch_copy_graph.cc View File

@@ -738,7 +738,7 @@ Status MultiBatchGraphCopyer::CheckAndParseDynamicData(){
}
}
}
auto ret = ParserDataToDynmaicInfo(shapes_, data_name_and_shape, data_to_dynamic_info_);
auto ret = ParserDataToDynamicInfo(shapes_, data_name_and_shape, data_to_dynamic_info_);
GE_CHK_STATUS_RET(ret, "Failed to parse data to dynamic info.");
if (!getnext_sink_dynamic_dims_ && unknown_shape_count == 0) {
ErrorManager::GetInstance().ATCReportErrMessage("E10040");


+ 1
- 1
ge/graph/preprocess/multi_batch_options.cc View File

@@ -377,7 +377,7 @@ bool InitDynamicParams(vector<vector<int64_t>> &shapes) {
/// @param [out] map<string, vector<vector<int64_t>>> &data_to_dynamic_info: key:data_name. value:dynamic dims.
/// @return true: Configed for Multi batch / false: Not configed for Multi batch.
///
Status ParserDataToDynmaicInfo(const vector<vector<int64_t>> &shapes,
Status ParserDataToDynamicInfo(const vector<vector<int64_t>> &shapes,
vector<pair<string, vector<int64_t>>> &data_name_and_shape,
map<string, vector<vector<int64_t>> > &data_to_dynamic_info) {
size_t cur_data_index = 0;


+ 3
- 3
ge/graph/preprocess/multi_batch_options.h View File

@@ -74,7 +74,7 @@ Status CalcShape(const std::vector<int64_t> &batch_shape, GeShape &data_shape);
/// @param [out] map<string, vector<vector<int64_t>>> &data_to_dynamic_info: key:data_name. value:dynamic dims.
/// @return SUCCESS / PARAM_INVALID
///
Status ParserDataToDynmaicInfo(const vector<vector<int64_t>> &shapes,
Status ParserDataToDynamicInfo(const vector<vector<int64_t>> &shapes,
vector<pair<string, vector<int64_t>>> &data_name_and_shape,
map<string, vector<vector<int64_t>>> &data_to_dynamic_info);

@@ -93,7 +93,7 @@ Status StampDynamicType(const OpDescPtr &op_desc);
/// @param [in] const string &data_name: cur data name.
/// @return 0: true/false
///
bool CheckDynamicBatchShape(const vector<int64_t> &shape, const string &data_name);
GE_FUNC_VISIBILITY bool CheckDynamicBatchShape(const vector<int64_t> &shape, const string &data_name);

///
/// @ingroup ge
@@ -104,7 +104,7 @@ bool CheckDynamicBatchShape(const vector<int64_t> &shape, const string &data_nam
/// @param [in] const std::string &input_format: format of input.
/// @return 0: true/false
///
bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name,
GE_FUNC_VISIBILITY bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name,
const std::string &input_format);

} // namespace multibatch


+ 26
- 1
ge/host_cpu_engine/CMakeLists.txt View File

@@ -21,10 +21,12 @@ add_library(host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
target_compile_options(host_cpu_engine PRIVATE
-Werror
-fno-common
-fvisibility=hidden
)

target_compile_definitions(host_cpu_engine PRIVATE
google=ascend_private
FUNC_VISIBILITY
)

target_include_directories(host_cpu_engine PRIVATE
@@ -44,6 +46,10 @@ target_include_directories(host_cpu_engine PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_options(host_cpu_engine PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(host_cpu_engine PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
@@ -60,11 +66,12 @@ add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
target_compile_options(atc_host_cpu_engine PRIVATE
-Werror
-fno-common
-fvisibility=hidden
)

target_compile_definitions(atc_host_cpu_engine PRIVATE
COMPILE_OMG_PACKAGE
google=ascend_private
FUNC_VISIBILITY
)

target_include_directories(atc_host_cpu_engine PRIVATE
@@ -84,6 +91,10 @@ target_include_directories(atc_host_cpu_engine PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_options(atc_host_cpu_engine PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(atc_host_cpu_engine PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
@@ -105,10 +116,12 @@ add_library(host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST})
target_compile_options(host_cpu_opskernel_builder PRIVATE
-Werror
-fno-common
-fvisibility=hidden
)

target_compile_definitions(host_cpu_opskernel_builder PRIVATE
google=ascend_private
FUNC_VISIBILITY
)

target_include_directories(host_cpu_opskernel_builder PRIVATE
@@ -128,6 +141,10 @@ target_include_directories(host_cpu_opskernel_builder PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_options(host_cpu_opskernel_builder PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(host_cpu_opskernel_builder PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
@@ -145,10 +162,12 @@ add_library(atc_host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST})
target_compile_options(atc_host_cpu_opskernel_builder PRIVATE
-Werror
-fno-common
-fvisibility=hidden
)

target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE
google=ascend_private
FUNC_VISIBILITY
)

target_include_directories(atc_host_cpu_opskernel_builder PRIVATE
@@ -168,6 +187,10 @@ target_include_directories(atc_host_cpu_opskernel_builder PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_options(atc_host_cpu_opskernel_builder PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(atc_host_cpu_opskernel_builder PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
@@ -190,11 +213,13 @@ add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST})
target_compile_options(host_cpu_opskernel_builder_static PRIVATE
-Werror
-fno-common
-fvisibility=hidden
)

target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE
google=ascend_private
LOG_CPP
FUNC_VISIBILITY
)

target_include_directories(host_cpu_opskernel_builder_static PRIVATE


+ 19
- 5
ge/host_cpu_engine/engine/host_cpu_engine.h View File

@@ -17,6 +17,20 @@
#ifndef GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_
#define GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_

// GE_FUNC_VISIBILITY is the export marker put on the classes and functions declared in this header.
// It expands to an export attribute only when FUNC_VISIBILITY is defined (dllexport for MSVC,
// default symbol visibility for other compilers) and to nothing otherwise.
#if defined(_MSC_VER)
#ifdef FUNC_VISIBILITY
#define GE_FUNC_VISIBILITY _declspec(dllexport)
#else
#define GE_FUNC_VISIBILITY
#endif
#else
#ifdef FUNC_VISIBILITY
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#else
#define GE_FUNC_VISIBILITY
#endif
#endif

#include <map>
#include <memory>
#include <string>
@@ -32,7 +46,7 @@ namespace host_cpu {
* host cpu engine.
* Used for the ops which executes on host.
*/
class HostCpuEngine {
class GE_FUNC_VISIBILITY HostCpuEngine {
public:
/**
* get HostCpuEngine instance.
@@ -87,25 +101,25 @@ extern "C" {
* When Ge start, GE will invoke this interface
* @return The status whether initialize successfully
*/
ge::Status Initialize(const map<string, string> &options);
GE_FUNC_VISIBILITY ge::Status Initialize(const map<string, string> &options);

/**
* After the initialize, GE will invoke this interface to get the Ops kernel Store
* @param ops_kernel_map The host cpu's ops kernel info
*/
void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map);
GE_FUNC_VISIBILITY void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map);

/**
* After the initialize, GE will invoke this interface to get the Graph Optimizer
* @param graph_optimizers The host cpu's Graph Optimizer objs
*/
void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers);
GE_FUNC_VISIBILITY void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers);

/**
* When the graph finished, GE will invoke this interface
* @return The status whether initialize successfully
*/
ge::Status Finalize();
GE_FUNC_VISIBILITY ge::Status Finalize();
}

#endif // GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_

+ 15
- 1
ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h View File

@@ -17,11 +17,25 @@
#ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_
#define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_

// GE_FUNC_VISIBILITY is the export marker put on the class declared in this header.
// It expands to an export attribute only when FUNC_VISIBILITY is defined (dllexport for MSVC,
// default symbol visibility for other compilers) and to nothing otherwise.
#if defined(_MSC_VER)
#ifdef FUNC_VISIBILITY
#define GE_FUNC_VISIBILITY _declspec(dllexport)
#else
#define GE_FUNC_VISIBILITY
#endif
#else
#ifdef FUNC_VISIBILITY
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#else
#define GE_FUNC_VISIBILITY
#endif
#endif

#include "common/opskernel/ops_kernel_builder.h"

namespace ge {
namespace host_cpu {
class HostCpuOpsKernelBuilder : public OpsKernelBuilder {
class GE_FUNC_VISIBILITY HostCpuOpsKernelBuilder : public OpsKernelBuilder {
public:
Status Initialize(const map<std::string, std::string> &options) override;



+ 15
- 1
ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h View File

@@ -17,6 +17,20 @@
#ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_
#define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_

// GE_FUNC_VISIBILITY is the export marker put on the class declared in this header.
// It expands to an export attribute only when FUNC_VISIBILITY is defined (dllexport for MSVC,
// default symbol visibility for other compilers) and to nothing otherwise.
#if defined(_MSC_VER)
#ifdef FUNC_VISIBILITY
#define GE_FUNC_VISIBILITY _declspec(dllexport)
#else
#define GE_FUNC_VISIBILITY
#endif
#else
#ifdef FUNC_VISIBILITY
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#else
#define GE_FUNC_VISIBILITY
#endif
#endif

#include <map>
#include <string>
#include <vector>
@@ -25,7 +39,7 @@

namespace ge {
namespace host_cpu {
class HostCpuOpsKernelInfoStore : public OpsKernelInfoStore {
class GE_FUNC_VISIBILITY HostCpuOpsKernelInfoStore : public OpsKernelInfoStore {
public:
HostCpuOpsKernelInfoStore() {}
~HostCpuOpsKernelInfoStore() override = default;


+ 1
- 1
ge/host_cpu_engine/ops_kernel_store/op/host_op.h View File

@@ -21,7 +21,7 @@

namespace ge {
namespace host_cpu {
class HostOp : public Op {
class GE_FUNC_VISIBILITY HostOp : public Op {
public:
HostOp(const Node &node, RunContext &run_context) : Op(node, run_context) {}
~HostOp() override = default;


+ 1
- 1
ge/host_cpu_engine/ops_kernel_store/op/op.h View File

@@ -29,7 +29,7 @@ namespace host_cpu {
/**
* The base class for all op.
*/
class Op {
class GE_FUNC_VISIBILITY Op {
public:
Op(const Node &node, RunContext &run_context) : run_context_(run_context), node_(node) {}
virtual ~Op() = default;


+ 2
- 2
ge/host_cpu_engine/ops_kernel_store/op/op_factory.h View File

@@ -32,7 +32,7 @@ using OP_CREATOR_FUNC = std::function<std::shared_ptr<Op>(const Node &, RunConte
/**
* manage all the op, support create op.
*/
class OpFactory {
class GE_FUNC_VISIBILITY OpFactory {
public:
static OpFactory &Instance();

@@ -70,7 +70,7 @@ class OpFactory {
std::vector<std::string> all_ops_;
};

class OpRegistrar {
class GE_FUNC_VISIBILITY OpRegistrar {
public:
OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) {
OpFactory::Instance().RegisterCreator(type, func);


+ 1
- 1
ge/hybrid/common/tensor_value.cc View File

@@ -71,7 +71,7 @@ TensorValue::TensorValue(void *buffer, size_t size) : ref_buffer_(buffer), ref_s
TensorValue::~TensorValue() { Destroy(); }

void TensorValue::Destroy() {
if (buffer_ != nullptr || ref_buffer_ != nullptr) {
if (buffer_ != nullptr) {
GELOGD("Unref tensor: %s", DebugString().c_str());
buffer_.reset();
}


+ 3
- 1
ge/hybrid/executor/hybrid_model_executor.cc View File

@@ -71,12 +71,14 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_));
RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End");

HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc), "Failed to execute partitioned call.");
HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc, args.outputs),
"Failed to execute partitioned call.");
RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End");

HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph.");
RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End");

args.outputs.clear();
HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs");
RECORD_MODEL_EXECUTION_EVENT(&context_, "[GetOutput] End");
return SUCCESS;


+ 43
- 2
ge/hybrid/executor/subgraph_executor.cc View File

@@ -131,10 +131,14 @@ Status SubgraphExecutor::InitInputsForKnownShape(const std::vector<TensorValue>
}

Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
const std::vector<ConstGeTensorDescPtr> &input_desc) {
const std::vector<ConstGeTensorDescPtr> &input_desc,
const std::vector<TensorValue> &outputs) {
GELOGD("[%s] is dynamic = %s", graph_item_->GetName().c_str(), graph_item_->IsDynamic() ? "true" : "false");
GE_CHK_STATUS_RET(Init(inputs, input_desc), "[%s] Failed to init executor.", graph_item_->GetName().c_str());

if (!outputs.empty()) {
GE_CHK_STATUS_RET(EnableOutputZeroCopy(outputs),
"Failed to enable output zero copy by user provided outputs.");
}
if (!graph_item_->IsDynamic()) {
return ExecuteAsyncForKnownShape(inputs);
}
@@ -144,6 +148,11 @@ Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
return SUCCESS;
}

// Overload without user provided outputs: forwards to the three-argument version with an empty
// output list.
Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
                                      const std::vector<ConstGeTensorDescPtr> &input_desc) {
  const std::vector<TensorValue> no_outputs;
  return ExecuteAsync(inputs, input_desc, no_outputs);
}

Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vector<TensorValue> &inputs) {
GELOGD("[%s] subgraph is not dynamic.", graph_item_->GetName().c_str());
if (graph_item_->GetAllNodes().size() != 1) {
@@ -440,5 +449,37 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) {

return SUCCESS;
}

// Registers the user provided output tensors as the outputs of the nodes that feed NetOutput.
// outputs: one tensor per output edge of the graph, in the order returned by GetOutputEdges().
// Returns PARAM_INVALID when the number of tensors differs from the number of output edges,
// SUCCESS when every tensor was registered. A SetOutput failure goes through GE_CHK_STATUS_RET
// (NOTE(review): presumably returns that status - confirm against the macro definition).
Status SubgraphExecutor::EnableOutputZeroCopy(const vector<TensorValue> &outputs) {
  GELOGD("To enable zero copy, output number = %zu", outputs.size());
  const auto &output_edges = graph_item_->GetOutputEdges();
  // Op -> NetOutput, set the output tensor of Op that output to the NetOutput node
  if (outputs.size() != output_edges.size()) {
    GELOGE(PARAM_INVALID, "Output number mismatches, expect = %zu, but given = %zu",
           output_edges.size(),
           outputs.size());
    return PARAM_INVALID;
  }

  for (size_t i = 0; i < outputs.size(); ++i) {
    auto &output_tensor = outputs[i];
    // Each edge is a (producer node, producer output index) pair.
    auto &output_node = output_edges[i].first;
    int output_idx = output_edges[i].second;
    GELOGD("[%s] Set output tensor[%zu] to [%s]'s output[%d], tensor = %s",
           graph_item_->GetName().c_str(),
           i,
           output_node->NodeName().c_str(),
           output_idx,
           output_tensor.DebugString().c_str());

    // The message names the output tensor: this call sets an output, not an input.
    GE_CHK_STATUS_RET(subgraph_context_->SetOutput(*output_node, output_idx, output_tensor),
                      "[%s] Failed to set output tensor[%zu]",
                      graph_item_->GetName().c_str(),
                      i);
  }

  GELOGD("Done enabling zero copy for outputs successfully.");
  return SUCCESS;
}
} // namespace hybrid
} // namespace ge

+ 14
- 1
ge/hybrid/executor/subgraph_executor.h View File

@@ -43,7 +43,19 @@ class SubgraphExecutor {
* @param input_desc input tensor descriptions
* @return SUCCESS on success, error code otherwise
*/
Status ExecuteAsync(const std::vector<TensorValue> &inputs, const std::vector<ConstGeTensorDescPtr> &input_desc);
Status ExecuteAsync(const std::vector<TensorValue> &inputs,
const std::vector<ConstGeTensorDescPtr> &input_desc);

/**
* Execute subgraph async, output tensor address(not data) and output tensor descriptions are
* valid after this method returned
* @param inputs input tensors
* @param input_desc input tensor descriptions
* @param outputs user provided output tensors; when non-empty they are set as the outputs of the
*                graph's output edges before execution (count must match), when empty this step is skipped
* @return SUCCESS on success, error code otherwise
*/
Status ExecuteAsync(const std::vector<TensorValue> &inputs,
const std::vector<ConstGeTensorDescPtr> &input_desc,
const std::vector<TensorValue> &outputs);

/**
* Execute subgraph async, output tensor address(not data) and output tensor descriptions are
@@ -76,6 +88,7 @@ class SubgraphExecutor {

private:
Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state);
Status EnableOutputZeroCopy(const std::vector<TensorValue> &outputs);
static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state);
Status Init(const std::vector<TensorValue> &inputs,
const std::vector<ConstGeTensorDescPtr> &input_desc);


+ 7
- 2
ge/hybrid/model/hybrid_model.cc View File

@@ -40,9 +40,14 @@ HybridModel::~HybridModel() {
GELOGD("[%s] HybridModel destroyed.", model_name_.c_str());
}

Status HybridModel::Init() {
Status HybridModel::Init(bool is_single_op) {
GELOGD("Start to init hybrid model.");
GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "Failed to build hybrid model.");
is_single_op_ = is_single_op;
if (is_single_op) {
GE_CHK_STATUS_RET(HybridModelBuilder(*this).BuildForSingleOp(), "Failed to build hybrid model.");
} else {
GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "Failed to build hybrid model.");
}
GELOGD("HybridModel initialized successfully.");
return SUCCESS;
}


+ 7
- 1
ge/hybrid/model/hybrid_model.h View File

@@ -37,7 +37,7 @@ class HybridModel {

~HybridModel();

Status Init();
Status Init(bool is_single_op = false);

const NodeItem *GetNodeItem(const NodePtr &node) const;

@@ -69,6 +69,10 @@ class HybridModel {
return model_id_;
}

// Returns the is_single_op flag that was passed to Init() (false by default).
bool IsSingleOp() const {
return is_single_op_;
}

TensorValue* GetVariable(const string &name) const;

NodePtr GetVariableNode(const string &name) const;
@@ -131,11 +135,13 @@ class HybridModel {
std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_;

bool is_new_model_desc_ = false; // support aipp
bool is_single_op_ = false;

// runtime fields
uint32_t device_id_ = 0;
uint32_t model_id_ = 0;
uint8_t *var_mem_base_ = nullptr;
std::unique_ptr<TensorBuffer> weight_buffer_;
RuntimeParam root_runtime_param_;
};
} // namespace hybrid


+ 111
- 24
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -147,6 +147,21 @@ Status HybridModelBuilder::Build() {
return SUCCESS;
}

/**
 * Build the hybrid model for the single-op path.
 * Validates the GeRootModel, looks up the GeModel registered under the root graph's name,
 * indexes its task defs by node, then loads the graph, the weights and the tasks.
 * @return SUCCESS on success, error code otherwise
 */
Status HybridModelBuilder::BuildForSingleOp() {
  GE_CHK_STATUS_RET(ValidateParams(), "Failed to validate GeRootModel");
  const auto &root_graph = ge_root_model_->GetRootGraph();
  hybrid_model_.model_name_ = root_graph->GetName();
  GELOGI("[%s] Start to build hybrid model.", GetGraphName());

  // Use find() instead of operator[]: operator[] default-inserts a null model for an unknown
  // name, which would then be dereferenced while indexing the task defs.
  const auto &name_to_model = ge_root_model_->GetSubgraphInstanceNameToModel();
  const auto iter = name_to_model.find(root_graph->GetName());
  if (iter == name_to_model.end() || iter->second == nullptr) {
    GELOGE(INTERNAL_ERROR, "[%s] Failed to get GeModel of root graph", GetGraphName());
    return INTERNAL_ERROR;
  }

  GE_CHK_STATUS_RET(IndexTaskDefs(root_graph, iter->second),
                    "[%s] Failed to index task defs", GetGraphName());
  GE_CHK_STATUS_RET(LoadGraph(), "[%s] Failed to load graph", GetGraphName());
  GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName());
  GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName());
  GELOGI("[%s] Done building hybrid model for single op successfully.", GetGraphName());
  return SUCCESS;
}

Status HybridModelBuilder::ValidateParams() {
GE_CHECK_NOTNULL(ge_root_model_);
GE_CHECK_NOTNULL(ge_root_model_->GetRootGraph());
@@ -951,46 +966,71 @@ Status HybridModelBuilder::InitVariableTensors() {
}

Status HybridModelBuilder::InitWeights() {
// For constant in root graph
const auto &root_graph = ge_root_model_->GetRootGraph();
const auto &subgraph_models = ge_root_model_->GetSubgraphInstanceNameToModel();
auto iter = subgraph_models.find(root_graph->GetName());
if (iter == subgraph_models.end()) {
GELOGD("Root graph model not found");
return SUCCESS;
}

auto &root_model = iter->second;
const auto &weight_buffer = root_model->GetWeight();
if (weight_buffer.GetSize() == 0) {
GELOGD("weight is empty");
return SUCCESS;
}

auto allocator = NpuMemoryAllocator::GetAllocator();
GE_CHECK_NOTNULL(allocator);

for (auto &it : hybrid_model_.node_items_) {
auto &node_item = it.second;
if (node_item->node_type != CONSTANT) {
hybrid_model_.weight_buffer_ = TensorBuffer::Create(allocator, weight_buffer.size());
GE_CHECK_NOTNULL(hybrid_model_.weight_buffer_);
auto weight_base = reinterpret_cast<uint8_t *>(hybrid_model_.weight_buffer_->GetData());
GE_CHK_RT_RET(rtMemcpy(weight_base,
hybrid_model_.weight_buffer_->GetSize(),
weight_buffer.GetData(),
weight_buffer.GetSize(),
RT_MEMCPY_HOST_TO_DEVICE));

GELOGI("Init weight mem successfully, weight base %p, weight size = %zu",
weight_base,
hybrid_model_.weight_buffer_->GetSize());
for (auto &node : root_graph->GetDirectNode()) {
if (node->GetType() != CONSTANT) {
continue;
}

const auto &constant_node = node_item->node;
auto op_desc = constant_node->GetOpDesc();
auto op_desc = node->GetOpDesc();
auto v_weights = ModelUtils::GetWeights(op_desc);
if (v_weights.empty()) {
GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", constant_node->GetName().c_str());
GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", node->GetName().c_str());
return INTERNAL_ERROR;
}
auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get());
auto output_desc = op_desc->MutableOutputDesc(0);
GE_CHECK_NOTNULL(output_desc);
auto tensor_size = ge_tensor->GetData().GetSize();
GELOGD("[%s] Start to init Constant node [%s], size = %ld",
GE_CHECK_NOTNULL(ge_tensor);
const GeTensorDesc &tensor_desc = ge_tensor->GetTensorDesc();
int64_t tensor_size = 0;
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*op_desc->MutableOutputDesc(0), tensor_size),
"[%s] Failed to get tensor size",
node->GetName().c_str());
int64_t data_offset = 0;
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetDataOffset(tensor_desc, data_offset),
"[%s] Failed to get data offset",
node->GetName().c_str());
GELOGD("[%s] Start to init Constant node [%s], size = %ld, offset = %ld",
GetGraphName(),
constant_node->GetName().c_str(),
tensor_size);
node->GetName().c_str(),
tensor_size,
data_offset);

auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size);
auto tensor_buffer = TensorBuffer::Create(weight_base + data_offset, tensor_size);
GE_CHECK_NOTNULL(tensor_buffer);
std::unique_ptr<TensorValue> constant_tensor(new (std::nothrow)TensorValue(std::move(tensor_buffer)));
GE_CHECK_NOTNULL(constant_tensor);
constant_tensor->SetName("Constant_" + op_desc->GetName());
if (tensor_size > 0) {
GE_CHK_RT_RET(rtMemcpy(constant_tensor->MutableData(),
constant_tensor->GetSize(),
ge_tensor->GetData().data(),
ge_tensor->GetData().size(),
RT_MEMCPY_HOST_TO_DEVICE));
}

hybrid_model_.constant_tensors_.emplace(constant_node, std::move(constant_tensor));
GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), constant_node->GetName().c_str(), tensor_size);
hybrid_model_.constant_tensors_.emplace(node, std::move(constant_tensor));
GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), node->GetName().c_str(), tensor_size);
}
return SUCCESS;
}
@@ -1038,6 +1078,53 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr
return SUCCESS;
}

/**
 * Index the task defs of a GeModel by the node they belong to.
 * Nodes directly contained in sub_graph are keyed by their op id; every KERNEL, KERNEL_EX and
 * HCCL task is then appended to hybrid_model_.task_defs_ under the node whose id equals the
 * task's op_index. Other task types are skipped. For KERNEL tasks the TBE kernel binary is
 * additionally loaded into the node's op desc.
 * @param sub_graph graph whose direct nodes own the tasks
 * @param ge_model model providing the task defs and the TBE kernel store
 * @return SUCCESS on success, INTERNAL_ERROR if a task refers to an unknown op index,
 *         error code if a required pointer is null
 */
Status HybridModelBuilder::IndexTaskDefs(const ComputeGraphPtr &sub_graph, const GeModelPtr &ge_model) {
  // Validate every pointer before it is dereferenced below.
  GE_CHECK_NOTNULL(sub_graph);
  GE_CHECK_NOTNULL(ge_model);
  const auto model_task_def = ge_model->GetModelTaskDefPtr();
  GE_CHECK_NOTNULL(model_task_def);

  // index task defs
  GELOGD("To index tasks for subgraph: %s", sub_graph->GetName().c_str());
  std::unordered_map<int64_t, NodePtr> node_map;
  for (const auto &node : sub_graph->GetDirectNode()) {
    GE_CHECK_NOTNULL(node);
    GE_CHECK_NOTNULL(node->GetOpDesc());
    auto node_id = node->GetOpDesc()->GetId();
    GELOGD("op_index = %ld, node_name = %s", node_id, node->GetName().c_str());
    node_map.emplace(node_id, node);
  }

  // Bind by reference so the task list is not copied; model_task_def keeps it alive.
  const auto &tasks = model_task_def->task();
  for (int i = 0; i < tasks.size(); ++i) {
    const domi::TaskDef &task_def = tasks[i];
    GELOGI("Task id = %d, task type = %d", i, task_def.type());
    const auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
    // Always overwritten below: each branch either assigns op_index or skips the task.
    uint32_t op_index = 0U;
    if (task_type == RT_MODEL_TASK_KERNEL) {
      op_index = task_def.kernel().context().op_index();
    } else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
      op_index = task_def.kernel_ex().op_index();
    } else if (task_type == RT_MODEL_TASK_HCCL) {
      op_index = task_def.kernel_hccl().op_index();
    } else {
      GELOGD("Skip task type: %d", static_cast<int>(task_type));
      continue;
    }

    auto iter = node_map.find(op_index);
    if (iter == node_map.end()) {
      GELOGE(INTERNAL_ERROR, "Failed to get node by index = %u", op_index);
      return INTERNAL_ERROR;
    }

    auto &node = iter->second;
    if (task_type == RT_MODEL_TASK_KERNEL) {
      ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc());
    }

    // Cast the enum explicitly: passing it straight to a %d vararg relies on its underlying type.
    GELOGD("Task loaded for node: %s, task type = %d, op_index = %u",
           node->GetName().c_str(), static_cast<int>(task_type), op_index);
    hybrid_model_.task_defs_[node].emplace_back(task_def);
  }

  return SUCCESS;
}

Status HybridModelBuilder::IndexTaskDefs() {
const auto &root_graph = ge_root_model_->GetRootGraph();
if (SetOutputNameAttr(*root_graph) != SUCCESS) {


+ 2
- 0
ge/hybrid/model/hybrid_model_builder.h View File

@@ -35,6 +35,7 @@ class HybridModelBuilder {
explicit HybridModelBuilder(HybridModel &hybrid_model);
~HybridModelBuilder() = default;
Status Build();
Status BuildForSingleOp();

private:
static Status UpdateAnchorStatus(const NodePtr &node);
@@ -64,6 +65,7 @@ class HybridModelBuilder {
Status ParseDependentInputNodes(NodeItem &node_item, const std::vector<string> &dependencies);
Status ParseDependentForFusedSubgraph(NodeItem &node_item);
Status IndexTaskDefs();
Status IndexTaskDefs(const ComputeGraphPtr &sub_graph, const GeModelPtr &ge_model);
Status IndexSpecialNodes();
Status InitRuntimeParams();
Status InitModelMem();


+ 3
- 1
ge/hybrid/node_executor/aicore/aicore_node_executor.cc View File

@@ -49,6 +49,7 @@ Status AiCoreNodeExecutor::Initialize() {
Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const {
GE_CHECK_NOTNULL(node);
GELOGI("AiCoreNodeExecutor(%s) LoadTask Start.", node->GetName().c_str());
bool is_single_op = model.IsSingleOp();

auto *task_defs = model.GetTaskDefs(node);
if (task_defs == nullptr || task_defs->empty()) {
@@ -66,7 +67,8 @@ Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &nod

AiCoreTaskBuilder builder(node->GetOpDesc(), *task_defs);
std::unique_ptr<NodeTask> node_task;
GE_CHK_STATUS_RET(builder.BuildTask(node_task, true), "[%s] Failed to build op tasks.", node->GetName().c_str());
GE_CHK_STATUS_RET(builder.BuildTask(node_task, true, is_single_op),
"[%s] Failed to build op tasks.", node->GetName().c_str());
task = std::move(node_task);
GELOGI("AiCoreNodeExecutor(%s) LoadTask End.", node->GetName().c_str());
return SUCCESS;


+ 1
- 1
ge/hybrid/node_executor/aicore/aicore_op_task.cc View File

@@ -65,7 +65,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
}
TBEHandleStore &kernel_store = TBEHandleStore::GetInstance();
rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str());
if (rt_ret != RT_ERROR_NONE) {
if (rt_ret != RT_ERROR_NONE || is_single_op_) {
void *bin_handle = nullptr;
if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) {
GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str());


+ 3
- 0
ge/hybrid/node_executor/aicore/aicore_op_task.h View File

@@ -50,6 +50,8 @@ class AiCoreOpTask {

uint32_t GetBlockDim() const {return block_dim_;}

void SetSingleOp(bool is_single_op) {is_single_op_ = is_single_op;};

protected:
Status UpdateTilingInfo(TaskContext &context);
virtual std::string GetKeyForOpParamSize() const;
@@ -72,6 +74,7 @@ class AiCoreOpTask {
uint32_t args_size_ = 0;
uint32_t block_dim_ = 1;
bool clear_atomic_ = true;
bool is_single_op_ = false;
std::vector<int> output_indices_to_skip_;
};



+ 5
- 1
ge/hybrid/node_executor/aicore/aicore_task_builder.cc View File

@@ -37,7 +37,9 @@ AiCoreTaskBuilder::AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector
: op_desc_(op_desc), task_defs_(task_defs) {
}

Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic) {
Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task,
bool ignore_failure_on_atomic,
bool is_single_op) {
GE_CHECK_NOTNULL(op_desc_);
if (task_defs_.size() > kNumTaskWithAtomicAddrCleanTask) {
GELOGE(INTERNAL_ERROR,
@@ -68,6 +70,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool i
auto atomic_task =
std::unique_ptr<AtomicAddrCleanOpTask>(new(std::nothrow)AtomicAddrCleanOpTask());
GE_CHECK_NOTNULL(atomic_task);
atomic_task->SetSingleOp(is_single_op);
GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()),
"[%s] Failed to init task for AtomicAddrClean",
op_desc_->GetName().c_str());
@@ -77,6 +80,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool i
// build aicore task
auto aicore_task = std::unique_ptr<AiCoreOpTask>(new(std::nothrow)AiCoreOpTask());
GE_CHECK_NOTNULL(aicore_task);
aicore_task->SetSingleOp(is_single_op);
GE_CHK_STATUS_RET(aicore_task->Init(*op_desc_, task_defs_.back()),
"[%s] Failed to init task for AtomicAddrClean",
op_desc_->GetName().c_str());


+ 1
- 1
ge/hybrid/node_executor/aicore/aicore_task_builder.h View File

@@ -47,7 +47,7 @@ class AiCoreTaskBuilder {
AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector<domi::TaskDef> &task_defs);
~AiCoreTaskBuilder() = default;

Status BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic);
Status BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic, bool is_single_op = false);

private:
bool ExpectAtomicAddrCleanTask();


+ 2
- 2
ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc View File

@@ -27,7 +27,7 @@ namespace ge {
namespace hybrid {
REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::GE_LOCAL, GeLocalNodeExecutor);

const std::unordered_map<std::string, std::vector<uint32_t>>
const std::map<std::string, std::vector<uint32_t>>
RefInputTask::out_ref_input_index_ = {{DATA, {}},
{AIPPDATA, {}},
{RESHAPE, {}},
@@ -36,7 +36,7 @@ const std::unordered_map<std::string, std::vector<uint32_t>>
{BROADCASTGRADIENTARGS, {}}
};

const std::unordered_set<std::string> DependInputShapeTask::depend_input_shape_ops_ = {SHAPE, SHAPEN, RANK, SIZE};
const std::set<std::string> DependInputShapeTask::depend_input_shape_ops_ = {SHAPE, SHAPEN, RANK, SIZE};

Status RefInputTask::UpdateArgs(TaskContext &) {
// no need update args


+ 2
- 2
ge/hybrid/node_executor/ge_local/ge_local_node_executor.h View File

@@ -46,7 +46,7 @@ class RefInputTask : public NodeTask {

// key is op type, value is output ref input index,
// e.g. {1,0} means out[0] ref input[1], out[1] ref input[0], if vector is empty, it means ref input one by one
static const std::unordered_map<std::string, std::vector<uint32_t>> out_ref_input_index_;
static const std::map<std::string, std::vector<uint32_t>> out_ref_input_index_;
};

class DependInputShapeTask : public NodeTask {
@@ -65,7 +65,7 @@ class DependInputShapeTask : public NodeTask {
const NodePtr node_;

// ops depend input shape
static const std::unordered_set<std::string> depend_input_shape_ops_;
static const std::set<std::string> depend_input_shape_ops_;
};

class ConstantNodeTask : public NodeTask {


+ 1
- 1
ge/init/gelib.h View File

@@ -31,7 +31,7 @@ using std::map;
using std::vector;

namespace ge {
class GELib {
class GE_FUNC_VISIBILITY GELib {
public:
GELib() = default;
~GELib() = default;


+ 5
- 5
ge/ir_build/atc_ir_common.cc View File

@@ -77,7 +77,7 @@ Status CheckInputFormat(const string &input_format) {
return ge::SUCCESS;
}

bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map,
bool CheckDynamicBatchSizeInputShapeValid(map<string, vector<int64_t>> shape_map,
std::string &dynamic_batch_size) {
int32_t size = 0;
for (auto iter = shape_map.begin(); iter != shape_map.end(); ++iter) {
@@ -119,7 +119,7 @@ bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>>
return true;
}

bool CheckDynamicImagesizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map,
bool CheckDynamicImagesizeInputShapeValid(map<string, vector<int64_t>> shape_map,
const std::string input_format, std::string &dynamic_image_size) {
if (!input_format.empty() && !ge::TypeUtils::IsFormatValid(input_format.c_str())) {
GELOGE(ge::PARAM_INVALID, "user input format [%s] is not found!", input_format.c_str());
@@ -177,7 +177,7 @@ bool CheckDynamicImagesizeInputShapeValid(unordered_map<string, vector<int64_t>>
return true;
}

bool CheckDynamicDimsInputShapeValid(const unordered_map<string, vector<int64_t>> &shape_map,
bool CheckDynamicDimsInputShapeValid(const map<string, vector<int64_t>> &shape_map,
string input_format, string &dynamic_dims) {
if (input_format != "ND") {
ErrorManager::GetInstance().ATCReportErrMessage(
@@ -272,7 +272,7 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i
return ge::SUCCESS;
}

unordered_map<string, vector<int64_t>> shape_map;
map<string, vector<int64_t>> shape_map;
vector<pair<string, vector<int64_t>>> user_shape_map;
is_dynamic_input = true;
if (input_shape.empty()) {
@@ -310,7 +310,7 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i
return ge::SUCCESS;
}

bool ParseInputShape(const string &input_shape, unordered_map<string, vector<int64_t>> &shape_map,
bool ParseInputShape(const string &input_shape, map<string, vector<int64_t>> &shape_map,
vector<pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input) {
vector<string> shape_vec = StringUtils::Split(input_shape, ';');
const int DEFAULT_SHAPE_PAIR_SIZE = 2;


+ 4
- 4
ge/ir_build/atc_ir_common.h View File

@@ -46,13 +46,13 @@ static std::map<std::string, domiTensorFormat_t> input_format_str_to_geformat =
static const std::string kEnableCompressWeightTrue = "1";
static const std::string kEnableCompressWeightFalse = "0";

bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map,
bool CheckDynamicBatchSizeInputShapeValid(map<string, vector<int64_t>> shape_map,
std::string &dynamic_batch_size);

bool CheckDynamicImagesizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map,
bool CheckDynamicImagesizeInputShapeValid(map<string, vector<int64_t>> shape_map,
const std::string input_format, std::string &dynamic_image_size);

bool CheckDynamicDimsInputShapeValid(const std::unordered_map<std::string, std::vector<int64_t>> &shape_map,
bool CheckDynamicDimsInputShapeValid(const std::map<std::string, std::vector<int64_t>> &shape_map,
std::string input_format, std::string &dynamic_dims);

bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims);
@@ -61,7 +61,7 @@ Status CheckDynamicInputParamValid(std::string &dynamic_batch_size, std::string
std::string &dynamic_dims, const std::string input_shape,
const std::string input_format, bool &is_dynamic_input);

bool ParseInputShape(const std::string &input_shape, std::unordered_map<string, std::vector<int64_t>> &shape_map,
bool ParseInputShape(const std::string &input_shape, std::map<string, std::vector<int64_t>> &shape_map,
std::vector<std::pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input = false);

Status CheckOutputTypeParamValid(const std::string output_type);


+ 1
- 1
ge/ir_build/ge_ir_build.cc View File

@@ -268,7 +268,7 @@ graphStatus Impl::UpdateDataOpAttr(const Graph &graph) {
if (options_.find(kInputShape) == options_.end()) {
return GRAPH_SUCCESS;
}
unordered_map<string, vector<int64_t>> shape_map;
map<string, vector<int64_t>> shape_map;
vector<pair<string, vector<int64_t>>> user_shape_map;
GE_CHK_BOOL_EXEC(ParseInputShape(options_[kInputShape], shape_map, user_shape_map, true),
return GRAPH_PARAM_INVALID, "parse input shape failed!");


+ 12
- 0
ge/offline/CMakeLists.txt View File

@@ -23,6 +23,7 @@ target_compile_options(atc_atc.bin PRIVATE
-O2
-Wno-deprecated-declarations
-fno-common
-fvisibility=hidden
)

target_compile_definitions(atc_atc.bin PRIVATE
@@ -30,6 +31,7 @@ target_compile_definitions(atc_atc.bin PRIVATE
COMPILE_OMG_PACKAGE
google=ascend_private
LOG_CPP
FUNC_VISIBILITY
)

target_include_directories(atc_atc.bin PRIVATE
@@ -58,6 +60,10 @@ target_include_directories(atc_atc.bin PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_options(atc_atc.bin PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(atc_atc.bin PRIVATE
$<BUILD_INTERFACE:intf_pub>
ascend_protobuf
@@ -90,6 +96,7 @@ target_compile_options(fwk_atc.bin PRIVATE
-O2
-Wno-deprecated-declarations
-fno-common
-fvisibility=hidden
)

target_compile_definitions(fwk_atc.bin PRIVATE
@@ -97,6 +104,7 @@ target_compile_definitions(fwk_atc.bin PRIVATE
COMPILE_OMG_PACKAGE
google=ascend_private
LOG_CPP
FUNC_VISIBILITY
)

target_include_directories(fwk_atc.bin PRIVATE
@@ -125,6 +133,10 @@ target_include_directories(fwk_atc.bin PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_options(fwk_atc.bin PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(fwk_atc.bin PRIVATE
$<BUILD_INTERFACE:intf_pub>
ascend_protobuf


+ 1
- 1
ge/opskernel_manager/ops_kernel_builder_manager.h View File

@@ -23,7 +23,7 @@

namespace ge {
using OpsKernelBuilderPtr = std::shared_ptr<OpsKernelBuilder>;
class OpsKernelBuilderManager {
class GE_FUNC_VISIBILITY OpsKernelBuilderManager {
public:
~OpsKernelBuilderManager();



+ 1
- 1
ge/opskernel_manager/ops_kernel_manager.h View File

@@ -41,7 +41,7 @@ using std::vector;
namespace ge {
using OpsKernelInfoStorePtr = std::shared_ptr<OpsKernelInfoStore>;

class OpsKernelManager {
class GE_FUNC_VISIBILITY OpsKernelManager {
public:
friend class GELib;



+ 6
- 0
ge/plugin/engine/CMakeLists.txt View File

@@ -9,11 +9,13 @@ add_library(engine SHARED ${SRC_LIST})
target_compile_options(engine PRIVATE
-Werror
-fno-common
-fvisibility=hidden
)

target_compile_definitions(engine PRIVATE
REUSE_MEMORY=1
PROTOBUF_INLINE_NOT_IN_HEADERS=0
FUNC_VISIBILITY
)

target_include_directories(engine PRIVATE
@@ -32,6 +34,10 @@ target_include_directories(engine PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_options(engine PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(engine PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed


+ 8
- 8
ge/plugin/engine/dnnengines.h View File

@@ -25,7 +25,7 @@
#include "plugin/engine/engine_manage.h"

namespace ge {
class AICoreDNNEngine : public DNNEngine {
class GE_FUNC_VISIBILITY AICoreDNNEngine : public DNNEngine {
public:
AICoreDNNEngine() = default;
explicit AICoreDNNEngine(const std::string &engine_name);
@@ -40,7 +40,7 @@ class AICoreDNNEngine : public DNNEngine {
DNNEngineAttribute engine_attribute_;
};

class VectorCoreDNNEngine : public DNNEngine {
class GE_FUNC_VISIBILITY VectorCoreDNNEngine : public DNNEngine {
public:
VectorCoreDNNEngine() = default;
explicit VectorCoreDNNEngine(const std::string &engine_name);
@@ -56,7 +56,7 @@ class VectorCoreDNNEngine : public DNNEngine {
};


class AICpuDNNEngine : public DNNEngine {
class GE_FUNC_VISIBILITY AICpuDNNEngine : public DNNEngine {
public:
AICpuDNNEngine() = default;
explicit AICpuDNNEngine(const std::string &engine_name);
@@ -71,7 +71,7 @@ class AICpuDNNEngine : public DNNEngine {
DNNEngineAttribute engine_attribute_;
};

class AICpuTFDNNEngine : public DNNEngine {
class GE_FUNC_VISIBILITY AICpuTFDNNEngine : public DNNEngine {
public:
AICpuTFDNNEngine() = default;
explicit AICpuTFDNNEngine(const std::string &engine_name);
@@ -86,7 +86,7 @@ class AICpuTFDNNEngine : public DNNEngine {
DNNEngineAttribute engine_attribute_;
};

class GeLocalDNNEngine : public DNNEngine {
class GE_FUNC_VISIBILITY GeLocalDNNEngine : public DNNEngine {
public:
GeLocalDNNEngine() = default;
explicit GeLocalDNNEngine(const std::string &engine_name);
@@ -101,7 +101,7 @@ class GeLocalDNNEngine : public DNNEngine {
DNNEngineAttribute engine_attribute_;
};

class HostCpuDNNEngine : public DNNEngine {
class GE_FUNC_VISIBILITY HostCpuDNNEngine : public DNNEngine {
public:
HostCpuDNNEngine() = default;
explicit HostCpuDNNEngine(const std::string &engine_name);
@@ -116,7 +116,7 @@ private:
DNNEngineAttribute engine_attribute_;
};

class RtsDNNEngine : public DNNEngine {
class GE_FUNC_VISIBILITY RtsDNNEngine : public DNNEngine {
public:
RtsDNNEngine() = default;
explicit RtsDNNEngine(const std::string &engine_name);
@@ -131,7 +131,7 @@ class RtsDNNEngine : public DNNEngine {
DNNEngineAttribute engine_attribute_;
};

class HcclDNNEngine : public DNNEngine {
class GE_FUNC_VISIBILITY HcclDNNEngine : public DNNEngine {
public:
HcclDNNEngine() = default;
explicit HcclDNNEngine(const std::string &engine_name);


+ 16
- 2
ge/plugin/engine/engine_manage.h View File

@@ -17,6 +17,20 @@
#ifndef GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_
#define GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_

// GE_FUNC_VISIBILITY marks a symbol as exported when FUNC_VISIBILITY is defined and expands to
// nothing otherwise. The MSVC branch uses dllexport, every other compiler uses default visibility.
#if defined(_MSC_VER)
#ifdef FUNC_VISIBILITY
#define GE_FUNC_VISIBILITY _declspec(dllexport)
#else
#define GE_FUNC_VISIBILITY
#endif
#else
#ifdef FUNC_VISIBILITY
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#else
#define GE_FUNC_VISIBILITY
#endif
#endif

#include <map>
#include <memory>
#include <string>
@@ -26,7 +40,7 @@

namespace ge {
using DNNEnginePtr = std::shared_ptr<DNNEngine>;
class EngineManager {
class GE_FUNC_VISIBILITY EngineManager {
public:
static Status RegisterEngine(const std::string &engine_name, DNNEnginePtr engine_ptr);
static DNNEnginePtr GetEngine(const std::string &engine_name);
@@ -34,7 +48,7 @@ class EngineManager {
};

extern "C" {
void GetDNNEngineObjs(std::map<std::string, DNNEnginePtr> &engines);
GE_FUNC_VISIBILITY void GetDNNEngineObjs(std::map<std::string, DNNEnginePtr> &engines);
}
} // namespace ge
#endif // GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_

+ 17
- 0
ge/session/inner_session.cc View File

@@ -77,6 +77,23 @@ Status InnerSession::Initialize() {

UpdateThreadContext(std::map<std::string, std::string>{});

// session device id set here
std::string str_session_device_id;
if (GetContext().GetOption("ge.session_device_id", str_session_device_id) == SUCCESS) {
GELOGI("Option session device id has set, value is %s.", str_session_device_id.c_str());

uint32_t session_device_id = 0;
try {
session_device_id = static_cast<uint32_t>(std::stoi(str_session_device_id.c_str()));
// session device id has priority
GetContext().SetCtxDeviceId(session_device_id);
} catch (std::invalid_argument &) {
GELOGW("session device id %s transform to int failed.", str_session_device_id.c_str());
} catch (std::out_of_range &) {
GELOGW("session device id %s transform to int failed.", str_session_device_id.c_str());
}
}

GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId()));

DumpProperties dump_properties;


+ 2
- 2
ge/session/omg.cc View File

@@ -606,7 +606,7 @@ Status InitDomiOmgContext(const string &input_shape, const string &input_format,
}

// Analyze the input shape parameter
unordered_map<string, vector<int64_t>> &shape_map = domi::GetContext().input_dims;
map<string, vector<int64_t>> &shape_map = domi::GetContext().input_dims;

if (!ge::ParseInputShape(input_shape, domi::GetContext().input_dims, domi::GetContext().user_input_dims,
is_dynamic_input) ||
@@ -689,7 +689,7 @@ Status ParseOutNodes(const string &out_nodes) {
///
static Status CheckOpNameMap(const ComputeGraphPtr &graph, const std::string &op_conf) {
GE_CHECK_NOTNULL(graph);
unordered_map<string, string> graphNodeTypes;
map<string, string> graphNodeTypes;
for (const NodePtr &node : graph->GetAllNodes()) {
auto op_desc = node->GetOpDesc();
if (op_desc == nullptr) {


+ 19
- 1
ge/single_op/single_op.cc View File

@@ -256,9 +256,27 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
const vector<DataBuffer> &input_buffers,
vector<GeTensorDesc> &output_desc,
vector<DataBuffer> &output_buffers) {
GE_CHECK_NOTNULL(op_task_);
GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers));
if (hybrid_model_executor_ != nullptr) {
GELOGD("Execute multi-task dynamic single op by hybrid model executor");
hybrid::HybridModelExecutor::ExecuteArgs args;
for (auto &input : input_buffers) {
args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length));
}
for (auto &output : output_buffers) {
args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length));
}
for (auto &tensor_desc : input_desc) {
auto desc = MakeShared<GeTensorDesc>(tensor_desc);
GE_CHECK_NOTNULL(desc);
args.input_desc.emplace_back(desc);
}

return hybrid_model_executor_->Execute(args);
}

std::lock_guard<std::mutex> lk(*stream_mutex_);
GE_CHECK_NOTNULL(op_task_);

GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_));
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic));


+ 4
- 1
ge/single_op/single_op.h View File

@@ -28,6 +28,7 @@
#include "runtime/stream.h"
#include "task/op_task.h"
#include "cce/aicpu_engine_struct.h"
#include "hybrid/executor/hybrid_model_executor.h"

namespace ge {
class StreamResource;
@@ -46,7 +47,7 @@ class SingleOp {
Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);

friend class SingleOpModel;
StreamResource *stream_resource_;
StreamResource *stream_resource_ = nullptr;
std::mutex *stream_mutex_;
rtStream_t stream_ = nullptr;
std::vector<void *> input_addr_list_;
@@ -77,6 +78,8 @@ class DynamicSingleOp {
std::vector<DataBuffer> &outputs) const;

std::unique_ptr<OpTask> op_task_;
std::unique_ptr<hybrid::HybridModel> hybrid_model_;
std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_;
uintptr_t resource_id_ = 0;
std::mutex *stream_mutex_;
rtStream_t stream_ = nullptr;


+ 40
- 0
ge/single_op/single_op_model.cc View File

@@ -31,6 +31,8 @@
#include "task/aicpu_task_builder.h"
#include "task/aicpu_kernel_task_builder.h"
#include "task/tbe_task_builder.h"
#include "hybrid/executor/hybrid_model_executor.h"
#include "hybrid/node_executor/node_executor.h"

static std::atomic<std::uint64_t> aicpu_kernel_id(0);

@@ -42,6 +44,20 @@ namespace ge {
namespace {
const size_t kDataOutputNum = 1;
} // namespace
/**
 * Check whether any node of the model's compute graph declares infer-shape dependencies.
 * @param ge_model model whose graph is scanned
 * @param flag set to true when some op has a non-empty infer-depends list; left untouched otherwise
 * @return SUCCESS after the scan, error code if the model, its compute graph, a node or an
 *         op desc is null
 */
static Status IfInferDepend(GeModelPtr &ge_model, bool &flag) {
  GE_CHECK_NOTNULL(ge_model);
  auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());
  // Check the graph before iterating: it is dereferenced on the next line.
  GE_CHECK_NOTNULL(comp_graph);
  for (const auto &node : comp_graph->GetAllNodes()) {
    GE_CHECK_NOTNULL(node);
    auto op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    const auto &depends = op_desc->GetOpInferDepends();
    if (!depends.empty()) {
      // One dependent op is enough; stop at the first hit.
      flag = true;
      return SUCCESS;
    }
  }
  return SUCCESS;
}

SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size)
: model_name_(model_name), ori_model_data_(model_data), ori_model_size_(model_size) {}

@@ -478,6 +494,30 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp &
single_op.num_outputs_ = netoutput_op_->GetAllInputsSize();
GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource));
model_params_.memory_size = UINT_MAX;

auto ge_model = model_helper_.GetGeModel();
GE_CHECK_NOTNULL(ge_model);
bool infer_depend_flag = false;
GE_CHK_STATUS_RET_NOLOG(IfInferDepend(ge_model, infer_depend_flag));
if (ge_model->GetModelTaskDefPtr()->task_size() > 1 || infer_depend_flag) {
GELOGD("Build single op HybridModel.");
GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized());
auto root_model = model_helper_.GetGeRootModel();
GE_CHECK_NOTNULL(root_model);
root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph()));
root_model->SetSubgraphInstanceNameToModel(root_model->GetRootGraph()->GetName(), ge_model);
single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model));
GE_CHECK_NOTNULL(single_op.hybrid_model_);
GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "Failed to init hybrid model");
int32_t device_id = 0;
GE_CHK_RT_RET(rtGetDevice(&device_id));
single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(),
device_id,
resource.GetStream()));
GE_CHECK_NOTNULL(single_op.hybrid_model_executor_);
GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "Failed to init hybrid model");
return SUCCESS;
}
return BuildTaskListForDynamicOp(single_op);
}
} // namespace ge

+ 4
- 0
ge/single_op/stream_resource.cc View File

@@ -61,6 +61,10 @@ DynamicSingleOp *StreamResource::GetDynamicOperator(const void *key) {
return it->second.get();
}

// Returns the stream currently stored in this resource (the value assigned by SetStream).
rtStream_t StreamResource::GetStream() const {
return stream_;
}

void StreamResource::SetStream(rtStream_t stream) {
stream_ = stream;
}


+ 1
- 0
ge/single_op/stream_resource.h View File

@@ -37,6 +37,7 @@ class StreamResource {
StreamResource(StreamResource &&) = delete;
StreamResource &operator=(const StreamResource &) = delete;
StreamResource &operator=(StreamResource &&) = delete;
rtStream_t GetStream() const;
void SetStream(rtStream_t stream);

SingleOp *GetOperator(const void *key);


+ 1
- 1
ge/stub/gen_stubapi.py View File

@@ -16,7 +16,7 @@ logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(leve
"""
this attr is used for symbol table visible
"""
GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY'
GE_ATTR = 'GE_FUNC_VISIBILITY'

"""
generate stub func body by return type


+ 5
- 5
inc/external/ge/ge_api.h View File

@@ -34,15 +34,15 @@ typedef uint32_t (*pCallBackFunc)(uint32_t graph_id, const std::map<AscendString
}

// Initialize GE
ATTRIBUTED_DEPRECATED(Status GEInitialize(const std::map<AscendString, AscendString> &))
Status GEInitialize(const std::map<std::string, std::string> &options);
ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY Status GEInitialize(const std::map<AscendString, AscendString> &))
GE_FUNC_VISIBILITY Status GEInitialize(const std::map<std::string, std::string> &options);

Status GEInitialize(const std::map<AscendString, AscendString> &options);
GE_FUNC_VISIBILITY Status GEInitialize(const std::map<AscendString, AscendString> &options);

// Finalize GE, release all resources
Status GEFinalize();
GE_FUNC_VISIBILITY Status GEFinalize();

class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Session {
class GE_FUNC_VISIBILITY Session {
public:
ATTRIBUTED_DEPRECATED(Session(const std::map<AscendString, AscendString> &))
explicit Session(const std::map<std::string, std::string> &options);


+ 2
- 2
inc/external/ge/ge_api_error_codes.h View File

@@ -28,7 +28,7 @@ namespace ge {
#define ATTRIBUTED_DEPRECATED(replacement) __declspec(deprecated("Please use " #replacement " instead."))
#endif

class StatusFactory {
class GE_FUNC_VISIBILITY StatusFactory {
public:
static StatusFactory *Instance() {
static StatusFactory instance;
@@ -70,7 +70,7 @@ class StatusFactory {
std::map<uint32_t, std::string> err_desc_;
};

class ErrorNoRegisterar {
class GE_FUNC_VISIBILITY ErrorNoRegisterar {
public:
ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); }
ErrorNoRegisterar(uint32_t err, const char *desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); }


+ 14
- 0
inc/external/ge/ge_error_codes.h View File

@@ -17,6 +17,20 @@
#ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_
#define INC_EXTERNAL_GE_GE_ERROR_CODES_H_

// GE_FUNC_VISIBILITY: symbol-visibility annotation for exported declarations.
// It only expands to something when FUNC_VISIBILITY is defined; otherwise it
// is an empty macro.
#ifdef FUNC_VISIBILITY
#if defined(_MSC_VER)
// Compilers that define _MSC_VER: mark the symbol as a DLL export.
#define GE_FUNC_VISIBILITY _declspec(dllexport)
#else
// All other compilers: request default visibility for the symbol.
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#endif
#else
#define GE_FUNC_VISIBILITY
#endif

#include <stddef.h>

#ifdef __cplusplus


+ 27
- 13
inc/external/ge/ge_ir_build.h View File

@@ -17,6 +17,20 @@
#ifndef INC_EXTERNAL_GE_IR_BUILD_H_
#define INC_EXTERNAL_GE_IR_BUILD_H_

// GE_FUNC_VISIBILITY: symbol-visibility annotation for exported declarations.
// It only expands to something when FUNC_VISIBILITY is defined; otherwise it
// is an empty macro.
#ifdef FUNC_VISIBILITY
#if defined(_MSC_VER)
// Compilers that define _MSC_VER: mark the symbol as a DLL export.
#define GE_FUNC_VISIBILITY _declspec(dllexport)
#else
// All other compilers: request default visibility for the symbol.
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#endif
#else
#define GE_FUNC_VISIBILITY
#endif

#include <string>
#include <map>
#include <memory>
@@ -44,17 +58,17 @@ struct ModelBufferData {
* @retval GRAPH_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ATTRIBUTED_DEPRECATED(graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &))
graphStatus aclgrphBuildInitialize(std::map<std::string, std::string> global_options);
ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &))
GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map<std::string, std::string> global_options);

graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &global_options);
GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &global_options);

/**
* @ingroup AscendCL
* @brief build model. Notice the model is stored in buffer
*
*/
void aclgrphBuildFinalize();
GE_FUNC_VISIBILITY void aclgrphBuildFinalize();

/**
* @ingroup AscendCL
@@ -66,12 +80,12 @@ void aclgrphBuildFinalize();
* @retval GRAPH_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ATTRIBUTED_DEPRECATED(graphStatus aclgrphBuildModel(const ge::Graph &, const std::map<AscendString, AscendString> &,
ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &, const std::map<AscendString, AscendString> &,
ModelBufferData &))
graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<std::string, std::string> &build_options,
GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<std::string, std::string> &build_options,
ModelBufferData &model);

graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<AscendString, AscendString> &build_options,
GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<AscendString, AscendString> &build_options,
ModelBufferData &model);

/**
@@ -83,10 +97,10 @@ graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<AscendStrin
* @retval GRAPH_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ATTRIBUTED_DEPRECATED(graphStatus aclgrphSaveModel(const char *, const ModelBufferData &))
graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &model);
ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *, const ModelBufferData &))
GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &model);

graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &model);
GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &model);

/**
* @ingroup AscendCL
@@ -98,7 +112,7 @@ graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &mod
* @retval GRAPH_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *patch_version);
GE_FUNC_VISIBILITY graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *patch_version);

/**
* @ingroup AscendCL
@@ -110,7 +124,7 @@ graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *pat
* @retval GRAPH_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len);
GE_FUNC_VISIBILITY graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len);

/**
* @ingroup AscendCL
@@ -123,7 +137,7 @@ graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const siz
* @retval GRAPH_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
graphStatus aclgrphGenerateForOp(const AscendString &op_type, const std::vector<TensorDesc> &inputs,
GE_FUNC_VISIBILITY graphStatus aclgrphGenerateForOp(const AscendString &op_type, const std::vector<TensorDesc> &inputs,
const std::vector<TensorDesc> &outputs, Graph &graph);

}; // namespace ge


+ 1
- 1
inc/framework/common/debug/ge_log.h View File

@@ -37,7 +37,7 @@ extern "C" {
// trace status of log
enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP };

class GeLog {
class GE_FUNC_VISIBILITY GeLog {
public:
static uint64_t GetTid() {
#ifdef __GNUC__


+ 1
- 1
inc/framework/common/debug/log.h View File

@@ -278,7 +278,7 @@
} while (0)

template <typename T>
std::string FmtToStr(const T &t) {
GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) {
std::string fmt;
std::stringstream st;
st << "[" << t << "]";


+ 16
- 2
inc/framework/common/fmk_error_codes.h View File

@@ -17,6 +17,20 @@
#ifndef INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_
#define INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_

// GE_FUNC_VISIBILITY: symbol-visibility annotation for exported declarations.
// It only expands to something when FUNC_VISIBILITY is defined; otherwise it
// is an empty macro.
#ifdef FUNC_VISIBILITY
#if defined(_MSC_VER)
// Compilers that define _MSC_VER: mark the symbol as a DLL export.
#define GE_FUNC_VISIBILITY _declspec(dllexport)
#else
// All other compilers: request default visibility for the symbol.
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#endif
#else
#define GE_FUNC_VISIBILITY
#endif

#include <map>
#include <string>

@@ -38,7 +52,7 @@ const int MODID_OME = 2; // OME module ID
const int MODID_CALIBRATION = 3; // Calibration module ID

namespace domi {
class StatusFactory {
class GE_FUNC_VISIBILITY StatusFactory {
public:
static StatusFactory *Instance();

@@ -54,7 +68,7 @@ class StatusFactory {
std::map<uint32_t, std::string> err_desc_;
};

class ErrorNoRegisterar {
class GE_FUNC_VISIBILITY ErrorNoRegisterar {
public:
ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); }
~ErrorNoRegisterar() {}


+ 1
- 1
inc/framework/common/ge_format_util.h View File

@@ -23,7 +23,7 @@
#include "graph/tensor.h"

namespace ge {
class GeFormatUtil {
class GE_FUNC_VISIBILITY GeFormatUtil {
public:
///
/// @name TransShape


+ 1
- 1
inc/framework/common/ge_types.h View File

@@ -215,7 +215,7 @@ struct ModelInfo {
};

// Asynchronous callback interface, implemented by the caller
class ModelListener {
class GE_FUNC_VISIBILITY ModelListener {
public:
virtual ~ModelListener() {}
///


+ 15
- 1
inc/framework/common/gflags_util.h View File

@@ -17,11 +17,25 @@
#ifndef INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_
#define INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_

// GE_FUNC_VISIBILITY: symbol-visibility annotation for exported declarations.
// It only expands to something when FUNC_VISIBILITY is defined; otherwise it
// is an empty macro.
#ifdef FUNC_VISIBILITY
#if defined(_MSC_VER)
// Compilers that define _MSC_VER: mark the symbol as a DLL export.
#define GE_FUNC_VISIBILITY _declspec(dllexport)
#else
// All other compilers: request default visibility for the symbol.
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#endif
#else
#define GE_FUNC_VISIBILITY
#endif

#include <gflags/gflags.h>
#include <string>

namespace ge {
class GflagsUtils {
class GE_FUNC_VISIBILITY GflagsUtils {
public:
static bool IsSetCommandTrue(const char *name) {
std::string out;


+ 1
- 1
inc/framework/common/helper/model_helper.h View File

@@ -28,7 +28,7 @@
#include "model/ge_root_model.h"

namespace ge {
class ModelHelper {
class GE_FUNC_VISIBILITY ModelHelper {
public:
ModelHelper() = default;
~ModelHelper();


Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save