From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmq tags/v1.2.0
| @@ -2,6 +2,7 @@ | |||||
| /build | /build | ||||
| /output | /output | ||||
| /prebuilts | /prebuilts | ||||
| /cov | |||||
| *.ir | *.ir | ||||
| *.out | *.out | ||||
| @@ -88,10 +88,8 @@ if (ENABLE_OPEN_SRC) | |||||
| find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR}) | find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR}) | ||||
| find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR}) | find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR}) | ||||
| if(PLATFORM STREQUAL "train") | if(PLATFORM STREQUAL "train") | ||||
| find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) | |||||
| find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) | find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) | ||||
| find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) | find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) | ||||
| find_module(resource libresource.so ${ASCEND_RUNTIME_DIR}) | |||||
| find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) | find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) | ||||
| find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | ||||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | ||||
| @@ -101,12 +99,10 @@ if (ENABLE_OPEN_SRC) | |||||
| elseif(PLATFORM STREQUAL "inference") | elseif(PLATFORM STREQUAL "inference") | ||||
| find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) | find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) | ||||
| find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) | find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) | ||||
| find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) | |||||
| find_module(resource libresource.so ${ASCEND_ATC_DIR}) | |||||
| find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) | |||||
| find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | ||||
| find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | ||||
| find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | ||||
| #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||||
| if(PRODUCT STREQUAL "flr3") | if(PRODUCT STREQUAL "flr3") | ||||
| elseif(PRODUCT STREQUAL "flr1") | elseif(PRODUCT STREQUAL "flr1") | ||||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | ||||
| @@ -116,17 +112,14 @@ if (ENABLE_OPEN_SRC) | |||||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) | find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) | ||||
| endif() | endif() | ||||
| elseif(PLATFORM STREQUAL "all") | elseif(PLATFORM STREQUAL "all") | ||||
| find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | |||||
| find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) | |||||
| find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) | |||||
| find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) | |||||
| find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) | |||||
| find_module(resource libresource.so ${ASCEND_ATC_DIR}) | |||||
| find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | |||||
| find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | |||||
| find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) | |||||
| find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) | |||||
| find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) | |||||
| find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | ||||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | |||||
| #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) | |||||
| find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) | |||||
| find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | |||||
| find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | |||||
| else() | else() | ||||
| message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") | message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") | ||||
| endif() | endif() | ||||
| @@ -166,14 +166,14 @@ build_graphengine() | |||||
| echo "execute command: cmake ${CMAKE_ARGS} .. failed." | echo "execute command: cmake ${CMAKE_ARGS} .. failed." | ||||
| return 1 | return 1 | ||||
| fi | fi | ||||
| COMMON_TARGET="ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt " | |||||
| COMMON_TARGET="ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt " | |||||
| TARGET=${COMMON_TARGET} | TARGET=${COMMON_TARGET} | ||||
| if [ "x${PLATFORM}" = "xtrain" ] | if [ "x${PLATFORM}" = "xtrain" ] | ||||
| then | then | ||||
| TARGET="ge_runner ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder fwk_atc.bin ${TARGET}" | |||||
| TARGET="ge_runner fwk_atc.bin ${TARGET}" | |||||
| elif [ "x${PLATFORM}" = "xinference" ] | elif [ "x${PLATFORM}" = "xinference" ] | ||||
| then | then | ||||
| TARGET="ge_compiler atc_ge_local_engine atc_ge_local_opskernel_builder atc_host_cpu_engine atc_host_cpu_opskernel_builder atc_atc.bin opensrc_ascendcl ${TARGET}" | |||||
| TARGET="ge_compiler atc_atc.bin opensrc_ascendcl ${TARGET}" | |||||
| elif [ "X$ENABLE_GE_UT" = "Xon" ] | elif [ "X$ENABLE_GE_UT" = "Xon" ] | ||||
| then | then | ||||
| TARGET="ut_libgraph ut_libge_multiparts_utest ut_libge_others_utest ut_libge_kernel_utest ut_libge_distinct_load_utest" | TARGET="ut_libgraph ut_libge_multiparts_utest ut_libge_others_utest ut_libge_kernel_utest ut_libge_distinct_load_utest" | ||||
| @@ -183,7 +183,7 @@ build_graphengine() | |||||
| elif [ "x${PLATFORM}" = "xall" ] | elif [ "x${PLATFORM}" = "xall" ] | ||||
| then | then | ||||
| # build all the target | # build all the target | ||||
| TARGET="" | |||||
| TARGET="ge_runner ge_compiler fwk_atc.bin atc_atc.bin opensrc_ascendcl ${TARGET}" | |||||
| fi | fi | ||||
| make ${VERBOSE} ${TARGET} -j${THREAD_NUM} && make install | make ${VERBOSE} ${TARGET} -j${THREAD_NUM} && make install | ||||
| @@ -198,8 +198,6 @@ g++ -v | |||||
| mk_dir ${OUTPUT_PATH} | mk_dir ${OUTPUT_PATH} | ||||
| build_graphengine || { echo "GraphEngine build failed."; return; } | build_graphengine || { echo "GraphEngine build failed."; return; } | ||||
| echo "---------------- GraphEngine build finished ----------------" | echo "---------------- GraphEngine build finished ----------------" | ||||
| #cp -rf "${BUILD_PATH}/graphengine/"*.so "${OUTPUT_PATH}" | |||||
| #rm -rf "${OUTPUT_PATH}/"libproto* | |||||
| rm -f ${OUTPUT_PATH}/libgmock*.so | rm -f ${OUTPUT_PATH}/libgmock*.so | ||||
| rm -f ${OUTPUT_PATH}/libgtest*.so | rm -f ${OUTPUT_PATH}/libgtest*.so | ||||
| rm -f ${OUTPUT_PATH}/lib*_stub.so | rm -f ${OUTPUT_PATH}/lib*_stub.so | ||||
| @@ -209,10 +207,6 @@ find ${OUTPUT_PATH} -name "*.so*" -print0 | xargs -0 chmod 500 | |||||
| echo "---------------- GraphEngine output generated ----------------" | echo "---------------- GraphEngine output generated ----------------" | ||||
| # if [[ "X$ENABLE_GE_ST" = "Xon" ]]; then | |||||
| # cp ${BUILD_PATH}/graphengine/tests/st/st_resnet50_train ${OUTPUT_PATH} | |||||
| # fi | |||||
| if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then | if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then | ||||
| cp ${BUILD_PATH}/tests/ut/common/graph/ut_libgraph ${OUTPUT_PATH} | cp ${BUILD_PATH}/tests/ut/common/graph/ut_libgraph ${OUTPUT_PATH} | ||||
| cp ${BUILD_PATH}/tests/ut/ge/ut_libge_multiparts_utest ${OUTPUT_PATH} | cp ${BUILD_PATH}/tests/ut/ge/ut_libge_multiparts_utest ${OUTPUT_PATH} | ||||
| @@ -220,9 +214,6 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then | |||||
| cp ${BUILD_PATH}/tests/ut/ge/ut_libge_others_utest ${OUTPUT_PATH} | cp ${BUILD_PATH}/tests/ut/ge/ut_libge_others_utest ${OUTPUT_PATH} | ||||
| cp ${BUILD_PATH}/tests/ut/ge/ut_libge_kernel_utest ${OUTPUT_PATH} | cp ${BUILD_PATH}/tests/ut/ge/ut_libge_kernel_utest ${OUTPUT_PATH} | ||||
| # if [[ "X${ENABLE_GE_UT_ONLY_COMPILE}" != "Xon" ]]; then | |||||
| # export LD_LIBRARY_PATH=${D_LINK_PATH}/x86_64/:${BUILD_PATH}../third_party/prebuild/x86_64/:${BUILD_PATH}/graphengine/:/usr/local/HiAI/driver/lib64:/usr/local/HiAI/runtime/lib64:${LD_LIBRARY_PATH} | |||||
| # echo ${LD_LIBRARY_PATH} | |||||
| ${OUTPUT_PATH}/ut_libgraph && | ${OUTPUT_PATH}/ut_libgraph && | ||||
| ${OUTPUT_PATH}/ut_libge_multiparts_utest && | ${OUTPUT_PATH}/ut_libge_multiparts_utest && | ||||
| ${OUTPUT_PATH}/ut_libge_distinct_load_utest && | ${OUTPUT_PATH}/ut_libge_distinct_load_utest && | ||||
| @@ -232,17 +223,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then | |||||
| echo "!!! UT FAILED, PLEASE CHECK YOUR CHANGES !!!" | echo "!!! UT FAILED, PLEASE CHECK YOUR CHANGES !!!" | ||||
| exit 1; | exit 1; | ||||
| fi | fi | ||||
| # fi | |||||
| # if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then | |||||
| echo "Generating coverage statistics, please wait..." | |||||
| cd ${BASEPATH} | |||||
| rm -rf ${BASEPATH}/cov | |||||
| mkdir ${BASEPATH}/cov | |||||
| lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info | |||||
| lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info | |||||
| cd ${BASEPATH}/cov | |||||
| genhtml coverage.info | |||||
| echo "Generating coverage statistics, please wait..." | |||||
| cd ${BASEPATH} | |||||
| rm -rf ${BASEPATH}/cov | |||||
| mkdir ${BASEPATH}/cov | |||||
| lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info | |||||
| lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info | |||||
| cd ${BASEPATH}/cov | |||||
| genhtml coverage.info | |||||
| fi | fi | ||||
| # generate output package in tar form, including ut/st libraries/executables | # generate output package in tar form, including ut/st libraries/executables | ||||
| @@ -256,6 +244,8 @@ generate_package() | |||||
| ATC_PATH="atc/lib64" | ATC_PATH="atc/lib64" | ||||
| ATC_BIN_PATH="atc/bin" | ATC_BIN_PATH="atc/bin" | ||||
| FWK_BIN_PATH="fwkacllib/bin" | FWK_BIN_PATH="fwkacllib/bin" | ||||
| FWK_INCLUDE_PATH="fwkacllib/include" | |||||
| ATC_INCLUDE_PATH="atc/include" | |||||
| NNENGINE_PATH="plugin/nnengine/ge_config" | NNENGINE_PATH="plugin/nnengine/ge_config" | ||||
| OPSKERNEL_PATH="plugin/opskernel" | OPSKERNEL_PATH="plugin/opskernel" | ||||
| @@ -277,6 +267,8 @@ generate_package() | |||||
| mk_dir "${OUTPUT_PATH}/${ACL_PATH}" | mk_dir "${OUTPUT_PATH}/${ACL_PATH}" | ||||
| mk_dir "${OUTPUT_PATH}/${ATC_BIN_PATH}" | mk_dir "${OUTPUT_PATH}/${ATC_BIN_PATH}" | ||||
| mk_dir "${OUTPUT_PATH}/${FWK_BIN_PATH}" | mk_dir "${OUTPUT_PATH}/${FWK_BIN_PATH}" | ||||
| mk_dir "${OUTPUT_PATH}/${FWK_INCLUDE_PATH}" | |||||
| mk_dir "${OUTPUT_PATH}/${ATC_INCLUDE_PATH}" | |||||
| cd "${OUTPUT_PATH}" | cd "${OUTPUT_PATH}" | ||||
| @@ -289,10 +281,10 @@ generate_package() | |||||
| find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name libengine.so -exec cp -f {} ${OUTPUT_PATH}/${ATC_PATH}/${NNENGINE_PATH}/../ \; | find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name libengine.so -exec cp -f {} ${OUTPUT_PATH}/${ATC_PATH}/${NNENGINE_PATH}/../ \; | ||||
| MAX_DEPTH=1 | MAX_DEPTH=1 | ||||
| if [ "x${PLATFORM}" = "xall" ] || [ "x${PLATFORM}" = "xinference" ] | |||||
| then | |||||
| MAX_DEPTH=2 | |||||
| fi | |||||
| # if [ "x${PLATFORM}" = "xall" ] || [ "x${PLATFORM}" = "xinference" ] | |||||
| # then | |||||
| # MAX_DEPTH=2 | |||||
| # fi | |||||
| for lib in "${PLUGIN_OPSKERNEL[@]}"; | for lib in "${PLUGIN_OPSKERNEL[@]}"; | ||||
| do | do | ||||
| find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth ${MAX_DEPTH} -name "$lib" -exec cp -f {} ${OUTPUT_PATH}/${FWK_PATH}/${OPSKERNEL_PATH} \; | find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth ${MAX_DEPTH} -name "$lib" -exec cp -f {} ${OUTPUT_PATH}/${FWK_PATH}/${OPSKERNEL_PATH} \; | ||||
| @@ -318,7 +310,15 @@ generate_package() | |||||
| find ./lib/atclib -name atc.bin -exec cp {} "${OUTPUT_PATH}/${ATC_BIN_PATH}" \; | find ./lib/atclib -name atc.bin -exec cp {} "${OUTPUT_PATH}/${ATC_BIN_PATH}" \; | ||||
| find ./lib/fwkacl -name atc.bin -exec cp {} "${OUTPUT_PATH}/${FWK_BIN_PATH}" \; | find ./lib/fwkacl -name atc.bin -exec cp {} "${OUTPUT_PATH}/${FWK_BIN_PATH}" \; | ||||
| find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name "libascendcl.so" -exec cp -f {} ${OUTPUT_PATH}/${ACL_PATH} \; | find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name "libascendcl.so" -exec cp -f {} ${OUTPUT_PATH}/${ACL_PATH} \; | ||||
| cp -r ${OUTPUT_PATH}/../metadef/inc/external/* ${ATC_INCLUDE_PATH} | |||||
| cp -r ${OUTPUT_PATH}/../parser/inc/external/* ${ATC_INCLUDE_PATH} | |||||
| cp -r ${OUTPUT_PATH}/../inc/external/* ${ATC_INCLUDE_PATH} | |||||
| cp -r ${OUTPUT_PATH}/../metadef/inc/external/* ${FWK_INCLUDE_PATH} | |||||
| cp -r ${OUTPUT_PATH}/../parser/inc/external/* ${FWK_INCLUDE_PATH} | |||||
| cp -r ${OUTPUT_PATH}/../inc/external/* ${FWK_INCLUDE_PATH} | |||||
| if [ "x${PLATFORM}" = "xtrain" ] | if [ "x${PLATFORM}" = "xtrain" ] | ||||
| then | then | ||||
| tar -cf graphengine_lib.tar fwkacllib | tar -cf graphengine_lib.tar fwkacllib | ||||
| @@ -339,4 +339,4 @@ then | |||||
| find ./ -name graphengine_lib.tar -exec rm {} \; | find ./ -name graphengine_lib.tar -exec rm {} \; | ||||
| tar -cf graphengine_lib.tar lib | tar -cf graphengine_lib.tar lib | ||||
| fi | fi | ||||
| echo "---------------- GraphEngine package archive generated ----------------" | |||||
| echo "---------------- GraphEngine package archive generated ----------------" | |||||
| @@ -639,15 +639,6 @@ set(INFER_SRC_LIST | |||||
| "graph/load/model_manager/task_info/model_exit_task_info.cc" | "graph/load/model_manager/task_info/model_exit_task_info.cc" | ||||
| "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" | "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" | ||||
| "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" | "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" | ||||
| "single_op/task/op_task.cc" | |||||
| "single_op/task/build_task_utils.cc" | |||||
| "single_op/task/tbe_task_builder.cc" | |||||
| "single_op/task/aicpu_task_builder.cc" | |||||
| "single_op/task/aicpu_kernel_task_builder.cc" | |||||
| "single_op/single_op.cc" | |||||
| "single_op/single_op_model.cc" | |||||
| "single_op/stream_resource.cc" | |||||
| "single_op/single_op_manager.cc" | |||||
| "hybrid/hybrid_davinci_model_stub.cc" | "hybrid/hybrid_davinci_model_stub.cc" | ||||
| "ir_build/ge_ir_build.cc" | "ir_build/ge_ir_build.cc" | ||||
| "ir_build/atc_ir_common.cc" | "ir_build/atc_ir_common.cc" | ||||
| @@ -703,11 +694,13 @@ target_compile_definitions(ge_runner PRIVATE | |||||
| FMK_SUPPORT_DUMP | FMK_SUPPORT_DUMP | ||||
| DAVINCI_CLOUD | DAVINCI_CLOUD | ||||
| google=ascend_private | google=ascend_private | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_compile_options(ge_runner PRIVATE | target_compile_options(ge_runner PRIVATE | ||||
| -O2 | -O2 | ||||
| -fno-common | -fno-common | ||||
| -fvisibility=hidden | |||||
| $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable> | $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable> | ||||
| $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format> | $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format> | ||||
| ) | ) | ||||
| @@ -738,6 +731,10 @@ target_include_directories(ge_runner SYSTEM PRIVATE | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ||||
| ) | ) | ||||
| target_link_options(ge_runner PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(ge_runner PRIVATE | target_link_libraries(ge_runner PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| adump_server | adump_server | ||||
| @@ -772,11 +769,13 @@ target_compile_definitions(ge_compiler PRIVATE | |||||
| FMK_HOST_INFER | FMK_HOST_INFER | ||||
| COMPILE_OMG_PACKAGE | COMPILE_OMG_PACKAGE | ||||
| google=ascend_private | google=ascend_private | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_compile_options(ge_compiler PRIVATE | target_compile_options(ge_compiler PRIVATE | ||||
| -O2 | -O2 | ||||
| -fno-common | -fno-common | ||||
| -fvisibility=hidden | |||||
| $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable> | $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable> | ||||
| $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format> | $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format> | ||||
| ) | ) | ||||
| @@ -807,6 +806,10 @@ target_include_directories(ge_compiler SYSTEM PRIVATE | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ||||
| ) | ) | ||||
| target_link_options(ge_compiler PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(ge_compiler PRIVATE | target_link_libraries(ge_compiler PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| static_mmpa | static_mmpa | ||||
| @@ -868,6 +871,7 @@ target_compile_options(opensrc_ascendcl PRIVATE | |||||
| -O2 | -O2 | ||||
| -fvisibility=hidden | -fvisibility=hidden | ||||
| ) | ) | ||||
| target_link_options(opensrc_ascendcl PRIVATE | target_link_options(opensrc_ascendcl PRIVATE | ||||
| -rdynamic | -rdynamic | ||||
| -Wl,--allow-multiple-definition | -Wl,--allow-multiple-definition | ||||
| @@ -875,6 +879,7 @@ target_link_options(opensrc_ascendcl PRIVATE | |||||
| -Wl,-Bsymbolic | -Wl,-Bsymbolic | ||||
| -Wl,--exclude-libs,ALL | -Wl,--exclude-libs,ALL | ||||
| ) | ) | ||||
| target_link_libraries(opensrc_ascendcl PRIVATE | target_link_libraries(opensrc_ascendcl PRIVATE | ||||
| -Wl,--whole-archive | -Wl,--whole-archive | ||||
| ge_executor | ge_executor | ||||
| @@ -12,7 +12,7 @@ set(PROTO_LIST | |||||
| "${METADEF_DIR}/proto/tensorflow/tensor.proto" | "${METADEF_DIR}/proto/tensorflow/tensor.proto" | ||||
| "${METADEF_DIR}/proto/tensorflow/tensor_shape.proto" | "${METADEF_DIR}/proto/tensorflow/tensor_shape.proto" | ||||
| "${METADEF_DIR}/proto/tensorflow/types.proto" | "${METADEF_DIR}/proto/tensorflow/types.proto" | ||||
| "${METADEF_DIR}/proto/tensorflow/versions.proto" | |||||
| "${METADEF_DIR}/proto/tensorflow/versions.proto" | |||||
| ) | ) | ||||
| protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | ||||
| @@ -73,6 +73,7 @@ target_compile_definitions(ge_common PRIVATE | |||||
| FMK_SUPPORT_DUMP | FMK_SUPPORT_DUMP | ||||
| OS_CENTOS | OS_CENTOS | ||||
| google=ascend_private | google=ascend_private | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_compile_options(ge_common PRIVATE | target_compile_options(ge_common PRIVATE | ||||
| @@ -105,6 +106,10 @@ target_include_directories(ge_common PRIVATE | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ||||
| ) | ) | ||||
| target_link_options(ge_common PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(ge_common PRIVATE | target_link_libraries(ge_common PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| static_mmpa | static_mmpa | ||||
| @@ -132,6 +137,7 @@ target_compile_definitions(ge_common_static PRIVATE | |||||
| $<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0> | $<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0> | ||||
| $<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> | $<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> | ||||
| LOG_CPP | LOG_CPP | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_compile_options(ge_common_static PRIVATE | target_compile_options(ge_common_static PRIVATE | ||||
| @@ -181,6 +187,7 @@ target_compile_definitions(ge_common PRIVATE | |||||
| OS_CENTOS | OS_CENTOS | ||||
| google=ascend_private | google=ascend_private | ||||
| LOG_CPP | LOG_CPP | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_compile_options(ge_common PRIVATE | target_compile_options(ge_common PRIVATE | ||||
| @@ -208,6 +215,10 @@ target_include_directories(ge_common PRIVATE | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ||||
| ) | ) | ||||
| target_link_options(ge_common PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(ge_common PRIVATE | target_link_libraries(ge_common PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| ascend_protobuf_static | ascend_protobuf_static | ||||
| @@ -598,7 +598,7 @@ bool ModelCacheHelper::IsAllocatedGraphIdSameAsCache(Json &json) const { | |||||
| return false; | return false; | ||||
| } | } | ||||
| // Compare allocated graph id info between json and VarManager | // Compare allocated graph id info between json and VarManager | ||||
| std::unordered_map<std::string, uint32_t> allocated_graph_id; | |||||
| std::map<std::string, uint32_t> allocated_graph_id; | |||||
| auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id); | auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGW("Fail to parse AllocatedGraphId from Json."); | GELOGW("Fail to parse AllocatedGraphId from Json."); | ||||
| @@ -667,7 +667,7 @@ bool ModelCacheHelper::IsChangedGraphIdSameAsCache(Json &json) const { | |||||
| return false; | return false; | ||||
| } | } | ||||
| // Compare variable changed graph id info between json and VarManager | // Compare variable changed graph id info between json and VarManager | ||||
| std::unordered_map<std::string, uint32_t> changed_graph_id; | |||||
| std::map<std::string, uint32_t> changed_graph_id; | |||||
| auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id); | auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGW("Fail to parse ChangedGraphId from Json."); | GELOGW("Fail to parse ChangedGraphId from Json."); | ||||
| @@ -732,7 +732,7 @@ bool ModelCacheHelper::IsVarAddrMgrMapSameAsCache(Json &json) const { | |||||
| } | } | ||||
| // Compare variable address info between json and VarManager | // Compare variable address info between json and VarManager | ||||
| std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector; | std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector; | ||||
| std::unordered_set<uint64_t> var_offset_set; | |||||
| std::set<uint64_t> var_offset_set; | |||||
| auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set); | auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGW("Fail to parse VarAddrMgrMap from Json."); | GELOGW("Fail to parse VarAddrMgrMap from Json."); | ||||
| @@ -942,7 +942,7 @@ Status ModelCacheHelper::RecoverAllocatedGraphId(const Json &json) const { | |||||
| GELOGW("Input param json type should be null or array."); | GELOGW("Input param json type should be null or array."); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| std::unordered_map<std::string, uint32_t> allocated_graph_id; | |||||
| std::map<std::string, uint32_t> allocated_graph_id; | |||||
| auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id); | auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGW("Fail to parse AllocatedGraphId from Json."); | GELOGW("Fail to parse AllocatedGraphId from Json."); | ||||
| @@ -963,7 +963,7 @@ Status ModelCacheHelper::RecoverChangedGraphId(const Json &json) const { | |||||
| GELOGW("Input param json type should be null or array."); | GELOGW("Input param json type should be null or array."); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| std::unordered_map<std::string, uint32_t> changed_graph_id; | |||||
| std::map<std::string, uint32_t> changed_graph_id; | |||||
| auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id); | auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGW("Fail to parse AllocatedGraphId from Json."); | GELOGW("Fail to parse AllocatedGraphId from Json."); | ||||
| @@ -985,7 +985,7 @@ Status ModelCacheHelper::RecoverVarAddrAndTensorDesc(const Json &json) const { | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector; | std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector; | ||||
| std::unordered_set<uint64_t> var_offset_set; | |||||
| std::set<uint64_t> var_offset_set; | |||||
| auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set); | auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGW("Fail to parse VarAddrMgrMap from Json."); | GELOGW("Fail to parse VarAddrMgrMap from Json."); | ||||
| @@ -1508,7 +1508,7 @@ Status ModelCacheHelper::ParseMemResourceFromJson(const Json &json, map<rtMemTyp | |||||
| Status ModelCacheHelper::ParseVarAddrMgrMapFromJson( | Status ModelCacheHelper::ParseVarAddrMgrMapFromJson( | ||||
| const Json &json, std::vector<std::pair<std::string, VarAddrMgr>> &var_addr_mgr_vector, | const Json &json, std::vector<std::pair<std::string, VarAddrMgr>> &var_addr_mgr_vector, | ||||
| std::unordered_set<uint64_t> &var_offset_set) { | |||||
| std::set<uint64_t> &var_offset_set) { | |||||
| if (!(json.is_array() || json.is_null())) { | if (!(json.is_array() || json.is_null())) { | ||||
| GELOGW("Input param json type should be null or array."); | GELOGW("Input param json type should be null or array."); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| @@ -1606,7 +1606,7 @@ Status ModelCacheHelper::ParseTransRoadsFromJson( | |||||
| } | } | ||||
| Status ModelCacheHelper::ParseChangedGraphIdFromJson(const Json &json, | Status ModelCacheHelper::ParseChangedGraphIdFromJson(const Json &json, | ||||
| std::unordered_map<std::string, uint32_t> &changed_graph_id) { | |||||
| std::map<std::string, uint32_t> &changed_graph_id) { | |||||
| if (!(json.is_array() || json.is_null())) { | if (!(json.is_array() || json.is_null())) { | ||||
| GELOGW("Input param json type should be null or array."); | GELOGW("Input param json type should be null or array."); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| @@ -1624,7 +1624,7 @@ Status ModelCacheHelper::ParseChangedGraphIdFromJson(const Json &json, | |||||
| } | } | ||||
| Status ModelCacheHelper::ParseAllocatedGraphIdFromJson(const Json &json, | Status ModelCacheHelper::ParseAllocatedGraphIdFromJson(const Json &json, | ||||
| std::unordered_map<std::string, uint32_t> &allocated_graph_id) { | |||||
| std::map<std::string, uint32_t> &allocated_graph_id) { | |||||
| if (!(json.is_array() || json.is_null())) { | if (!(json.is_array() || json.is_null())) { | ||||
| GELOGW("Input param json type should be null or array."); | GELOGW("Input param json type should be null or array."); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| @@ -95,15 +95,15 @@ class ModelCacheHelper { | |||||
| static Status ParseMemResourceFromJson(const Json &json, map<rtMemType_t, int64_t> &mem_resource); | static Status ParseMemResourceFromJson(const Json &json, map<rtMemType_t, int64_t> &mem_resource); | ||||
| static Status ParseVarAddrMgrMapFromJson(const Json &json, | static Status ParseVarAddrMgrMapFromJson(const Json &json, | ||||
| std::vector<std::pair<std::string, VarAddrMgr>> &var_addr_mgr_vector, | std::vector<std::pair<std::string, VarAddrMgr>> &var_addr_mgr_vector, | ||||
| std::unordered_set<uint64_t> &var_offset_set); | |||||
| std::set<uint64_t> &var_offset_set); | |||||
| static Status ParseCurVarTensorDescMapFromJson( | static Status ParseCurVarTensorDescMapFromJson( | ||||
| const Json &json, std::unordered_map<std::string, ge::GeTensorDesc> &cur_var_tensor_desc_map); | const Json &json, std::unordered_map<std::string, ge::GeTensorDesc> &cur_var_tensor_desc_map); | ||||
| static Status ParseTransRoadsFromJson(const Json &json, | static Status ParseTransRoadsFromJson(const Json &json, | ||||
| std::unordered_map<std::string, std::vector<TransNodeInfo>> &trans_roads); | std::unordered_map<std::string, std::vector<TransNodeInfo>> &trans_roads); | ||||
| static Status ParseChangedGraphIdFromJson(const Json &json, | static Status ParseChangedGraphIdFromJson(const Json &json, | ||||
| std::unordered_map<std::string, uint32_t> &changed_graph_id); | |||||
| std::map<std::string, uint32_t> &changed_graph_id); | |||||
| static Status ParseAllocatedGraphIdFromJson(const Json &json, | static Status ParseAllocatedGraphIdFromJson(const Json &json, | ||||
| std::unordered_map<std::string, uint32_t> &allocated_graph_id); | |||||
| std::map<std::string, uint32_t> &allocated_graph_id); | |||||
| static Status ParseBroadcastInfoFromJson(const Json &json, | static Status ParseBroadcastInfoFromJson(const Json &json, | ||||
| std::unordered_map<std::string, VarBroadCastInfo> &var_broadcast_info); | std::unordered_map<std::string, VarBroadCastInfo> &var_broadcast_info); | ||||
| static Status GetVarNameFromVarKey(const string &var_key, const GeTensorDesc &tensor_desc, string &var_name); | static Status GetVarNameFromVarKey(const string &var_key, const GeTensorDesc &tensor_desc, string &var_name); | ||||
| @@ -88,7 +88,7 @@ bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { | |||||
| return false; | return false; | ||||
| } | } | ||||
| std::unordered_set<uint32_t> record; | |||||
| std::set<uint32_t> record; | |||||
| for (size_t i = 0; i < device_nums; ++i) { | for (size_t i = 0; i < device_nums; ++i) { | ||||
| uint32_t dev_id = deviceid_list[i]; | uint32_t dev_id = deviceid_list[i]; | ||||
| if (dev_id >= static_cast<uint32_t>(dev_count)) { | if (dev_id >= static_cast<uint32_t>(dev_count)) { | ||||
| @@ -167,6 +167,8 @@ target_compile_options(ge_executor PRIVATE | |||||
| $<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common> | $<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common> | ||||
| $<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Debug>>:/MTd> | $<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Debug>>:/MTd> | ||||
| $<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Release>>:/MT> | $<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Release>>:/MT> | ||||
| $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable> | |||||
| $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format> | |||||
| ) | ) | ||||
| target_compile_definitions(ge_executor PRIVATE | target_compile_definitions(ge_executor PRIVATE | ||||
| @@ -178,7 +180,7 @@ target_compile_definitions(ge_executor PRIVATE | |||||
| LOG_CPP | LOG_CPP | ||||
| ) | ) | ||||
| target_include_directories(ge_executor PRIVATE | |||||
| target_include_directories(ge_executor SYSTEM PRIVATE | |||||
| ${GE_CODE_DIR}/ge | ${GE_CODE_DIR}/ge | ||||
| ${GE_CODE_DIR}/inc | ${GE_CODE_DIR}/inc | ||||
| ${GE_CODE_DIR}/inc/external | ${GE_CODE_DIR}/inc/external | ||||
| @@ -212,12 +214,14 @@ target_compile_options(ge_executor_shared PRIVATE | |||||
| -Werror | -Werror | ||||
| -O2 | -O2 | ||||
| -Wno-deprecated-declarations | -Wno-deprecated-declarations | ||||
| -fvisibility=hidden | |||||
| ) | ) | ||||
| target_compile_definitions(ge_executor_shared PRIVATE | target_compile_definitions(ge_executor_shared PRIVATE | ||||
| PROTOBUF_INLINE_NOT_IN_HEADERS=0 | PROTOBUF_INLINE_NOT_IN_HEADERS=0 | ||||
| DAVINCI_SUPPORT_PROFILING | DAVINCI_SUPPORT_PROFILING | ||||
| google=ascend_private | google=ascend_private | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_include_directories(ge_executor_shared PRIVATE | target_include_directories(ge_executor_shared PRIVATE | ||||
| @@ -238,6 +242,10 @@ target_include_directories(ge_executor_shared PRIVATE | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
| ) | ) | ||||
| target_link_options(ge_executor_shared PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(ge_executor_shared PRIVATE | target_link_libraries(ge_executor_shared PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| msprofiler | msprofiler | ||||
| @@ -27,10 +27,12 @@ add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | |||||
| target_compile_options(ge_local_engine PRIVATE | target_compile_options(ge_local_engine PRIVATE | ||||
| -Werror | -Werror | ||||
| -fno-common | -fno-common | ||||
| -fvisibility=hidden | |||||
| ) | ) | ||||
| target_compile_definitions(ge_local_engine PRIVATE | target_compile_definitions(ge_local_engine PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_include_directories(ge_local_engine PRIVATE | target_include_directories(ge_local_engine PRIVATE | ||||
| @@ -51,6 +53,10 @@ target_include_directories(ge_local_engine PRIVATE | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
| ) | ) | ||||
| target_link_options(ge_local_engine PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(ge_local_engine PRIVATE | target_link_libraries(ge_local_engine PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| @@ -67,11 +73,12 @@ add_library(atc_ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | |||||
| target_compile_options(atc_ge_local_engine PRIVATE | target_compile_options(atc_ge_local_engine PRIVATE | ||||
| -Werror | -Werror | ||||
| -fno-common | -fno-common | ||||
| -fvisibility=hidden | |||||
| ) | ) | ||||
| target_compile_definitions(atc_ge_local_engine PRIVATE | target_compile_definitions(atc_ge_local_engine PRIVATE | ||||
| COMPILE_OMG_PACKAGE | |||||
| google=ascend_private | google=ascend_private | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_include_directories(atc_ge_local_engine PRIVATE | target_include_directories(atc_ge_local_engine PRIVATE | ||||
| @@ -92,6 +99,10 @@ target_include_directories(atc_ge_local_engine PRIVATE | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
| ) | ) | ||||
| target_link_options(atc_ge_local_engine PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(atc_ge_local_engine PRIVATE | target_link_libraries(atc_ge_local_engine PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| @@ -113,10 +124,12 @@ add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDR | |||||
| target_compile_options(ge_local_opskernel_builder PRIVATE | target_compile_options(ge_local_opskernel_builder PRIVATE | ||||
| -Werror | -Werror | ||||
| -fno-common | -fno-common | ||||
| -fvisibility=hidden | |||||
| ) | ) | ||||
| target_compile_definitions(ge_local_opskernel_builder PRIVATE | target_compile_definitions(ge_local_opskernel_builder PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_include_directories(ge_local_opskernel_builder PRIVATE | target_include_directories(ge_local_opskernel_builder PRIVATE | ||||
| @@ -137,6 +150,10 @@ target_include_directories(ge_local_opskernel_builder PRIVATE | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
| ) | ) | ||||
| target_link_options(ge_local_opskernel_builder PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(ge_local_opskernel_builder PRIVATE | target_link_libraries(ge_local_opskernel_builder PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| @@ -154,10 +171,12 @@ add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO | |||||
| target_compile_options(atc_ge_local_opskernel_builder PRIVATE | target_compile_options(atc_ge_local_opskernel_builder PRIVATE | ||||
| -Werror | -Werror | ||||
| -fno-common | -fno-common | ||||
| -fvisibility=hidden | |||||
| ) | ) | ||||
| target_compile_definitions(atc_ge_local_opskernel_builder PRIVATE | target_compile_definitions(atc_ge_local_opskernel_builder PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_include_directories(atc_ge_local_opskernel_builder PRIVATE | target_include_directories(atc_ge_local_opskernel_builder PRIVATE | ||||
| @@ -178,6 +197,10 @@ target_include_directories(atc_ge_local_opskernel_builder PRIVATE | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
| ) | ) | ||||
| target_link_options(atc_ge_local_opskernel_builder PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(atc_ge_local_opskernel_builder PRIVATE | target_link_libraries(atc_ge_local_opskernel_builder PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| @@ -200,11 +223,13 @@ add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PR | |||||
| target_compile_options(ge_local_opskernel_builder_static PRIVATE | target_compile_options(ge_local_opskernel_builder_static PRIVATE | ||||
| -Werror | -Werror | ||||
| -fno-common | -fno-common | ||||
| -fvisibility=hidden | |||||
| ) | ) | ||||
| target_compile_definitions(ge_local_opskernel_builder_static PRIVATE | target_compile_definitions(ge_local_opskernel_builder_static PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| LOG_CPP | LOG_CPP | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_include_directories(ge_local_opskernel_builder_static PRIVATE | target_include_directories(ge_local_opskernel_builder_static PRIVATE | ||||
| @@ -17,6 +17,20 @@ | |||||
| #ifndef GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_ | #ifndef GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_ | ||||
| #define GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_ | #define GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_ | ||||
| #if defined(_MSC_VER) | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #else | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #endif | |||||
| #include <map> | #include <map> | ||||
| #include <memory> | #include <memory> | ||||
| #include <string> | #include <string> | ||||
| @@ -32,7 +46,7 @@ namespace ge_local { | |||||
| * ge local engine. | * ge local engine. | ||||
| * Used for the ops not belong to any engine. eg:netoutput | * Used for the ops not belong to any engine. eg:netoutput | ||||
| */ | */ | ||||
| class GeLocalEngine { | |||||
| class GE_FUNC_VISIBILITY GeLocalEngine { | |||||
| public: | public: | ||||
| /** | /** | ||||
| * get GeLocalEngine instance. | * get GeLocalEngine instance. | ||||
| @@ -94,25 +108,25 @@ extern "C" { | |||||
| * When Ge start, GE will invoke this interface | * When Ge start, GE will invoke this interface | ||||
| * @return The status whether initialize successfully | * @return The status whether initialize successfully | ||||
| */ | */ | ||||
| ge::Status Initialize(const map<string, string> &options); | |||||
| GE_FUNC_VISIBILITY ge::Status Initialize(const map<string, string> &options); | |||||
| /** | /** | ||||
| * After the initialize, GE will invoke this interface to get the Ops kernel Store | * After the initialize, GE will invoke this interface to get the Ops kernel Store | ||||
| * @param ops_kernel_map The ge local's ops kernel info | * @param ops_kernel_map The ge local's ops kernel info | ||||
| */ | */ | ||||
| void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map); | |||||
| GE_FUNC_VISIBILITY void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map); | |||||
| /** | /** | ||||
| * After the initialize, GE will invoke this interface to get the Graph Optimizer | * After the initialize, GE will invoke this interface to get the Graph Optimizer | ||||
| * @param graph_optimizers The ge local's Graph Optimizer objs | * @param graph_optimizers The ge local's Graph Optimizer objs | ||||
| */ | */ | ||||
| void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers); | |||||
| GE_FUNC_VISIBILITY void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers); | |||||
| /** | /** | ||||
| * When the graph finished, GE will invoke this interface | * When the graph finished, GE will invoke this interface | ||||
| * @return The status whether initialize successfully | * @return The status whether initialize successfully | ||||
| */ | */ | ||||
| ge::Status Finalize(); | |||||
| GE_FUNC_VISIBILITY ge::Status Finalize(); | |||||
| } | } | ||||
| #endif // GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_ | #endif // GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_ | ||||
| @@ -16,6 +16,20 @@ | |||||
| #ifndef GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | #ifndef GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | ||||
| #define GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | #define GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | ||||
| #if defined(_MSC_VER) | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #else | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #endif | |||||
| #include <mutex> | #include <mutex> | ||||
| #include "framework/common/ge_inner_error_codes.h" | #include "framework/common/ge_inner_error_codes.h" | ||||
| #include "graph/node.h" | #include "graph/node.h" | ||||
| @@ -23,7 +37,7 @@ | |||||
| #include "external/../register/register.h" | #include "external/../register/register.h" | ||||
| namespace ge { | namespace ge { | ||||
| class HostCpuEngine { | |||||
| class GE_FUNC_VISIBILITY HostCpuEngine { | |||||
| public: | public: | ||||
| ~HostCpuEngine() = default; | ~HostCpuEngine() = default; | ||||
| @@ -22,7 +22,7 @@ | |||||
| namespace ge { | namespace ge { | ||||
| namespace ge_local { | namespace ge_local { | ||||
| class GeLocalOpsKernelBuilder : public OpsKernelBuilder { | |||||
| class GE_FUNC_VISIBILITY GeLocalOpsKernelBuilder : public OpsKernelBuilder { | |||||
| public: | public: | ||||
| ~GeLocalOpsKernelBuilder() override; | ~GeLocalOpsKernelBuilder() override; | ||||
| Status Initialize(const map<std::string, std::string> &options) override; | Status Initialize(const map<std::string, std::string> &options) override; | ||||
| @@ -17,6 +17,20 @@ | |||||
| #ifndef GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_GE_LOCAL_OPS_KERNEL_INFO_H_ | #ifndef GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_GE_LOCAL_OPS_KERNEL_INFO_H_ | ||||
| #define GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_GE_LOCAL_OPS_KERNEL_INFO_H_ | #define GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_GE_LOCAL_OPS_KERNEL_INFO_H_ | ||||
| #if defined(_MSC_VER) | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #else | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #endif | |||||
| #include <map> | #include <map> | ||||
| #include <string> | #include <string> | ||||
| #include <vector> | #include <vector> | ||||
| @@ -25,7 +39,7 @@ | |||||
| namespace ge { | namespace ge { | ||||
| namespace ge_local { | namespace ge_local { | ||||
| class GeLocalOpsKernelInfoStore : public OpsKernelInfoStore { | |||||
| class GE_FUNC_VISIBILITY GeLocalOpsKernelInfoStore : public OpsKernelInfoStore { | |||||
| public: | public: | ||||
| GeLocalOpsKernelInfoStore() = default; | GeLocalOpsKernelInfoStore() = default; | ||||
| @@ -21,7 +21,7 @@ | |||||
| namespace ge { | namespace ge { | ||||
| namespace ge_local { | namespace ge_local { | ||||
| class GeDeletedOp : public Op { | |||||
| class GE_FUNC_VISIBILITY GeDeletedOp : public Op { | |||||
| public: | public: | ||||
| GeDeletedOp(const Node &node, RunContext &run_context); | GeDeletedOp(const Node &node, RunContext &run_context); | ||||
| @@ -21,7 +21,7 @@ | |||||
| namespace ge { | namespace ge { | ||||
| namespace ge_local { | namespace ge_local { | ||||
| class NoOp : public Op { | |||||
| class GE_FUNC_VISIBILITY NoOp : public Op { | |||||
| public: | public: | ||||
| NoOp(const Node &node, RunContext &run_context); | NoOp(const Node &node, RunContext &run_context); | ||||
| @@ -29,7 +29,7 @@ namespace ge_local { | |||||
| /** | /** | ||||
| * The base class for all op. | * The base class for all op. | ||||
| */ | */ | ||||
| class Op { | |||||
| class GE_FUNC_VISIBILITY Op { | |||||
| public: | public: | ||||
| Op(const Node &node, RunContext &run_context); | Op(const Node &node, RunContext &run_context); | ||||
| @@ -32,7 +32,7 @@ using OP_CREATOR_FUNC = std::function<std::shared_ptr<Op>(const Node &, RunConte | |||||
| /** | /** | ||||
| * manage all the op, support create op. | * manage all the op, support create op. | ||||
| */ | */ | ||||
| class OpFactory { | |||||
| class GE_FUNC_VISIBILITY OpFactory { | |||||
| public: | public: | ||||
| static OpFactory &Instance(); | static OpFactory &Instance(); | ||||
| @@ -72,7 +72,7 @@ class OpFactory { | |||||
| std::vector<std::string> all_ops_; | std::vector<std::string> all_ops_; | ||||
| }; | }; | ||||
| class OpRegistrar { | |||||
| class GE_FUNC_VISIBILITY OpRegistrar { | |||||
| public: | public: | ||||
| OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) { | OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) { | ||||
| OpFactory::Instance().RegisterCreator(type, func); | OpFactory::Instance().RegisterCreator(type, func); | ||||
| @@ -27,7 +27,7 @@ target_compile_options(ge_runtime PRIVATE | |||||
| -fno-common | -fno-common | ||||
| ) | ) | ||||
| target_compile_definitions(ge_runtime PRIVATE | |||||
| target_compile_definitions(ge_runtime PRIVATE | |||||
| PROTOBUF_INLINE_NOT_IN_HEADERS=0 | PROTOBUF_INLINE_NOT_IN_HEADERS=0 | ||||
| LOG_CPP | LOG_CPP | ||||
| ) | ) | ||||
| @@ -53,6 +53,10 @@ target_include_directories(ge_runtime PRIVATE | |||||
| ${CMAKE_BINARY_DIR}/proto/ge | ${CMAKE_BINARY_DIR}/proto/ge | ||||
| ) | ) | ||||
| target_link_options(ge_runtime PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(ge_runtime PRIVATE | target_link_libraries(ge_runtime PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| @@ -1121,7 +1121,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
| } | } | ||||
| } | } | ||||
| reusable_block->continuous_block_ = continuous; | reusable_block->continuous_block_ = continuous; | ||||
| reusable_block->ref_count_++; | |||||
| reusable_blocks_[memory_type][stream_id].erase((++it).base()); | reusable_blocks_[memory_type][stream_id].erase((++it).base()); | ||||
| return reusable_block; | return reusable_block; | ||||
| } | } | ||||
| @@ -1136,7 +1135,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
| block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | ||||
| block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size); | block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size); | ||||
| block->stream_id_ = node_op_desc->GetStreamId(); | block->stream_id_ = node_op_desc->GetStreamId(); | ||||
| block->ref_count_++; | |||||
| block->continuous_block_ = continuous; | block->continuous_block_ = continuous; | ||||
| block->batch_label_ = batch_label; | block->batch_label_ = batch_label; | ||||
| if (mem_type == kOutput) { | if (mem_type == kOutput) { | ||||
| @@ -1266,6 +1264,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
| // hccl task need align header and tail | // hccl task need align header and tail | ||||
| block->first_continuous_block_ = true; | block->first_continuous_block_ = true; | ||||
| block->last_continuous_block_ = true; | block->last_continuous_block_ = true; | ||||
| ++(block->ref_count_); | |||||
| } else { | } else { | ||||
| GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str()); | GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str()); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| @@ -1289,6 +1288,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
| return nullptr, "Get no align size failed"); | return nullptr, "Get no align size failed"); | ||||
| std::string symbol; | std::string symbol; | ||||
| bool reuse_input = false; | |||||
| if (IsSymbolExist(node_index_io, symbol)) { | if (IsSymbolExist(node_index_io, symbol)) { | ||||
| block = symbol_blocks_[symbol]; | block = symbol_blocks_[symbol]; | ||||
| GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str()); | GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str()); | ||||
| @@ -1303,6 +1303,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
| block->SetLifeTimeEnd(life_time_); | block->SetLifeTimeEnd(life_time_); | ||||
| block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size); | block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size); | ||||
| block->ref_count_++; | block->ref_count_++; | ||||
| reuse_input = true; | |||||
| // add new size | // add new size | ||||
| align_size = block_size; | align_size = block_size; | ||||
| @@ -1336,7 +1337,6 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
| workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type); | workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type); | ||||
| } | } | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "Block is nullptr."); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "Block is nullptr."); | ||||
| int out_count_reuse_input = block->ref_count_; | |||||
| int out_count = 0; | int out_count = 0; | ||||
| GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(), GELOGE(FAILED, "index is out of range."); return nullptr); | GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(), GELOGE(FAILED, "index is out of range."); return nullptr); | ||||
| auto out_data_anchor = n->GetOutDataAnchor(index); | auto out_data_anchor = n->GetOutDataAnchor(index); | ||||
| @@ -1351,28 +1351,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
| out_count++; | out_count++; | ||||
| } | } | ||||
| } | } | ||||
| bool reuse_input = false; | |||||
| for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) { | |||||
| auto owner_node = in_anchor->GetOwnerNode(); | |||||
| GE_IF_BOOL_EXEC(owner_node == nullptr, continue); | |||||
| auto op_desc = owner_node->GetOpDesc(); | |||||
| GE_IF_BOOL_EXEC(op_desc == nullptr, continue); | |||||
| for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) { | |||||
| bool dst_reuse_input = false; | |||||
| uint32_t dst_reuse_input_index = 0; | |||||
| auto owner_node_op_desc = op_desc->GetOutputDescPtr(i); | |||||
| GE_IF_BOOL_EXEC(owner_node_op_desc == nullptr, continue); | |||||
| GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInput(*owner_node_op_desc, dst_reuse_input) != SUCCESS, | |||||
| GELOGI("Get dst_reuse_input failed")); | |||||
| GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS, | |||||
| GELOGI("Get dst_reuse_input_index failed")); | |||||
| if (dst_reuse_input && (dst_reuse_input_index == static_cast<uint32_t>(in_anchor->GetIdx()))) { | |||||
| out_count_reuse_input += 1; | |||||
| reuse_input = true; | |||||
| } | |||||
| } | |||||
| } | |||||
| block->ref_count_ = reuse_input ? out_count_reuse_input + out_count - 1 : out_count; | |||||
| block->ref_count_ = (reuse_input && out_count != 0) ? (block->ref_count_ + out_count - 1) | |||||
| : (block->ref_count_ + out_count); | |||||
| return block; | return block; | ||||
| } | } | ||||
| @@ -1484,12 +1464,25 @@ void BlockMemAssigner::ReleaseInputNodeOutMemory(const unordered_map<string, vec | |||||
| GELOGD("node_type_indexs: %d, %s", node_type_indexs.back().index, | GELOGD("node_type_indexs: %d, %s", node_type_indexs.back().index, | ||||
| node_type_indexs.back().node->GetName().c_str()); | node_type_indexs.back().node->GetName().c_str()); | ||||
| if ((node_type_indexs.back().node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) && | |||||
| (node_type_indexs.back().index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx()))) { | |||||
| bool is_block_matched = false; | |||||
| for (auto &node_type_index : node_type_indexs) { | |||||
| is_block_matched = (node_type_index.node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) && | |||||
| (node_type_index.index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx())); | |||||
| if (is_block_matched) { | |||||
| GELOGI("Block of peer out is matched. Peer node:%s, output index:%u, " | |||||
| "current node:%s, input index:%d, block ref_count:%d.", | |||||
| node_type_index.node->GetName().c_str(), node_type_index.index, | |||||
| node->GetName().c_str(), in_anchor->GetIdx(), block->ref_count_); | |||||
| break; | |||||
| } | |||||
| } | |||||
| if (is_block_matched) { | |||||
| ReleaseMemory(block, reusable_memory, (node->GetOpDesc()->GetStreamId() == block->stream_id_)); | ReleaseMemory(block, reusable_memory, (node->GetOpDesc()->GetStreamId() == block->stream_id_)); | ||||
| if (block->ref_count_ == 0 && block->same_stream_) { | if (block->ref_count_ == 0 && block->same_stream_) { | ||||
| SetLastUsedInputMemAttr(node, in_anchor->GetIdx()); | SetLastUsedInputMemAttr(node, in_anchor->GetIdx()); | ||||
| } | } | ||||
| break; | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -1530,6 +1523,21 @@ void CheckAndGetOpReuseEnv(const string &env, vector<string> &env_vec, bool &op_ | |||||
| return; | return; | ||||
| } | } | ||||
| void BlockMemAssigner::CheckAndReleaseSuspendedBlock(const NodePtr &node, uint32_t idx, MemoryBlock *block) { | |||||
| if (node == nullptr || node->GetOpDesc() == nullptr || block == nullptr) { | |||||
| return; | |||||
| } | |||||
| int64_t stream_id = node->GetOpDesc()->GetStreamId(); | |||||
| auto out_data_anchor = node->GetOutDataAnchor(static_cast<int>(idx)); | |||||
| bool is_suspended = (out_data_anchor != nullptr) && (out_data_anchor->GetPeerInDataNodesSize() == 0); | |||||
| if (is_suspended) { | |||||
| block->ref_count_ = (block->ref_count_ != 0) ? (block->ref_count_) : (1); | |||||
| stream_workspace_blocks_[block->memory_type_][stream_id].emplace_back(block); | |||||
| GELOGI("The output is suspended, and will be released in allocation of next node. Name:%s, index:%u, " | |||||
| "size:%zu, ref_count:%d.", node->GetName().c_str(), idx, block->Size(), block->ref_count_); | |||||
| } | |||||
| } | |||||
| Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector<int64_t> &ranges) { | Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector<int64_t> &ranges) { | ||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| int64_t stream_id = op_desc->GetStreamId(); | int64_t stream_id = op_desc->GetStreamId(); | ||||
| @@ -1560,7 +1568,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
| // Allocate memory for the current node and release node memory of the same size in the workspace | // Allocate memory for the current node and release node memory of the same size in the workspace | ||||
| GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", | GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", | ||||
| for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); | for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); | ||||
| ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); }); | |||||
| ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); | |||||
| iter->second[stream_id].clear();}); | |||||
| if (IsContinuousOutput(node)) { | if (IsContinuousOutput(node)) { | ||||
| return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); | return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); | ||||
| } | } | ||||
| @@ -1621,6 +1630,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
| continue; | continue; | ||||
| } | } | ||||
| symbol_blocks_[iter->second] = mem_block; | symbol_blocks_[iter->second] = mem_block; | ||||
| // The output is suspended, and will be released in allocation of next node. | |||||
| CheckAndReleaseSuspendedBlock(node, i, mem_block); | |||||
| } | } | ||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -1648,9 +1659,6 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||||
| if (AssignOutputMemoryWithReuse(n, ranges) != SUCCESS) { | if (AssignOutputMemoryWithReuse(n, ranges) != SUCCESS) { | ||||
| return; | return; | ||||
| } | } | ||||
| for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); ++iter) { | |||||
| iter->second[stream_id].clear(); | |||||
| } | |||||
| vector<int64_t> temp; | vector<int64_t> temp; | ||||
| int64_t tatal_size = 0; | int64_t tatal_size = 0; | ||||
| GetNodeWorkSpaceSize(n, temp, tatal_size); | GetNodeWorkSpaceSize(n, temp, tatal_size); | ||||
| @@ -1692,6 +1700,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||||
| kWorkspace, n, static_cast<uint32_t>(i), workspace_reuse_flag, | kWorkspace, n, static_cast<uint32_t>(i), workspace_reuse_flag, | ||||
| is_op_reuse_mem_, false, memory_type); | is_op_reuse_mem_, false, memory_type); | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mem_block == nullptr, continue, "failed to apply memory block."); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mem_block == nullptr, continue, "failed to apply memory block."); | ||||
| ++(mem_block->ref_count_); | |||||
| CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block, memory_type); | CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block, memory_type); | ||||
| } | } | ||||
| for (auto it = reusable_blocks_.begin(); it != reusable_blocks_.end(); ++it) { | for (auto it = reusable_blocks_.begin(); it != reusable_blocks_.end(); ++it) { | ||||
| @@ -454,6 +454,8 @@ class BlockMemAssigner : public MemAssigner { | |||||
| void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node); | void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node); | ||||
| void CheckAndReleaseSuspendedBlock(const NodePtr &node, uint32_t idx, MemoryBlock *block); | |||||
| std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_; | std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_; | ||||
| std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> stream_workspace_blocks_; | std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> stream_workspace_blocks_; | ||||
| @@ -464,7 +466,7 @@ class BlockMemAssigner : public MemAssigner { | |||||
| std::unordered_map<std::string, std::unordered_map<uint32_t, MemoryBlock *>> node_continuous_input_blocks_; | std::unordered_map<std::string, std::unordered_map<uint32_t, MemoryBlock *>> node_continuous_input_blocks_; | ||||
| std::unordered_map<std::string, uint32_t> node_continuous_input_counts_; | |||||
| std::map<std::string, uint32_t> node_continuous_input_counts_; | |||||
| // reuse memory | // reuse memory | ||||
| vector<string> op_no_reuse_mem_vec_; | vector<string> op_no_reuse_mem_vec_; | ||||
| @@ -528,7 +528,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||||
| GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] " | GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] " | ||||
| "size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), | "size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), | ||||
| node->GetType().c_str(), peer_op_desc->GetName().c_str(),peer_out_data_anchor->GetIdx(), | |||||
| peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(), | |||||
| output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type, | output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type, | ||||
| is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding); | is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding); | ||||
| } | } | ||||
| @@ -618,7 +618,7 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node | |||||
| } | } | ||||
| GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]" | GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]" | ||||
| " size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), | " size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), | ||||
| node->GetType().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), | |||||
| out_op_desc->GetName().c_str(), node->GetType().c_str(), out_data_anchor->GetIdx(), | |||||
| output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL, | output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL, | ||||
| is_nopadding ? nopadding_size : tensor_desc_size, is_nopadding); | is_nopadding ? nopadding_size : tensor_desc_size, is_nopadding); | ||||
| } | } | ||||
| @@ -90,7 +90,7 @@ Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t even | |||||
| // Create rt label | // Create rt label | ||||
| for (uint32_t i = 0; i < label_num; ++i) { | for (uint32_t i = 0; i < label_num; ++i) { | ||||
| rtLabel_t label = nullptr; | rtLabel_t label = nullptr; | ||||
| rt_ret = rtLabelCreate(&label); | |||||
| rt_ret = rtLabelCreateV2(&label, rt_model_); | |||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(RT_FAILED, "rtLabelCreate failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i); | GELOGE(RT_FAILED, "rtLabelCreate failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i); | ||||
| return RT_FAILED; | return RT_FAILED; | ||||
| @@ -1226,7 +1226,7 @@ Status StreamAllocator::InsertSyncEventNodes() { | |||||
| } | } | ||||
| } | } | ||||
| Status status = ReorderEventNodes(); | |||||
| Status status = whole_graph_->InsertGraphEvents(); | |||||
| if (status != SUCCESS) { | if (status != SUCCESS) { | ||||
| GELOGE(status, "Graph ReorderEventNodes failed"); | GELOGE(status, "Graph ReorderEventNodes failed"); | ||||
| return status; | return status; | ||||
| @@ -1235,22 +1235,6 @@ Status StreamAllocator::InsertSyncEventNodes() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status StreamAllocator::ReorderEventNodes() const { | |||||
| Status status = whole_graph_->InsertEventNodes(); | |||||
| if (status != SUCCESS) { | |||||
| GELOGE(status, "Whole graph InsertEventNodes failed"); | |||||
| return status; | |||||
| } | |||||
| for (const auto &subgraph : whole_graph_->GetAllSubgraphs()) { | |||||
| status = subgraph->InsertEventNodes(); | |||||
| if (status != SUCCESS) { | |||||
| GELOGE(status, "Subgraph %s InsertEventNodes failed", subgraph->GetName().c_str()); | |||||
| return status; | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| void StreamAllocator::DumpEvents() { | void StreamAllocator::DumpEvents() { | ||||
| map<int64_t, vector<NodePtr>> after_refresh_stream_nodes; | map<int64_t, vector<NodePtr>> after_refresh_stream_nodes; | ||||
| for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | ||||
| @@ -74,7 +74,6 @@ class StreamAllocator { | |||||
| Status RefreshContinuousEvents(); | Status RefreshContinuousEvents(); | ||||
| Status InsertSyncEventNodes(); | Status InsertSyncEventNodes(); | ||||
| Status ReorderEventNodes() const; | |||||
| void DumpEvents(); | void DumpEvents(); | ||||
| @@ -211,7 +211,7 @@ Status TaskGenerator::SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion | |||||
| // and it have no attr or group attr different | // and it have no attr or group attr different | ||||
| // which means bad case, return error | // which means bad case, return error | ||||
| bool call_check = true; | bool call_check = true; | ||||
| std::unordered_set<int64_t> input_group_ids; | |||||
| std::set<int64_t> input_group_ids; | |||||
| for (const auto &input_node : node->GetInNodes()) { | for (const auto &input_node : node->GetInNodes()) { | ||||
| auto iter = nodes_with_group_attr.find(input_node); | auto iter = nodes_with_group_attr.find(input_node); | ||||
| if (iter == nodes_with_group_attr.end()) { | if (iter == nodes_with_group_attr.end()) { | ||||
| @@ -533,13 +533,6 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { | |||||
| return GE_GRAPH_GRAPH_NODE_NULL; | return GE_GRAPH_GRAPH_NODE_NULL; | ||||
| } | } | ||||
| int64_t node_index = 0; | |||||
| for (auto &node : all_nodes) { | |||||
| OpDescPtr op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| op_desc->SetId(node_index++); | |||||
| } | |||||
| map<int64_t, vector<OpDescPtr>> all_stream_ops; | map<int64_t, vector<OpDescPtr>> all_stream_ops; | ||||
| for (auto &node : all_nodes) { | for (auto &node : all_nodes) { | ||||
| OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
| @@ -784,7 +777,7 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin | |||||
| } | } | ||||
| if (graph->GetNeedIteration()) { | if (graph->GetNeedIteration()) { | ||||
| if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") { | |||||
| if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD) { | |||||
| profiling_point.end_index.insert(current_idx); | profiling_point.end_index.insert(current_idx); | ||||
| GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive", | GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive", | ||||
| op_desc->GetName().c_str(), current_idx); | op_desc->GetName().c_str(), current_idx); | ||||
| @@ -44,7 +44,7 @@ class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY TransOpUtil { | |||||
| static TransOpUtil &Instance(); | static TransOpUtil &Instance(); | ||||
| typedef std::unordered_map<std::string, int> transop_index_op; | |||||
| typedef std::map<std::string, int> transop_index_op; | |||||
| transop_index_op transop_index_map_; | transop_index_op transop_index_map_; | ||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -99,7 +99,7 @@ Status CpuTaskModelDequeue::Distribute() { | |||||
| /// @param [in] outside_addrs: model input/output memory addr | /// @param [in] outside_addrs: model input/output memory addr | ||||
| /// @return: 0 for success / others for failed | /// @return: 0 for success / others for failed | ||||
| /// | /// | ||||
| Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs) { | |||||
| Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs) { | |||||
| if ((args_ != nullptr) || (args_size_ > 0)) { | if ((args_ != nullptr) || (args_size_ > 0)) { | ||||
| GELOGE(FAILED, "Task already initialized, size: %u", args_size_); | GELOGE(FAILED, "Task already initialized, size: %u", args_size_); | ||||
| return FAILED; | return FAILED; | ||||
| @@ -110,32 +110,22 @@ Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const v | |||||
| GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | ||||
| AddrMapInfo addr_map_info; | AddrMapInfo addr_map_info; | ||||
| for (auto &addrs : outside_addrs) { | |||||
| auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); | |||||
| GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs"); | |||||
| std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0]; | |||||
| for (const auto &virtual_args_addr : virtual_args_addrs) { | |||||
| addr_map_info.addr_num += virtual_args_addr.second.size(); | |||||
| } | |||||
| } | |||||
| GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num); | |||||
| // init src_addrs/dst_addrs | // init src_addrs/dst_addrs | ||||
| size_t index = 0; | |||||
| vector<uint64_t> src_addrs; | vector<uint64_t> src_addrs; | ||||
| vector<uint64_t> dst_addrs; | vector<uint64_t> dst_addrs; | ||||
| for (auto &addrs : outside_addrs) { | |||||
| auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); | |||||
| for (const auto &addrs : outside_addrs) { | |||||
| const auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); | |||||
| GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs"); | GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs"); | ||||
| std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0]; | std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0]; | ||||
| for (const auto &virtual_args_addr : virtual_args_addrs) { | for (const auto &virtual_args_addr : virtual_args_addrs) { | ||||
| addr_map_info.addr_num += virtual_args_addr.second.size(); | |||||
| for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) { | for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) { | ||||
| src_addrs.push_back(mbuf_list.at(index)); | |||||
| src_addrs.emplace_back(mbuf_list.at(addrs.first)); | |||||
| dst_addrs.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i)))); | dst_addrs.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i)))); | ||||
| } | } | ||||
| } | } | ||||
| index++; | |||||
| } | } | ||||
| GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num); | |||||
| // malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs | // malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs | ||||
| GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM)); | GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM)); | ||||
| @@ -93,7 +93,7 @@ class CpuTaskZeroCopy : public CpuTaskInfo { | |||||
| ~CpuTaskZeroCopy() override; | ~CpuTaskZeroCopy() override; | ||||
| Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; } | Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; } | ||||
| Status Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs); | |||||
| Status Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs); | |||||
| Status Distribute() override; | Status Distribute() override; | ||||
| private: | private: | ||||
| @@ -842,6 +842,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
| }; | }; | ||||
| vector<OpDescPtr> output_op_list; | vector<OpDescPtr> output_op_list; | ||||
| set<const void *> input_outside_addrs; | |||||
| set<const void *> output_outside_addrs; | |||||
| map<uint32_t, OpDescPtr> data_by_index; | map<uint32_t, OpDescPtr> data_by_index; | ||||
| map<string, OpDescPtr> variable_by_name; | map<string, OpDescPtr> variable_by_name; | ||||
| auto nodes = compute_graph->GetAllNodes(); | auto nodes = compute_graph->GetAllNodes(); | ||||
| @@ -858,7 +860,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
| GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc); | GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc); | ||||
| if (IsDataOp(op_desc->GetType())) { | if (IsDataOp(op_desc->GetType())) { | ||||
| if (InitDataOp(compute_graph, node, data_op_index, data_by_index) != SUCCESS) { | |||||
| if (InitDataOp(compute_graph, node, data_op_index, data_by_index, input_outside_addrs) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str()); | GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str()); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| @@ -867,7 +869,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
| } | } | ||||
| if (op_desc->GetType() == NETOUTPUT) { | if (op_desc->GetType() == NETOUTPUT) { | ||||
| if (InitNetOutput(compute_graph, node, output_op_list) != SUCCESS) { | |||||
| if (InitNetOutput(compute_graph, node, output_op_list, output_outside_addrs) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); | GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| @@ -961,7 +963,7 @@ void DavinciModel::SetLabelForDynamic(const NodePtr &node) { | |||||
| /// @return Status | /// @return Status | ||||
| /// | /// | ||||
| Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, | Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, | ||||
| map<uint32_t, OpDescPtr> &data_by_index) { | |||||
| map<uint32_t, OpDescPtr> &data_by_index, set<const void *> &input_outside_addrs) { | |||||
| // op_desc Checked by Init: Data, valid. | // op_desc Checked by Init: Data, valid. | ||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| if (node->GetOwnerComputeGraph() != graph) { | if (node->GetOwnerComputeGraph() != graph) { | ||||
| @@ -1000,16 +1002,12 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod | |||||
| GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str()); | GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str()); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| new_input_data_info_[data_index] = zero_copy_offset; | |||||
| for (size_t index = 0; index < virtual_addr_list.size(); ++index) { | |||||
| void *addr = virtual_addr_list.at(index); | |||||
| if (new_input_outside_addrs_.find(addr) != new_input_outside_addrs_.end()) { | |||||
| continue; | |||||
| } | |||||
| zero_copy_offset.SetInputOutsideAddrs(output_offset_list, addr, index, fusion_flag, real_virtual_addrs_); | |||||
| new_input_outside_addrs_[addr] = zero_copy_offset; | |||||
| if (input_outside_addrs.count(virtual_addr) == 0) { | |||||
| int64_t output_offset = output_offset_list.at(kDataIndex); | |||||
| zero_copy_offset.SetInputOutsideAddrs(output_offset, virtual_addr, fusion_flag, real_virtual_addrs_); | |||||
| input_outside_addrs.insert(virtual_addr); | |||||
| } | } | ||||
| input_data_info_[data_index] = zero_copy_offset; | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -1085,7 +1083,7 @@ bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { | |||||
| /// @param [in/out] vector<OpDescPtr>: All NetOutput node in model. | /// @param [in/out] vector<OpDescPtr>: All NetOutput node in model. | ||||
| /// @return Status | /// @return Status | ||||
| Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, | Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, | ||||
| vector<OpDescPtr> &output_op_list) { | |||||
| vector<OpDescPtr> &output_op_list, set<const void *> &output_outside_addrs) { | |||||
| // node->GetOpDesc Checked by Init: NetOutput, valid. | // node->GetOpDesc Checked by Init: NetOutput, valid. | ||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| // excludes the function op sub graph, e.g. case,if | // excludes the function op sub graph, e.g. case,if | ||||
| @@ -1117,7 +1115,7 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr & | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| size_t num = new_output_data_info_.size(); | |||||
| size_t num = output_data_info_.size(); | |||||
| bool fusion_flag = false; | bool fusion_flag = false; | ||||
| size_t input_count = input_size_list.size(); | size_t input_count = input_size_list.size(); | ||||
| @@ -1131,22 +1129,22 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr & | |||||
| Status ret = zero_copy_offset.InitOutputDataInfo(input_size_list, virtual_addr_list, op_desc, idx, fusion_flag); | Status ret = zero_copy_offset.InitOutputDataInfo(input_size_list, virtual_addr_list, op_desc, idx, fusion_flag); | ||||
| GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", | GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", | ||||
| op_desc->GetName().c_str()); return PARAM_INVALID;); | op_desc->GetName().c_str()); return PARAM_INVALID;); | ||||
| new_output_data_info_[num + idx] = zero_copy_offset; | |||||
| void *addr = virtual_addr_list.at(idx); | void *addr = virtual_addr_list.at(idx); | ||||
| int64_t input_offset = input_offset_list.at(idx); | int64_t input_offset = input_offset_list.at(idx); | ||||
| vector<void *> tensor_addrs; | |||||
| zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs); | |||||
| auto rslt = new_output_outside_addrs_.insert(std::pair<void *, ZeroCopyOffset>(addr, zero_copy_offset)); | |||||
| if (!rslt.second) { | |||||
| if (output_outside_addrs.count(addr) == 0) { | |||||
| vector<void *> tensor_addrs; | |||||
| zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs); | |||||
| output_outside_addrs.insert(addr); | |||||
| for (size_t i = 0; i < tensor_addrs.size(); ++i) { | |||||
| void *real_addr = tensor_addrs.at(i); | |||||
| DisableZeroCopy(real_addr); | |||||
| real_virtual_addrs_.insert(real_addr); | |||||
| } | |||||
| } else { | |||||
| GELOGI("same output_tensor_addr %p to different input_tensor of %s", addr, op_desc->GetName().c_str()); | GELOGI("same output_tensor_addr %p to different input_tensor of %s", addr, op_desc->GetName().c_str()); | ||||
| DisableZeroCopy(addr); | DisableZeroCopy(addr); | ||||
| } | } | ||||
| for (size_t i = 0; i < tensor_addrs.size(); ++i) { | |||||
| void *real_addr = tensor_addrs.at(i); | |||||
| DisableZeroCopy(real_addr); | |||||
| real_virtual_addrs_.insert(real_addr); | |||||
| } | |||||
| output_data_info_[num + idx] = zero_copy_offset; | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -1402,7 +1400,7 @@ Status DavinciModel::InitLabelSet(const OpDescPtr &op_desc) { | |||||
| } | } | ||||
| rtLabel_t rt_label = nullptr; | rtLabel_t rt_label = nullptr; | ||||
| rtError_t rt_error = rtLabelCreateEx(&rt_label, stream); | |||||
| rtError_t rt_error = rtLabelCreateExV2(&rt_label, rt_model_handle_, stream); | |||||
| if (rt_error != RT_ERROR_NONE || rt_label == nullptr) { | if (rt_error != RT_ERROR_NONE || rt_label == nullptr) { | ||||
| GELOGE(INTERNAL_ERROR, "InitLabelSet: %s create label failed, error=0x%x.", op_desc->GetName().c_str(), rt_error); | GELOGE(INTERNAL_ERROR, "InitLabelSet: %s create label failed, error=0x%x.", op_desc->GetName().c_str(), rt_error); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| @@ -1463,27 +1461,27 @@ Status DavinciModel::LoadWithQueue() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| if (input_queue_ids_.size() != new_input_data_info_.size()) { | |||||
| if (input_queue_ids_.size() != input_data_info_.size()) { | |||||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Input queue ids not match model: input_queue=%zu input_data=%zu", | GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Input queue ids not match model: input_queue=%zu input_data=%zu", | ||||
| input_queue_ids_.size(), new_input_data_info_.size()); | |||||
| input_queue_ids_.size(), input_data_info_.size()); | |||||
| return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; | return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; | ||||
| } | } | ||||
| if (output_queue_ids_.size() != new_output_data_info_.size()) { | |||||
| if (output_queue_ids_.size() != output_data_info_.size()) { | |||||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, | GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, | ||||
| "Output queue ids not match model: output_queue=%zu output_data=%zu", | "Output queue ids not match model: output_queue=%zu output_data=%zu", | ||||
| output_queue_ids_.size(), new_output_data_info_.size()); | |||||
| output_queue_ids_.size(), output_data_info_.size()); | |||||
| return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; | return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; | ||||
| } | } | ||||
| GE_CHK_STATUS_RET(AddHeadStream(), "Add head stream failed."); | GE_CHK_STATUS_RET(AddHeadStream(), "Add head stream failed."); | ||||
| // Binding input_queue and Data Op. | // Binding input_queue and Data Op. | ||||
| GE_CHK_STATUS_RET(BindInputQueue(), "Launch bind input queue failed."); | GE_CHK_STATUS_RET(BindInputQueue(), "Launch bind input queue failed."); | ||||
| GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, new_input_outside_addrs_), "Launch zero copy failed."); | |||||
| GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, input_data_info_), "Launch zero copy failed."); | |||||
| // Binding output_queue and NetOutput Op. | // Binding output_queue and NetOutput Op. | ||||
| GE_CHK_STATUS_RET(BindOutputQueue(), "Launch bind output queue failed."); | GE_CHK_STATUS_RET(BindOutputQueue(), "Launch bind output queue failed."); | ||||
| GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, new_output_outside_addrs_), "Launch zero copy failed."); | |||||
| GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, output_data_info_), "Launch zero copy failed."); | |||||
| GE_CHK_STATUS_RET(CpuActiveStream(), "Launch active entry stream failed."); | GE_CHK_STATUS_RET(CpuActiveStream(), "Launch active entry stream failed."); | ||||
| GE_CHK_STATUS_RET(CpuWaitEndGraph(), "Launch wait end graph failed."); | GE_CHK_STATUS_RET(CpuWaitEndGraph(), "Launch wait end graph failed."); | ||||
| @@ -1499,9 +1497,9 @@ Status DavinciModel::LoadWithQueue() { | |||||
| Status DavinciModel::BindInputQueue() { | Status DavinciModel::BindInputQueue() { | ||||
| // Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size() | // Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size() | ||||
| for (size_t i = 0; i < input_queue_ids_.size(); ++i) { | for (size_t i = 0; i < input_queue_ids_.size(); ++i) { | ||||
| auto it = new_input_data_info_.find(i); | |||||
| if (it == new_input_data_info_.end()) { | |||||
| GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", new_input_data_info_.size(), i); | |||||
| auto it = input_data_info_.find(i); | |||||
| if (it == input_data_info_.end()) { | |||||
| GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", input_data_info_.size(), i); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -1555,7 +1553,7 @@ Status DavinciModel::CpuModelDequeue(uint32_t queue_id) { | |||||
| } | } | ||||
| Status DavinciModel::CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list, | Status DavinciModel::CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list, | ||||
| std::map<const void *, ZeroCopyOffset> &outside_addrs) { | |||||
| const map<uint32_t, ZeroCopyOffset> &outside_addrs) { | |||||
| GELOGI("Set CpuKernel model zero_copy task enter."); | GELOGI("Set CpuKernel model zero_copy task enter."); | ||||
| std::shared_ptr<CpuTaskZeroCopy> zero_copy = MakeShared<CpuTaskZeroCopy>(rt_entry_stream_); | std::shared_ptr<CpuTaskZeroCopy> zero_copy = MakeShared<CpuTaskZeroCopy>(rt_entry_stream_); | ||||
| if (zero_copy == nullptr) { | if (zero_copy == nullptr) { | ||||
| @@ -1579,9 +1577,9 @@ Status DavinciModel::CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list, | |||||
| Status DavinciModel::BindOutputQueue() { | Status DavinciModel::BindOutputQueue() { | ||||
| // Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size() | // Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size() | ||||
| for (size_t i = 0; i < output_queue_ids_.size(); ++i) { | for (size_t i = 0; i < output_queue_ids_.size(); ++i) { | ||||
| auto it = new_output_data_info_.find(i); | |||||
| if (it == new_output_data_info_.end()) { | |||||
| GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i); | |||||
| auto it = output_data_info_.find(i); | |||||
| if (it == output_data_info_.end()) { | |||||
| GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -1685,9 +1683,9 @@ Status DavinciModel::CpuWaitEndGraph() { | |||||
| Status DavinciModel::BindEnqueue() { | Status DavinciModel::BindEnqueue() { | ||||
| for (size_t i = 0; i < output_queue_ids_.size(); ++i) { | for (size_t i = 0; i < output_queue_ids_.size(); ++i) { | ||||
| auto it = new_output_data_info_.find(i); | |||||
| if (it == new_output_data_info_.end()) { | |||||
| GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i); | |||||
| auto it = output_data_info_.find(i); | |||||
| if (it == output_data_info_.end()) { | |||||
| GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -2103,10 +2101,10 @@ Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_descs | |||||
| Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) { | Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) { | ||||
| rtMemcpyKind_t kind = device_data ? RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE; | rtMemcpyKind_t kind = device_data ? RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE; | ||||
| const std::vector<DataBuffer> &blobs = input_data.blobs; | const std::vector<DataBuffer> &blobs = input_data.blobs; | ||||
| for (const auto &data : new_input_data_info_) { | |||||
| for (const auto &data : input_data_info_) { | |||||
| if (data.first >= blobs.size()) { | if (data.first >= blobs.size()) { | ||||
| GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld, op_name(%s)", blobs.size(), | GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld, op_name(%s)", blobs.size(), | ||||
| new_input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first, | |||||
| input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first, | |||||
| data.second.GetOpName().c_str()); | data.second.GetOpName().c_str()); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -2427,18 +2425,18 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r | |||||
| output_data.index = data_id; | output_data.index = data_id; | ||||
| output_data.model_id = model_id_; | output_data.model_id = model_id_; | ||||
| if (output_data.blobs.size() != new_output_data_info_.size()) { | |||||
| if (output_data.blobs.size() != output_data_info_.size()) { | |||||
| GELOGE(FAILED, "Output data buffer num=%zu not equal model data num=%zu", output_data.blobs.size(), | GELOGE(FAILED, "Output data buffer num=%zu not equal model data num=%zu", output_data.blobs.size(), | ||||
| new_output_data_info_.size()); | |||||
| output_data_info_.size()); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| std::vector<DataBuffer> &blobs = output_data.blobs; | std::vector<DataBuffer> &blobs = output_data.blobs; | ||||
| size_t idx = 0; | size_t idx = 0; | ||||
| for (const auto &output : new_output_data_info_) { | |||||
| for (const auto &output : output_data_info_) { | |||||
| if (output.first >= blobs.size()) { | if (output.first >= blobs.size()) { | ||||
| GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", blobs.size(), | GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", blobs.size(), | ||||
| new_input_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first); | |||||
| input_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -3166,8 +3164,11 @@ void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) { | |||||
| /// @return None. | /// @return None. | ||||
| /// | /// | ||||
| void DavinciModel::SetCopyOnlyOutput() { | void DavinciModel::SetCopyOnlyOutput() { | ||||
| for (const auto &output_outside_addrs : new_output_outside_addrs_) { | |||||
| for (const auto &output_outside_addrs : output_data_info_) { | |||||
| ZeroCopyOffset output_outside = output_outside_addrs.second; | ZeroCopyOffset output_outside = output_outside_addrs.second; | ||||
| if (!output_outside.IsRelativeOffsetValid()) { | |||||
| return; | |||||
| } | |||||
| for (uint32_t out_count = 0; out_count < output_outside.GetAddrCount(); ++out_count) { | for (uint32_t out_count = 0; out_count < output_outside.GetAddrCount(); ++out_count) { | ||||
| auto &addrs_mapping_list = output_outside.GetOutsideAddrs(); | auto &addrs_mapping_list = output_outside.GetOutsideAddrs(); | ||||
| std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[out_count]; | std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[out_count]; | ||||
| @@ -3219,12 +3220,12 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<v | |||||
| for (size_t i = 0; i < nums; ++i) { | for (size_t i = 0; i < nums; ++i) { | ||||
| std::lock_guard<std::mutex> lock(outside_addrs_mutex_); | std::lock_guard<std::mutex> lock(outside_addrs_mutex_); | ||||
| for (auto &input_outside_addrs : new_input_outside_addrs_) { | |||||
| for (auto &input_outside_addrs : input_data_info_) { | |||||
| ZeroCopyOffset &input_outside = input_outside_addrs.second; | ZeroCopyOffset &input_outside = input_outside_addrs.second; | ||||
| input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); | input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); | ||||
| } | } | ||||
| for (auto &output_outside_addrs : new_output_outside_addrs_) { | |||||
| for (auto &output_outside_addrs : output_data_info_) { | |||||
| ZeroCopyOffset &output_outside = output_outside_addrs.second; | ZeroCopyOffset &output_outside = output_outside_addrs.second; | ||||
| output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); | output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); | ||||
| } | } | ||||
| @@ -3293,12 +3294,12 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 | |||||
| /// @return SUCCESS handle successfully / PARAM_INVALID for failed | /// @return SUCCESS handle successfully / PARAM_INVALID for failed | ||||
| /// | /// | ||||
| Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &output_data, bool is_dynamic) { | Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &output_data, bool is_dynamic) { | ||||
| if (UpdateIoTaskArgs(new_input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { | |||||
| if (UpdateIoTaskArgs(input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { | |||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update input data to model failed."); | GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update input data to model failed."); | ||||
| return ACL_ERROR_GE_PARAM_INVALID; | return ACL_ERROR_GE_PARAM_INVALID; | ||||
| } | } | ||||
| if (UpdateIoTaskArgs(new_output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) != | |||||
| if (UpdateIoTaskArgs(output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) != | |||||
| SUCCESS) { | SUCCESS) { | ||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update output data to model failed."); | GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update output data to model failed."); | ||||
| return ACL_ERROR_GE_PARAM_INVALID; | return ACL_ERROR_GE_PARAM_INVALID; | ||||
| @@ -675,7 +675,7 @@ class DavinciModel { | |||||
| /// @return Status | /// @return Status | ||||
| /// | /// | ||||
| Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, | Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, | ||||
| map<uint32_t, OpDescPtr> &data_by_index); | |||||
| map<uint32_t, OpDescPtr> &data_by_index, set<const void *> &input_outside_addrs); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| @@ -694,7 +694,8 @@ class DavinciModel { | |||||
| /// @param [in/out] vector<OpDescPtr>: All NetOutput node in model. | /// @param [in/out] vector<OpDescPtr>: All NetOutput node in model. | ||||
| /// @return Status | /// @return Status | ||||
| /// | /// | ||||
| Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list); | |||||
| Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list, | |||||
| set<const void *> &output_outside_addrs); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| @@ -764,7 +765,7 @@ class DavinciModel { | |||||
| /// | /// | ||||
| Status BindInputQueue(); | Status BindInputQueue(); | ||||
| Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, map<const void *, ZeroCopyOffset> &outside_addrs); | |||||
| Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| @@ -897,10 +898,8 @@ class DavinciModel { | |||||
| void *global_step_addr_{nullptr}; | void *global_step_addr_{nullptr}; | ||||
| uint64_t global_step_size_{0}; | uint64_t global_step_size_{0}; | ||||
| map<uint32_t, ZeroCopyOffset> new_input_data_info_; | |||||
| map<uint32_t, ZeroCopyOffset> new_output_data_info_; | |||||
| map<const void *, ZeroCopyOffset> new_input_outside_addrs_; | |||||
| map<const void *, ZeroCopyOffset> new_output_outside_addrs_; | |||||
| map<uint32_t, ZeroCopyOffset> input_data_info_; | |||||
| map<uint32_t, ZeroCopyOffset> output_data_info_; | |||||
| set<const void *> real_virtual_addrs_; | set<const void *> real_virtual_addrs_; | ||||
| @@ -100,8 +100,8 @@ class TsMemMall { | |||||
| private: | private: | ||||
| std::mutex mem_mutex_; | std::mutex mem_mutex_; | ||||
| std::unordered_map<int64_t, void *> mem_store_size_; | |||||
| std::unordered_map<void *, int64_t> mem_store_addr_; | |||||
| std::map<int64_t, void *> mem_store_size_; | |||||
| std::map<void *, int64_t> mem_store_addr_; | |||||
| rtMemType_t mem_type_; | rtMemType_t mem_type_; | ||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -127,8 +127,8 @@ void ZeroCopyOffset::IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const | |||||
| } | } | ||||
| } | } | ||||
| void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index, | |||||
| bool fusion_flag, std::set<const void *> &real_virtual_addrs) { | |||||
| void ZeroCopyOffset::SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag, | |||||
| set<const void *> &real_virtual_addrs) { | |||||
| uint32_t out_count = 0; | uint32_t out_count = 0; | ||||
| if (!fusion_flag) { | if (!fusion_flag) { | ||||
| out_count++; | out_count++; | ||||
| @@ -138,7 +138,6 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_l | |||||
| real_virtual_addrs.insert(addr); | real_virtual_addrs.insert(addr); | ||||
| } else { | } else { | ||||
| GELOGI("[ZCPY] set l2-fusion for virtual_addr %p.", addr); | GELOGI("[ZCPY] set l2-fusion for virtual_addr %p.", addr); | ||||
| int64_t output_offset = output_offset_list.at(index); | |||||
| for (size_t i = 0; i < zero_copy_basic_offset_.size(); ++i) { | for (size_t i = 0; i < zero_copy_basic_offset_.size(); ++i) { | ||||
| if (zero_copy_basic_offset_.at(i) == output_offset) { | if (zero_copy_basic_offset_.at(i) == output_offset) { | ||||
| out_count++; | out_count++; | ||||
| @@ -153,6 +152,7 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_l | |||||
| } | } | ||||
| } | } | ||||
| addr_count_ = out_count; | addr_count_ = out_count; | ||||
| valid_relative_offset_ = true; | |||||
| } | } | ||||
| void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr, | void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr, | ||||
| @@ -181,9 +181,13 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo | |||||
| } | } | ||||
| } | } | ||||
| addr_count_ = out_count; | addr_count_ = out_count; | ||||
| valid_relative_offset_ = true; | |||||
| } | } | ||||
| void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { | void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { | ||||
| if (!valid_relative_offset_) { | |||||
| return; | |||||
| } | |||||
| const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr); | const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr); | ||||
| for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) { | for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) { | ||||
| auto args_addrs = outside_addrs_[out_count].find(outside_addr); | auto args_addrs = outside_addrs_[out_count].find(outside_addr); | ||||
| @@ -43,8 +43,7 @@ class ZeroCopyOffset { | |||||
| ~ZeroCopyOffset(); | ~ZeroCopyOffset(); | ||||
| Status InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, bool &fusion_flag); | Status InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, bool &fusion_flag); | ||||
| void SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index, | |||||
| bool fusion_flag, std::set<const void *> &real_virtual_addrs); | |||||
| void SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag, set<const void *> &real_virtual_addrs); | |||||
| void IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const int64_t &tensor_addr, bool &fusion_flag); | void IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const int64_t &tensor_addr, bool &fusion_flag); | ||||
| Status InitOutputDataInfo(const vector<int64_t> &input_size_list, const vector<void *> &virtual_addr_list, | Status InitOutputDataInfo(const vector<int64_t> &input_size_list, const vector<void *> &virtual_addr_list, | ||||
| @@ -65,9 +64,10 @@ class ZeroCopyOffset { | |||||
| // data_size of Data/Netoutput | // data_size of Data/Netoutput | ||||
| int64_t GetDataSize() const { return data_size_; } | int64_t GetDataSize() const { return data_size_; } | ||||
| // value of *outside_addrs_ from davinci_model | // value of *outside_addrs_ from davinci_model | ||||
| const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() { return outside_addrs_; } | |||||
| const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() const { return outside_addrs_; } | |||||
| // name of op | // name of op | ||||
| std::string GetOpName() const { return op_name_; } | std::string GetOpName() const { return op_name_; } | ||||
| const bool IsRelativeOffsetValid() const { return valid_relative_offset_; } | |||||
| private: | private: | ||||
| void *basic_addr_ = nullptr; | void *basic_addr_ = nullptr; | ||||
| @@ -81,6 +81,7 @@ class ZeroCopyOffset { | |||||
| std::vector<int64_t> zero_copy_basic_offset_; | std::vector<int64_t> zero_copy_basic_offset_; | ||||
| std::vector<int64_t> zero_copy_relative_offset_; | std::vector<int64_t> zero_copy_relative_offset_; | ||||
| bool valid_relative_offset_ = false; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_ | #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_ | ||||
| @@ -131,7 +131,7 @@ bool IsTailingOptimization() { | |||||
| } | } | ||||
| ge::Status CheckFpCeilingMode() { | ge::Status CheckFpCeilingMode() { | ||||
| static const std::unordered_set<std::string> kValidFpCeilingMode = {"0", "1", "2"}; | |||||
| static const std::set<std::string> kValidFpCeilingMode = {"0", "1", "2"}; | |||||
| string mode; | string mode; | ||||
| auto ret = ge::GetContext().GetOption("ge.fpCeilingMode", mode); | auto ret = ge::GetContext().GetOption("ge.fpCeilingMode", mode); | ||||
| if (ret == ge::GRAPH_SUCCESS) { | if (ret == ge::GRAPH_SUCCESS) { | ||||
| @@ -170,8 +170,8 @@ class VarResource { | |||||
| std::unordered_map<std::string, VarAddrMgr> var_addr_mgr_map_; | std::unordered_map<std::string, VarAddrMgr> var_addr_mgr_map_; | ||||
| std::unordered_map<std::string, ge::GeTensorDesc> cur_var_tensor_desc_map_; | std::unordered_map<std::string, ge::GeTensorDesc> cur_var_tensor_desc_map_; | ||||
| std::unordered_map<std::string, std::vector<TransNodeInfo>> var_to_trans_road_; | std::unordered_map<std::string, std::vector<TransNodeInfo>> var_to_trans_road_; | ||||
| std::unordered_map<std::string, uint32_t> var_names_to_changed_graph_id_; | |||||
| std::unordered_map<std::string, uint32_t> var_names_to_allocated_graph_id_; | |||||
| std::map<std::string, uint32_t> var_names_to_changed_graph_id_; | |||||
| std::map<std::string, uint32_t> var_names_to_allocated_graph_id_; | |||||
| std::map<uint32_t, std::unordered_map<std::string, VarBroadCastInfo>> var_broad_cast_info_; | std::map<uint32_t, std::unordered_map<std::string, VarBroadCastInfo>> var_broad_cast_info_; | ||||
| }; | }; | ||||
| @@ -843,7 +843,7 @@ bool ge::GraphPartitioner::HasSecondPath(size_t src, size_t dst, size_t upper_bo | |||||
| /// Avoid recursion since stack space might be limited. | /// Avoid recursion since stack space might be limited. | ||||
| /// We instead keep a stack of nodes to visit. | /// We instead keep a stack of nodes to visit. | ||||
| std::vector<size_t> temp_stack; | std::vector<size_t> temp_stack; | ||||
| std::unordered_set<size_t> visited; | |||||
| std::set<size_t> visited; | |||||
| temp_stack.push_back(src); | temp_stack.push_back(src); | ||||
| while (!temp_stack.empty()) { | while (!temp_stack.empty()) { | ||||
| size_t cluster = temp_stack.back(); | size_t cluster = temp_stack.back(); | ||||
| @@ -36,7 +36,7 @@ using PartitionMap = std::unordered_map<ComputeGraphPtr, std::string>; | |||||
| using NodetoNodeMap = std::unordered_map<NodePtr, NodePtr>; | using NodetoNodeMap = std::unordered_map<NodePtr, NodePtr>; | ||||
| using EnginetoGraphMap = std::unordered_map<std::string, ComputeGraphPtr>; | using EnginetoGraphMap = std::unordered_map<std::string, ComputeGraphPtr>; | ||||
| using EdgeMap = std::set<std::pair<AnchorPtr, AnchorPtr>>; | using EdgeMap = std::set<std::pair<AnchorPtr, AnchorPtr>>; | ||||
| using ClusterSet = std::unordered_set<size_t>; | |||||
| using ClusterSet = std::set<size_t>; | |||||
| class Cluster { | class Cluster { | ||||
| public: | public: | ||||
| size_t index_; // corresponding to rank of node | size_t index_; // corresponding to rank of node | ||||
| @@ -50,12 +50,12 @@ Status RunOpKernelWithCheck(NodePtr &node, | |||||
| return FoldingPass::RunOpKernel(node, inputs, outputs); | return FoldingPass::RunOpKernel(node, inputs, outputs); | ||||
| } | } | ||||
| const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> | |||||
| const std::map<std::string, std::pair<std::uint64_t, uint64_t>> | |||||
| &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { | &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { | ||||
| return statistic_of_ge_constant_folding_; | return statistic_of_ge_constant_folding_; | ||||
| } | } | ||||
| const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> | |||||
| const std::map<std::string, std::pair<std::uint64_t, uint64_t>> | |||||
| &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { | &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { | ||||
| return statistic_of_op_constant_folding_; | return statistic_of_op_constant_folding_; | ||||
| } | } | ||||
| @@ -26,11 +26,11 @@ namespace ge { | |||||
| class ConstantFoldingPass : public FoldingPass { | class ConstantFoldingPass : public FoldingPass { | ||||
| public: | public: | ||||
| Status Run(ge::NodePtr &node) override; | Status Run(ge::NodePtr &node) override; | ||||
| const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> &GetGeConstantFoldingPerfStatistic() const; | |||||
| const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> &GetOpConstantFoldingPerfStatistic() const; | |||||
| const std::map<std::string, std::pair<std::uint64_t, uint64_t>> &GetGeConstantFoldingPerfStatistic() const; | |||||
| const std::map<std::string, std::pair<std::uint64_t, uint64_t>> &GetOpConstantFoldingPerfStatistic() const; | |||||
| private: | private: | ||||
| std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_op_constant_folding_; | |||||
| std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_ge_constant_folding_; | |||||
| std::map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_op_constant_folding_; | |||||
| std::map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_ge_constant_folding_; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -372,6 +372,11 @@ NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, | |||||
| } | } | ||||
| GELOGI("Create Assign op:%s.", op_desc->GetName().c_str()); | GELOGI("Create Assign op:%s.", op_desc->GetName().c_str()); | ||||
| if (!AttrUtils::SetBool(op_desc, ATTR_NEED_COMPILE, true)) { | |||||
| GELOGE(INTERNAL_ERROR, "Set ATTR_NEED_COMPILE Attr for node:%s fail.", op_desc->GetName().c_str()); | |||||
| return nullptr; | |||||
| } | |||||
| graphStatus ret = op_desc->AddInputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); | graphStatus ret = op_desc->AddInputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); | ||||
| if (ret != GRAPH_SUCCESS) { | if (ret != GRAPH_SUCCESS) { | ||||
| GELOGE(INTERNAL_ERROR, "Create Assign op: add ref input desc fail."); | GELOGE(INTERNAL_ERROR, "Create Assign op: add ref input desc fail."); | ||||
| @@ -52,7 +52,7 @@ class HcclContinuousMemcpyPass : public GraphPass { | |||||
| bool IsDataNode(const std::string& node_type); | bool IsDataNode(const std::string& node_type); | ||||
| std::unordered_map<std::string, uint32_t> node_num_map_; | |||||
| std::map<std::string, uint32_t> node_num_map_; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -50,7 +50,7 @@ class HcclMemcpyPass : public GraphPass { | |||||
| bool IsDataNode(const std::string& node_type); | bool IsDataNode(const std::string& node_type); | ||||
| std::unordered_map<std::string, uint32_t> node_num_map_; | |||||
| std::map<std::string, uint32_t> node_num_map_; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -92,8 +92,7 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { | |||||
| } | } | ||||
| // parser data dynamic info from atc parameter --input_shape | // parser data dynamic info from atc parameter --input_shape | ||||
| if (multibatch::ParserDataToDynmaicInfo(batch_shapes_, GetLocalOmgContext().user_input_dims, | |||||
| data_to_dynamic_info_) != SUCCESS) { | |||||
| if (CheckAndParseDynamicData() != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "Parse each data's own dynamic info failed"); | GELOGE(PARAM_INVALID, "Parse each data's own dynamic info failed"); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| @@ -177,6 +176,58 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
///
/// @ingroup ge
/// @brief Build the (data name, shape) list for multi-batch parsing and fill data_to_dynamic_info_.
///        Starts from a copy of GetLocalOmgContext().user_input_dims. Unless getnext_sink_dynamic_dims_
///        is set, every node in all_data_nodes_ whose output shape contains a negative dim is counted;
///        those whose name is absent from user_input_dims are checked against the dynamic option that
///        is set (dynamic_batch_size, dynamic_image_size or dynamic_dims) and appended to the list.
/// @return SUCCESS: parsed successfully
///         PARAM_INVALID: a batch / image-size shape check returned false, or no Data node has a negative dim
///         INTERNAL_ERROR: dynamic_dims is set and a Data node with a negative dim is absent from user_input_dims
///         NOTE(review): GE_CHK_STATUS_RET presumably returns the status of ParserDataToDynamicInfo on
///         failure -- the macro is not visible here, confirm.
///
| Status MultiBatchClonePass::CheckAndParseDynamicData() { | |||||
| size_t unknown_shape_count = 0; | |||||
  // Work on a copy: entries are appended to it below.
| auto data_name_and_shape = GetLocalOmgContext().user_input_dims; | |||||
  // Names taken from the copied list before the loop; names appended later are not added to it.
| std::vector<std::string> data_name_order; | |||||
| for (auto &item : data_name_and_shape) { | |||||
| data_name_order.push_back(item.first); | |||||
| } | |||||
| if (!getnext_sink_dynamic_dims_) { | |||||
| for (const auto &node : all_data_nodes_) { | |||||
| auto data_desc = NodeUtils::GetOutputDesc(*node, kDataOutIndex); | |||||
| auto data_shape = data_desc.GetShape(); | |||||
  // Only NCHW and NHWC are named; any other format is passed on as "Others".
| auto data_format = data_desc.GetFormat() == Format::FORMAT_NCHW ? "NCHW" : | |||||
| data_desc.GetFormat() == Format::FORMAT_NHWC ? "NHWC" : "Others"; | |||||
| auto data_name = node->GetName(); | |||||
| const auto &data_shape_dims = data_shape.GetDims(); | |||||
  // Every dim is >= 0: this Data node is skipped and not counted.
| if (std::all_of(data_shape_dims.begin(), data_shape_dims.end(), [](int64_t val) { return val >= 0; })) { | |||||
| continue; | |||||
| } | |||||
| ++unknown_shape_count; | |||||
| auto iter = find(data_name_order.begin(), data_name_order.end(), data_name); | |||||
  // Name is not in the user-supplied list: validate the shape for whichever dynamic option is set.
| if (iter == data_name_order.end()) { | |||||
| if (!GetLocalOmgContext().dynamic_batch_size.empty()) { | |||||
| auto ret = multibatch::CheckDynamicBatchShape(data_shape_dims, data_name); | |||||
| GE_IF_BOOL_EXEC(ret == false, GELOGE(PARAM_INVALID, "Failed to check dynamic batch shape of %s.", | |||||
| data_name.c_str()); return PARAM_INVALID); | |||||
| } else if (!GetLocalOmgContext().dynamic_image_size.empty()) { | |||||
| auto ret = multibatch::CheckDynamicImageSizeShape(data_shape_dims, data_name, data_format); | |||||
| GE_IF_BOOL_EXEC(ret == false, GELOGE(PARAM_INVALID, "Failed to check dynamic image size shape of %s.", | |||||
| data_name.c_str()); return PARAM_INVALID); | |||||
  // With dynamic_dims set, a name missing from the user-supplied list is reported as an error.
| } else if (!GetLocalOmgContext().dynamic_dims.empty()) { | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "reason"}, | |||||
| {"--input_shape", "all dynamic data must be set in --input_shape"}); | |||||
  // NOTE(review): "set int" in the log text below looks like a typo for "set in".
| GELOGE(INTERNAL_ERROR, "data: %s shape:%s must be set int --input_shape", | |||||
| node->GetName().c_str(), data_shape.ToString().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| data_name_and_shape.emplace_back(data_name, data_shape_dims); | |||||
| } | |||||
| } | |||||
| } | |||||
| auto ret = multibatch::ParserDataToDynamicInfo(batch_shapes_, data_name_and_shape, data_to_dynamic_info_); | |||||
| GE_CHK_STATUS_RET(ret, "Failed to parse data to dynamic info."); | |||||
  // This check runs after parsing: with no negative-dim Data node found, E10040 is reported.
  // NOTE(review): "unknow" in the log text below looks like a typo for "unknown".
| if (!getnext_sink_dynamic_dims_ && unknown_shape_count == 0) { | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10040"); | |||||
| GELOGE(PARAM_INVALID, | |||||
| "Need unknow shape data when user set --dynamic_batch_size, --dynamic_image_size or --dynamic_dims"); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status MultiBatchClonePass::InitParamsOfGetNext(const NodePtr &node) { | Status MultiBatchClonePass::InitParamsOfGetNext(const NodePtr &node) { | ||||
| data_count_from_getnext_ = 0; | data_count_from_getnext_ = 0; | ||||
| getnext_sink_dynamic_dims_ = false; | getnext_sink_dynamic_dims_ = false; | ||||
| @@ -175,6 +175,8 @@ class MultiBatchClonePass : public GraphPass { | |||||
| /// @return 0: SUCCESS / others: FAILED | /// @return 0: SUCCESS / others: FAILED | ||||
| /// | /// | ||||
| Status UpdateOutputTensor(uint32_t parent_index, uint32_t unused_num); | Status UpdateOutputTensor(uint32_t parent_index, uint32_t unused_num); | ||||
| Status CheckAndParseDynamicData(); | |||||
| std::string session_graph_id_; | std::string session_graph_id_; | ||||
| std::vector<std::vector<int64_t>> batch_shapes_; | std::vector<std::vector<int64_t>> batch_shapes_; | ||||
| @@ -235,7 +235,7 @@ class SwitchToStreamSwitchPass : public GraphPass { | |||||
| std::vector<NodePtr> stream_switch_nodes_; | std::vector<NodePtr> stream_switch_nodes_; | ||||
| std::unordered_map<OutDataAnchorPtr, std::map<int64_t, std::vector<std::list<NodePtr>>>> cond_node_map_; | std::unordered_map<OutDataAnchorPtr, std::map<int64_t, std::vector<std::list<NodePtr>>>> cond_node_map_; | ||||
| std::unordered_map<NodePtr, std::set<std::string>> switch_node_map_; | std::unordered_map<NodePtr, std::set<std::string>> switch_node_map_; | ||||
| std::unordered_map<std::string, uint32_t> node_num_map_; | |||||
| std::map<std::string, uint32_t> node_num_map_; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_GRAPH_PASSES_SWITCH_TO_STREAM_SWITCH_PASS_H_ | #endif // GE_GRAPH_PASSES_SWITCH_TO_STREAM_SWITCH_PASS_H_ | ||||
| @@ -738,7 +738,7 @@ Status MultiBatchGraphCopyer::CheckAndParseDynamicData(){ | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| auto ret = ParserDataToDynmaicInfo(shapes_, data_name_and_shape, data_to_dynamic_info_); | |||||
| auto ret = ParserDataToDynamicInfo(shapes_, data_name_and_shape, data_to_dynamic_info_); | |||||
| GE_CHK_STATUS_RET(ret, "Failed to parse data to dynamic info."); | GE_CHK_STATUS_RET(ret, "Failed to parse data to dynamic info."); | ||||
| if (!getnext_sink_dynamic_dims_ && unknown_shape_count == 0) { | if (!getnext_sink_dynamic_dims_ && unknown_shape_count == 0) { | ||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10040"); | ErrorManager::GetInstance().ATCReportErrMessage("E10040"); | ||||
| @@ -377,7 +377,7 @@ bool InitDynamicParams(vector<vector<int64_t>> &shapes) { | |||||
| /// @param [out] map<string, vector<vector<int64_t>>> &data_to_dynamic_info: key:data_name. value:dynamic dims. | /// @param [out] map<string, vector<vector<int64_t>>> &data_to_dynamic_info: key:data_name. value:dynamic dims. | ||||
| /// @return SUCCESS / PARAM_INVALID | /// @return SUCCESS / PARAM_INVALID | ||||
| /// | /// | ||||
| Status ParserDataToDynmaicInfo(const vector<vector<int64_t>> &shapes, | |||||
| Status ParserDataToDynamicInfo(const vector<vector<int64_t>> &shapes, | |||||
| vector<pair<string, vector<int64_t>>> &data_name_and_shape, | vector<pair<string, vector<int64_t>>> &data_name_and_shape, | ||||
| map<string, vector<vector<int64_t>> > &data_to_dynamic_info) { | map<string, vector<vector<int64_t>> > &data_to_dynamic_info) { | ||||
| size_t cur_data_index = 0; | size_t cur_data_index = 0; | ||||
| @@ -74,7 +74,7 @@ Status CalcShape(const std::vector<int64_t> &batch_shape, GeShape &data_shape); | |||||
| /// @param [out] map<string, vector<vector<int64_t>>> &data_to_dynamic_info: key:data_name. value:dynamic dims. | /// @param [out] map<string, vector<vector<int64_t>>> &data_to_dynamic_info: key:data_name. value:dynamic dims. | ||||
| /// @return SUCCESS / PARAM_INVALID | /// @return SUCCESS / PARAM_INVALID | ||||
| /// | /// | ||||
| Status ParserDataToDynmaicInfo(const vector<vector<int64_t>> &shapes, | |||||
| Status ParserDataToDynamicInfo(const vector<vector<int64_t>> &shapes, | |||||
| vector<pair<string, vector<int64_t>>> &data_name_and_shape, | vector<pair<string, vector<int64_t>>> &data_name_and_shape, | ||||
| map<string, vector<vector<int64_t>>> &data_to_dynamic_info); | map<string, vector<vector<int64_t>>> &data_to_dynamic_info); | ||||
| @@ -93,7 +93,7 @@ Status StampDynamicType(const OpDescPtr &op_desc); | |||||
| /// @param [in] const string &data_name: cur data name. | /// @param [in] const string &data_name: cur data name. | ||||
| /// @return true: check passed / false: check failed | /// @return true: check passed / false: check failed | ||||
| /// | /// | ||||
| bool CheckDynamicBatchShape(const vector<int64_t> &shape, const string &data_name); | |||||
| GE_FUNC_VISIBILITY bool CheckDynamicBatchShape(const vector<int64_t> &shape, const string &data_name); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| @@ -104,7 +104,7 @@ bool CheckDynamicBatchShape(const vector<int64_t> &shape, const string &data_nam | |||||
| /// @param [in] const std::string &input_format: format of input. | /// @param [in] const std::string &input_format: format of input. | ||||
| /// @return true: check passed / false: check failed | /// @return true: check passed / false: check failed | ||||
| /// | /// | ||||
| bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name, | |||||
| GE_FUNC_VISIBILITY bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name, | |||||
| const std::string &input_format); | const std::string &input_format); | ||||
| } // namespace multibatch | } // namespace multibatch | ||||
| @@ -21,10 +21,12 @@ add_library(host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | |||||
| target_compile_options(host_cpu_engine PRIVATE | target_compile_options(host_cpu_engine PRIVATE | ||||
| -Werror | -Werror | ||||
| -fno-common | -fno-common | ||||
| -fvisibility=hidden | |||||
| ) | ) | ||||
| target_compile_definitions(host_cpu_engine PRIVATE | target_compile_definitions(host_cpu_engine PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_include_directories(host_cpu_engine PRIVATE | target_include_directories(host_cpu_engine PRIVATE | ||||
| @@ -44,6 +46,10 @@ target_include_directories(host_cpu_engine PRIVATE | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
| ) | ) | ||||
| target_link_options(host_cpu_engine PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(host_cpu_engine PRIVATE | target_link_libraries(host_cpu_engine PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| @@ -60,11 +66,12 @@ add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | |||||
| target_compile_options(atc_host_cpu_engine PRIVATE | target_compile_options(atc_host_cpu_engine PRIVATE | ||||
| -Werror | -Werror | ||||
| -fno-common | -fno-common | ||||
| -fvisibility=hidden | |||||
| ) | ) | ||||
| target_compile_definitions(atc_host_cpu_engine PRIVATE | target_compile_definitions(atc_host_cpu_engine PRIVATE | ||||
| COMPILE_OMG_PACKAGE | |||||
| google=ascend_private | google=ascend_private | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_include_directories(atc_host_cpu_engine PRIVATE | target_include_directories(atc_host_cpu_engine PRIVATE | ||||
| @@ -84,6 +91,10 @@ target_include_directories(atc_host_cpu_engine PRIVATE | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
| ) | ) | ||||
| target_link_options(atc_host_cpu_engine PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(atc_host_cpu_engine PRIVATE | target_link_libraries(atc_host_cpu_engine PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| @@ -105,10 +116,12 @@ add_library(host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST}) | |||||
| target_compile_options(host_cpu_opskernel_builder PRIVATE | target_compile_options(host_cpu_opskernel_builder PRIVATE | ||||
| -Werror | -Werror | ||||
| -fno-common | -fno-common | ||||
| -fvisibility=hidden | |||||
| ) | ) | ||||
| target_compile_definitions(host_cpu_opskernel_builder PRIVATE | target_compile_definitions(host_cpu_opskernel_builder PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_include_directories(host_cpu_opskernel_builder PRIVATE | target_include_directories(host_cpu_opskernel_builder PRIVATE | ||||
| @@ -128,6 +141,10 @@ target_include_directories(host_cpu_opskernel_builder PRIVATE | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
| ) | ) | ||||
| target_link_options(host_cpu_opskernel_builder PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(host_cpu_opskernel_builder PRIVATE | target_link_libraries(host_cpu_opskernel_builder PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| @@ -145,10 +162,12 @@ add_library(atc_host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST}) | |||||
| target_compile_options(atc_host_cpu_opskernel_builder PRIVATE | target_compile_options(atc_host_cpu_opskernel_builder PRIVATE | ||||
| -Werror | -Werror | ||||
| -fno-common | -fno-common | ||||
| -fvisibility=hidden | |||||
| ) | ) | ||||
| target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE | target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_include_directories(atc_host_cpu_opskernel_builder PRIVATE | target_include_directories(atc_host_cpu_opskernel_builder PRIVATE | ||||
| @@ -168,6 +187,10 @@ target_include_directories(atc_host_cpu_opskernel_builder PRIVATE | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
| ) | ) | ||||
| target_link_options(atc_host_cpu_opskernel_builder PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(atc_host_cpu_opskernel_builder PRIVATE | target_link_libraries(atc_host_cpu_opskernel_builder PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| @@ -190,11 +213,13 @@ add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST}) | |||||
| target_compile_options(host_cpu_opskernel_builder_static PRIVATE | target_compile_options(host_cpu_opskernel_builder_static PRIVATE | ||||
| -Werror | -Werror | ||||
| -fno-common | -fno-common | ||||
| -fvisibility=hidden | |||||
| ) | ) | ||||
| target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE | target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| LOG_CPP | LOG_CPP | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_include_directories(host_cpu_opskernel_builder_static PRIVATE | target_include_directories(host_cpu_opskernel_builder_static PRIVATE | ||||
| @@ -17,6 +17,20 @@ | |||||
| #ifndef GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | #ifndef GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | ||||
| #define GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | #define GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | ||||
// GE_FUNC_VISIBILITY: export marker placed on declarations.
//   FUNC_VISIBILITY defined   -> _declspec(dllexport) when _MSC_VER is defined,
//                                __attribute__((visibility("default"))) otherwise.
//   FUNC_VISIBILITY undefined -> expands to nothing.
// NOTE(review): Microsoft documents the keyword as __declspec; the single-underscore _declspec is a
// compatibility synonym that is not available under /Za -- confirm this spelling is intended.
| #if defined(_MSC_VER) | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #else | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #endif | |||||
| #include <map> | #include <map> | ||||
| #include <memory> | #include <memory> | ||||
| #include <string> | #include <string> | ||||
| @@ -32,7 +46,7 @@ namespace host_cpu { | |||||
| * host cpu engine. | * host cpu engine. | ||||
| * Used for the ops which execute on the host. | * Used for the ops which execute on the host. | ||||
| */ | */ | ||||
| class HostCpuEngine { | |||||
| class GE_FUNC_VISIBILITY HostCpuEngine { | |||||
| public: | public: | ||||
| /** | /** | ||||
| * get HostCpuEngine instance. | * get HostCpuEngine instance. | ||||
| @@ -87,25 +101,25 @@ extern "C" { | |||||
| * When Ge start, GE will invoke this interface | * When Ge start, GE will invoke this interface | ||||
| * @return The status whether initialize successfully | * @return The status whether initialize successfully | ||||
| */ | */ | ||||
| ge::Status Initialize(const map<string, string> &options); | |||||
| GE_FUNC_VISIBILITY ge::Status Initialize(const map<string, string> &options); | |||||
| /** | /** | ||||
| * After the initialize, GE will invoke this interface to get the Ops kernel Store | * After the initialize, GE will invoke this interface to get the Ops kernel Store | ||||
| * @param ops_kernel_map The host cpu's ops kernel info | * @param ops_kernel_map The host cpu's ops kernel info | ||||
| */ | */ | ||||
| void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map); | |||||
| GE_FUNC_VISIBILITY void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map); | |||||
| /** | /** | ||||
| * After the initialize, GE will invoke this interface to get the Graph Optimizer | * After the initialize, GE will invoke this interface to get the Graph Optimizer | ||||
| * @param graph_optimizers The host cpu's Graph Optimizer objs | * @param graph_optimizers The host cpu's Graph Optimizer objs | ||||
| */ | */ | ||||
| void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers); | |||||
| GE_FUNC_VISIBILITY void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers); | |||||
| /** | /** | ||||
| * When the graph finished, GE will invoke this interface | * When the graph finished, GE will invoke this interface | ||||
| * @return The status whether initialize successfully | * @return The status whether initialize successfully | ||||
| */ | */ | ||||
| ge::Status Finalize(); | |||||
| GE_FUNC_VISIBILITY ge::Status Finalize(); | |||||
| } | } | ||||
| #endif // GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | #endif // GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | ||||
| @@ -17,11 +17,25 @@ | |||||
| #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ | #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ | ||||
| #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ | #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ | ||||
// Export marker for symbols that must stay visible outside the library.
// It expands to a compiler-specific export attribute only when the build
// defines FUNC_VISIBILITY; otherwise it expands to nothing.
#ifdef FUNC_VISIBILITY
#if defined(_MSC_VER)
#define GE_FUNC_VISIBILITY _declspec(dllexport)
#else
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#endif
#else
#define GE_FUNC_VISIBILITY
#endif
| #include "common/opskernel/ops_kernel_builder.h" | #include "common/opskernel/ops_kernel_builder.h" | ||||
| namespace ge { | namespace ge { | ||||
| namespace host_cpu { | namespace host_cpu { | ||||
| class HostCpuOpsKernelBuilder : public OpsKernelBuilder { | |||||
| class GE_FUNC_VISIBILITY HostCpuOpsKernelBuilder : public OpsKernelBuilder { | |||||
| public: | public: | ||||
| Status Initialize(const map<std::string, std::string> &options) override; | Status Initialize(const map<std::string, std::string> &options) override; | ||||
| @@ -17,6 +17,20 @@ | |||||
| #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_ | #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_ | ||||
| #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_ | #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_ | ||||
// Export marker for symbols that must stay visible outside the library.
// It expands to a compiler-specific export attribute only when the build
// defines FUNC_VISIBILITY; otherwise it expands to nothing.
#ifdef FUNC_VISIBILITY
#if defined(_MSC_VER)
#define GE_FUNC_VISIBILITY _declspec(dllexport)
#else
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#endif
#else
#define GE_FUNC_VISIBILITY
#endif
| #include <map> | #include <map> | ||||
| #include <string> | #include <string> | ||||
| #include <vector> | #include <vector> | ||||
| @@ -25,7 +39,7 @@ | |||||
| namespace ge { | namespace ge { | ||||
| namespace host_cpu { | namespace host_cpu { | ||||
| class HostCpuOpsKernelInfoStore : public OpsKernelInfoStore { | |||||
| class GE_FUNC_VISIBILITY HostCpuOpsKernelInfoStore : public OpsKernelInfoStore { | |||||
| public: | public: | ||||
| HostCpuOpsKernelInfoStore() {} | HostCpuOpsKernelInfoStore() {} | ||||
| ~HostCpuOpsKernelInfoStore() override = default; | ~HostCpuOpsKernelInfoStore() override = default; | ||||
| @@ -21,7 +21,7 @@ | |||||
| namespace ge { | namespace ge { | ||||
| namespace host_cpu { | namespace host_cpu { | ||||
| class HostOp : public Op { | |||||
| class GE_FUNC_VISIBILITY HostOp : public Op { | |||||
| public: | public: | ||||
| HostOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} | HostOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} | ||||
| ~HostOp() override = default; | ~HostOp() override = default; | ||||
| @@ -29,7 +29,7 @@ namespace host_cpu { | |||||
| /** | /** | ||||
| * The base class for all op. | * The base class for all op. | ||||
| */ | */ | ||||
| class Op { | |||||
| class GE_FUNC_VISIBILITY Op { | |||||
| public: | public: | ||||
| Op(const Node &node, RunContext &run_context) : run_context_(run_context), node_(node) {} | Op(const Node &node, RunContext &run_context) : run_context_(run_context), node_(node) {} | ||||
| virtual ~Op() = default; | virtual ~Op() = default; | ||||
| @@ -32,7 +32,7 @@ using OP_CREATOR_FUNC = std::function<std::shared_ptr<Op>(const Node &, RunConte | |||||
| /** | /** | ||||
| * manage all the op, support create op. | * manage all the op, support create op. | ||||
| */ | */ | ||||
| class OpFactory { | |||||
| class GE_FUNC_VISIBILITY OpFactory { | |||||
| public: | public: | ||||
| static OpFactory &Instance(); | static OpFactory &Instance(); | ||||
| @@ -70,7 +70,7 @@ class OpFactory { | |||||
| std::vector<std::string> all_ops_; | std::vector<std::string> all_ops_; | ||||
| }; | }; | ||||
| class OpRegistrar { | |||||
| class GE_FUNC_VISIBILITY OpRegistrar { | |||||
| public: | public: | ||||
| OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) { | OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) { | ||||
| OpFactory::Instance().RegisterCreator(type, func); | OpFactory::Instance().RegisterCreator(type, func); | ||||
| @@ -71,7 +71,7 @@ TensorValue::TensorValue(void *buffer, size_t size) : ref_buffer_(buffer), ref_s | |||||
| TensorValue::~TensorValue() { Destroy(); } | TensorValue::~TensorValue() { Destroy(); } | ||||
| void TensorValue::Destroy() { | void TensorValue::Destroy() { | ||||
| if (buffer_ != nullptr || ref_buffer_ != nullptr) { | |||||
| if (buffer_ != nullptr) { | |||||
| GELOGD("Unref tensor: %s", DebugString().c_str()); | GELOGD("Unref tensor: %s", DebugString().c_str()); | ||||
| buffer_.reset(); | buffer_.reset(); | ||||
| } | } | ||||
| @@ -71,12 +71,14 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, | |||||
| GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_)); | GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_)); | ||||
| RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End"); | RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End"); | ||||
| HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc), "Failed to execute partitioned call."); | |||||
| HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc, args.outputs), | |||||
| "Failed to execute partitioned call."); | |||||
| RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); | RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); | ||||
| HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); | HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); | ||||
| RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); | RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); | ||||
| args.outputs.clear(); | |||||
| HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); | HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); | ||||
| RECORD_MODEL_EXECUTION_EVENT(&context_, "[GetOutput] End"); | RECORD_MODEL_EXECUTION_EVENT(&context_, "[GetOutput] End"); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -131,10 +131,14 @@ Status SubgraphExecutor::InitInputsForKnownShape(const std::vector<TensorValue> | |||||
| } | } | ||||
| Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs, | Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs, | ||||
| const std::vector<ConstGeTensorDescPtr> &input_desc) { | |||||
| const std::vector<ConstGeTensorDescPtr> &input_desc, | |||||
| const std::vector<TensorValue> &outputs) { | |||||
| GELOGD("[%s] is dynamic = %s", graph_item_->GetName().c_str(), graph_item_->IsDynamic() ? "true" : "false"); | GELOGD("[%s] is dynamic = %s", graph_item_->GetName().c_str(), graph_item_->IsDynamic() ? "true" : "false"); | ||||
| GE_CHK_STATUS_RET(Init(inputs, input_desc), "[%s] Failed to init executor.", graph_item_->GetName().c_str()); | GE_CHK_STATUS_RET(Init(inputs, input_desc), "[%s] Failed to init executor.", graph_item_->GetName().c_str()); | ||||
| if (!outputs.empty()) { | |||||
| GE_CHK_STATUS_RET(EnableOutputZeroCopy(outputs), | |||||
| "Failed to enable output zero copy by user provided outputs."); | |||||
| } | |||||
| if (!graph_item_->IsDynamic()) { | if (!graph_item_->IsDynamic()) { | ||||
| return ExecuteAsyncForKnownShape(inputs); | return ExecuteAsyncForKnownShape(inputs); | ||||
| } | } | ||||
| @@ -144,6 +148,11 @@ Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs, | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs, | |||||
| const std::vector<ConstGeTensorDescPtr> &input_desc) { | |||||
| return ExecuteAsync(inputs, input_desc, {}); | |||||
| } | |||||
| Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vector<TensorValue> &inputs) { | Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vector<TensorValue> &inputs) { | ||||
| GELOGD("[%s] subgraph is not dynamic.", graph_item_->GetName().c_str()); | GELOGD("[%s] subgraph is not dynamic.", graph_item_->GetName().c_str()); | ||||
| if (graph_item_->GetAllNodes().size() != 1) { | if (graph_item_->GetAllNodes().size() != 1) { | ||||
| @@ -440,5 +449,37 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status SubgraphExecutor::EnableOutputZeroCopy(const vector<TensorValue> &outputs) { | |||||
| GELOGD("To enable zero copy, output number = %zu", outputs.size()); | |||||
| const auto &output_edges = graph_item_->GetOutputEdges(); | |||||
| // Op -> MetOutput, set the output tensor of Op that output to the NetOutput node | |||||
| if (outputs.size() != output_edges.size()) { | |||||
| GELOGE(PARAM_INVALID, "Output number mismatches, expect = %zu, but given = %zu", | |||||
| output_edges.size(), | |||||
| outputs.size()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| for (size_t i = 0; i < outputs.size(); ++i) { | |||||
| auto &output_tensor = outputs[i]; | |||||
| auto &output_node = output_edges[i].first; | |||||
| int output_idx = output_edges[i].second; | |||||
| GELOGD("[%s] Set output tensor[%zu] to [%s]'s output[%d], tensor = %s", | |||||
| graph_item_->GetName().c_str(), | |||||
| i, | |||||
| output_node->NodeName().c_str(), | |||||
| output_idx, | |||||
| output_tensor.DebugString().c_str()); | |||||
| GE_CHK_STATUS_RET(subgraph_context_->SetOutput(*output_node, output_idx, output_tensor), | |||||
| "[%s] Failed to set input tensor[%zu]", | |||||
| graph_item_->GetName().c_str(), | |||||
| i); | |||||
| } | |||||
| GELOGD("Done enabling zero copy for outputs successfully."); | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -43,7 +43,19 @@ class SubgraphExecutor { | |||||
| * @param input_desc input tensor descriptions | * @param input_desc input tensor descriptions | ||||
| * @return SUCCESS on success, error code otherwise | * @return SUCCESS on success, error code otherwise | ||||
| */ | */ | ||||
| Status ExecuteAsync(const std::vector<TensorValue> &inputs, const std::vector<ConstGeTensorDescPtr> &input_desc); | |||||
| Status ExecuteAsync(const std::vector<TensorValue> &inputs, | |||||
| const std::vector<ConstGeTensorDescPtr> &input_desc); | |||||
| /** | |||||
| * Execute subgraph async, output tensor address(not data) and output tensor descriptions are | |||||
| * valid after this method returned | |||||
| * @param inputs input tensors | |||||
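| * @param input_desc input tensor descriptions | |||||
| * @param outputs user-provided output tensors; when non-empty they are passed to EnableOutputZeroCopy before execution | |||||
| * @return SUCCESS on success, error code otherwise | |||||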
| */ | |||||
| Status ExecuteAsync(const std::vector<TensorValue> &inputs, | |||||
| const std::vector<ConstGeTensorDescPtr> &input_desc, | |||||
| const std::vector<TensorValue> &outputs); | |||||
| /** | /** | ||||
| * Execute subgraph async, output tensor address(not data) and output tensor descriptions are | * Execute subgraph async, output tensor address(not data) and output tensor descriptions are | ||||
| @@ -76,6 +88,7 @@ class SubgraphExecutor { | |||||
| private: | private: | ||||
| Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state); | Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state); | ||||
| Status EnableOutputZeroCopy(const std::vector<TensorValue> &outputs); | |||||
| static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state); | static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state); | ||||
| Status Init(const std::vector<TensorValue> &inputs, | Status Init(const std::vector<TensorValue> &inputs, | ||||
| const std::vector<ConstGeTensorDescPtr> &input_desc); | const std::vector<ConstGeTensorDescPtr> &input_desc); | ||||
| @@ -40,9 +40,14 @@ HybridModel::~HybridModel() { | |||||
| GELOGD("[%s] HybridModel destroyed.", model_name_.c_str()); | GELOGD("[%s] HybridModel destroyed.", model_name_.c_str()); | ||||
| } | } | ||||
| Status HybridModel::Init() { | |||||
| Status HybridModel::Init(bool is_single_op) { | |||||
| GELOGD("Start to init hybrid model."); | GELOGD("Start to init hybrid model."); | ||||
| GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "Failed to build hybrid model."); | |||||
| is_single_op_ = is_single_op; | |||||
| if (is_single_op) { | |||||
| GE_CHK_STATUS_RET(HybridModelBuilder(*this).BuildForSingleOp(), "Failed to build hybrid model."); | |||||
| } else { | |||||
| GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "Failed to build hybrid model."); | |||||
| } | |||||
| GELOGD("HybridModel initialized successfully."); | GELOGD("HybridModel initialized successfully."); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -37,7 +37,7 @@ class HybridModel { | |||||
| ~HybridModel(); | ~HybridModel(); | ||||
| Status Init(); | |||||
| Status Init(bool is_single_op = false); | |||||
| const NodeItem *GetNodeItem(const NodePtr &node) const; | const NodeItem *GetNodeItem(const NodePtr &node) const; | ||||
| @@ -69,6 +69,10 @@ class HybridModel { | |||||
| return model_id_; | return model_id_; | ||||
| } | } | ||||
| bool IsSingleOp() const { | |||||
| return is_single_op_; | |||||
| } | |||||
| TensorValue* GetVariable(const string &name) const; | TensorValue* GetVariable(const string &name) const; | ||||
| NodePtr GetVariableNode(const string &name) const; | NodePtr GetVariableNode(const string &name) const; | ||||
| @@ -131,11 +135,13 @@ class HybridModel { | |||||
| std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_; | std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_; | ||||
| bool is_new_model_desc_ = false; // support aipp | bool is_new_model_desc_ = false; // support aipp | ||||
| bool is_single_op_ = false; | |||||
| // runtime fields | // runtime fields | ||||
| uint32_t device_id_ = 0; | uint32_t device_id_ = 0; | ||||
| uint32_t model_id_ = 0; | uint32_t model_id_ = 0; | ||||
| uint8_t *var_mem_base_ = nullptr; | uint8_t *var_mem_base_ = nullptr; | ||||
| std::unique_ptr<TensorBuffer> weight_buffer_; | |||||
| RuntimeParam root_runtime_param_; | RuntimeParam root_runtime_param_; | ||||
| }; | }; | ||||
| } // namespace hybrid | } // namespace hybrid | ||||
| @@ -147,6 +147,21 @@ Status HybridModelBuilder::Build() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status HybridModelBuilder::BuildForSingleOp() { | |||||
| GE_CHK_STATUS_RET(ValidateParams(), "Failed to validate GeRootModel"); | |||||
| hybrid_model_.model_name_ = ge_root_model_->GetRootGraph()->GetName(); | |||||
| GELOGI("[%s] Start to build hybrid model.", GetGraphName()); | |||||
| auto ret = ge_root_model_->GetSubgraphInstanceNameToModel(); | |||||
| const GeModelPtr ge_model = ret[ge_root_model_->GetRootGraph()->GetName()]; | |||||
| GE_CHK_STATUS_RET(IndexTaskDefs(ge_root_model_->GetRootGraph(), ge_model), | |||||
| "[%s] Failed to index task defs", GetGraphName()); | |||||
| GE_CHK_STATUS_RET(LoadGraph(), "[%s] Failed to load graph", GetGraphName()); | |||||
| GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName()); | |||||
| GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName()); | |||||
| GELOGI("[%s] Done building hybrid model for single op successfully.", GetGraphName()); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status HybridModelBuilder::ValidateParams() { | Status HybridModelBuilder::ValidateParams() { | ||||
| GE_CHECK_NOTNULL(ge_root_model_); | GE_CHECK_NOTNULL(ge_root_model_); | ||||
| GE_CHECK_NOTNULL(ge_root_model_->GetRootGraph()); | GE_CHECK_NOTNULL(ge_root_model_->GetRootGraph()); | ||||
| @@ -951,46 +966,71 @@ Status HybridModelBuilder::InitVariableTensors() { | |||||
| } | } | ||||
| Status HybridModelBuilder::InitWeights() { | Status HybridModelBuilder::InitWeights() { | ||||
| // For constant in root graph | |||||
| const auto &root_graph = ge_root_model_->GetRootGraph(); | |||||
| const auto &subgraph_models = ge_root_model_->GetSubgraphInstanceNameToModel(); | |||||
| auto iter = subgraph_models.find(root_graph->GetName()); | |||||
| if (iter == subgraph_models.end()) { | |||||
| GELOGD("Root graph model not found"); | |||||
| return SUCCESS; | |||||
| } | |||||
| auto &root_model = iter->second; | |||||
| const auto &weight_buffer = root_model->GetWeight(); | |||||
| if (weight_buffer.GetSize() == 0) { | |||||
| GELOGD("weight is empty"); | |||||
| return SUCCESS; | |||||
| } | |||||
| auto allocator = NpuMemoryAllocator::GetAllocator(); | auto allocator = NpuMemoryAllocator::GetAllocator(); | ||||
| GE_CHECK_NOTNULL(allocator); | GE_CHECK_NOTNULL(allocator); | ||||
| for (auto &it : hybrid_model_.node_items_) { | |||||
| auto &node_item = it.second; | |||||
| if (node_item->node_type != CONSTANT) { | |||||
| hybrid_model_.weight_buffer_ = TensorBuffer::Create(allocator, weight_buffer.size()); | |||||
| GE_CHECK_NOTNULL(hybrid_model_.weight_buffer_); | |||||
| auto weight_base = reinterpret_cast<uint8_t *>(hybrid_model_.weight_buffer_->GetData()); | |||||
| GE_CHK_RT_RET(rtMemcpy(weight_base, | |||||
| hybrid_model_.weight_buffer_->GetSize(), | |||||
| weight_buffer.GetData(), | |||||
| weight_buffer.GetSize(), | |||||
| RT_MEMCPY_HOST_TO_DEVICE)); | |||||
| GELOGI("Init weight mem successfully, weight base %p, weight size = %zu", | |||||
| weight_base, | |||||
| hybrid_model_.weight_buffer_->GetSize()); | |||||
| for (auto &node : root_graph->GetDirectNode()) { | |||||
| if (node->GetType() != CONSTANT) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| const auto &constant_node = node_item->node; | |||||
| auto op_desc = constant_node->GetOpDesc(); | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| auto v_weights = ModelUtils::GetWeights(op_desc); | auto v_weights = ModelUtils::GetWeights(op_desc); | ||||
| if (v_weights.empty()) { | if (v_weights.empty()) { | ||||
| GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", constant_node->GetName().c_str()); | |||||
| GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", node->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get()); | auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get()); | ||||
| auto output_desc = op_desc->MutableOutputDesc(0); | |||||
| GE_CHECK_NOTNULL(output_desc); | |||||
| auto tensor_size = ge_tensor->GetData().GetSize(); | |||||
| GELOGD("[%s] Start to init Constant node [%s], size = %ld", | |||||
| GE_CHECK_NOTNULL(ge_tensor); | |||||
| const GeTensorDesc &tensor_desc = ge_tensor->GetTensorDesc(); | |||||
| int64_t tensor_size = 0; | |||||
| GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*op_desc->MutableOutputDesc(0), tensor_size), | |||||
| "[%s] Failed to get tensor size", | |||||
| node->GetName().c_str()); | |||||
| int64_t data_offset = 0; | |||||
| GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetDataOffset(tensor_desc, data_offset), | |||||
| "[%s] Failed to get data offset", | |||||
| node->GetName().c_str()); | |||||
| GELOGD("[%s] Start to init Constant node [%s], size = %ld, offset = %ld", | |||||
| GetGraphName(), | GetGraphName(), | ||||
| constant_node->GetName().c_str(), | |||||
| tensor_size); | |||||
| node->GetName().c_str(), | |||||
| tensor_size, | |||||
| data_offset); | |||||
| auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size); | |||||
| auto tensor_buffer = TensorBuffer::Create(weight_base + data_offset, tensor_size); | |||||
| GE_CHECK_NOTNULL(tensor_buffer); | GE_CHECK_NOTNULL(tensor_buffer); | ||||
| std::unique_ptr<TensorValue> constant_tensor(new (std::nothrow)TensorValue(std::move(tensor_buffer))); | std::unique_ptr<TensorValue> constant_tensor(new (std::nothrow)TensorValue(std::move(tensor_buffer))); | ||||
| GE_CHECK_NOTNULL(constant_tensor); | GE_CHECK_NOTNULL(constant_tensor); | ||||
| constant_tensor->SetName("Constant_" + op_desc->GetName()); | constant_tensor->SetName("Constant_" + op_desc->GetName()); | ||||
| if (tensor_size > 0) { | |||||
| GE_CHK_RT_RET(rtMemcpy(constant_tensor->MutableData(), | |||||
| constant_tensor->GetSize(), | |||||
| ge_tensor->GetData().data(), | |||||
| ge_tensor->GetData().size(), | |||||
| RT_MEMCPY_HOST_TO_DEVICE)); | |||||
| } | |||||
| hybrid_model_.constant_tensors_.emplace(constant_node, std::move(constant_tensor)); | |||||
| GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), constant_node->GetName().c_str(), tensor_size); | |||||
| hybrid_model_.constant_tensors_.emplace(node, std::move(constant_tensor)); | |||||
| GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), node->GetName().c_str(), tensor_size); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -1038,6 +1078,53 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status HybridModelBuilder::IndexTaskDefs(const ComputeGraphPtr &sub_graph, const GeModelPtr &ge_model) { | |||||
| // index task defs | |||||
| GELOGD("To index tasks for subgraph: %s", sub_graph->GetName().c_str()); | |||||
| std::unordered_map<int64_t, NodePtr> node_map; | |||||
| for (const auto &node : sub_graph->GetDirectNode()) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | |||||
| auto node_id = node->GetOpDesc()->GetId(); | |||||
| GELOGD("op_index = %ld, node_name = %s", node_id, node->GetName().c_str()); | |||||
| node_map.emplace(node_id, node); | |||||
| } | |||||
| auto tasks = ge_model->GetModelTaskDefPtr()->task(); | |||||
| for (int i = 0; i < tasks.size(); ++i) { | |||||
| const domi::TaskDef &task_def = tasks[i]; | |||||
| GELOGI("Task id = %d, task type = %d", i, task_def.type()); | |||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||||
| uint32_t op_index = -1; | |||||
| if (task_type == RT_MODEL_TASK_KERNEL) { | |||||
| op_index = task_def.kernel().context().op_index(); | |||||
| } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | |||||
| op_index = task_def.kernel_ex().op_index(); | |||||
| } else if (task_type == RT_MODEL_TASK_HCCL) { | |||||
| op_index = task_def.kernel_hccl().op_index(); | |||||
| } else { | |||||
| GELOGD("Skip task type: %d", static_cast<int>(task_type)); | |||||
| continue; | |||||
| } | |||||
| auto iter = node_map.find(op_index); | |||||
| if (iter == node_map.end()) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to get node by index = %u", op_index); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| auto &node = iter->second; | |||||
| if (task_type == RT_MODEL_TASK_KERNEL) { | |||||
| ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc()); | |||||
| } | |||||
| GELOGD("Task loaded for node: %s, task type = %d, op_index = %u", node->GetName().c_str(), task_type, op_index); | |||||
| hybrid_model_.task_defs_[node].emplace_back(task_def); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status HybridModelBuilder::IndexTaskDefs() { | Status HybridModelBuilder::IndexTaskDefs() { | ||||
| const auto &root_graph = ge_root_model_->GetRootGraph(); | const auto &root_graph = ge_root_model_->GetRootGraph(); | ||||
| if (SetOutputNameAttr(*root_graph) != SUCCESS) { | if (SetOutputNameAttr(*root_graph) != SUCCESS) { | ||||
| @@ -35,6 +35,7 @@ class HybridModelBuilder { | |||||
| explicit HybridModelBuilder(HybridModel &hybrid_model); | explicit HybridModelBuilder(HybridModel &hybrid_model); | ||||
| ~HybridModelBuilder() = default; | ~HybridModelBuilder() = default; | ||||
| Status Build(); | Status Build(); | ||||
| Status BuildForSingleOp(); | |||||
| private: | private: | ||||
| static Status UpdateAnchorStatus(const NodePtr &node); | static Status UpdateAnchorStatus(const NodePtr &node); | ||||
| @@ -64,6 +65,7 @@ class HybridModelBuilder { | |||||
| Status ParseDependentInputNodes(NodeItem &node_item, const std::vector<string> &dependencies); | Status ParseDependentInputNodes(NodeItem &node_item, const std::vector<string> &dependencies); | ||||
| Status ParseDependentForFusedSubgraph(NodeItem &node_item); | Status ParseDependentForFusedSubgraph(NodeItem &node_item); | ||||
| Status IndexTaskDefs(); | Status IndexTaskDefs(); | ||||
| Status IndexTaskDefs(const ComputeGraphPtr &sub_graph, const GeModelPtr &ge_model); | |||||
| Status IndexSpecialNodes(); | Status IndexSpecialNodes(); | ||||
| Status InitRuntimeParams(); | Status InitRuntimeParams(); | ||||
| Status InitModelMem(); | Status InitModelMem(); | ||||
| @@ -49,6 +49,7 @@ Status AiCoreNodeExecutor::Initialize() { | |||||
| Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const { | Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const { | ||||
| GE_CHECK_NOTNULL(node); | GE_CHECK_NOTNULL(node); | ||||
| GELOGI("AiCoreNodeExecutor(%s) LoadTask Start.", node->GetName().c_str()); | GELOGI("AiCoreNodeExecutor(%s) LoadTask Start.", node->GetName().c_str()); | ||||
| bool is_single_op = model.IsSingleOp(); | |||||
| auto *task_defs = model.GetTaskDefs(node); | auto *task_defs = model.GetTaskDefs(node); | ||||
| if (task_defs == nullptr || task_defs->empty()) { | if (task_defs == nullptr || task_defs->empty()) { | ||||
| @@ -66,7 +67,8 @@ Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &nod | |||||
| AiCoreTaskBuilder builder(node->GetOpDesc(), *task_defs); | AiCoreTaskBuilder builder(node->GetOpDesc(), *task_defs); | ||||
| std::unique_ptr<NodeTask> node_task; | std::unique_ptr<NodeTask> node_task; | ||||
| GE_CHK_STATUS_RET(builder.BuildTask(node_task, true), "[%s] Failed to build op tasks.", node->GetName().c_str()); | |||||
| GE_CHK_STATUS_RET(builder.BuildTask(node_task, true, is_single_op), | |||||
| "[%s] Failed to build op tasks.", node->GetName().c_str()); | |||||
| task = std::move(node_task); | task = std::move(node_task); | ||||
| GELOGI("AiCoreNodeExecutor(%s) LoadTask End.", node->GetName().c_str()); | GELOGI("AiCoreNodeExecutor(%s) LoadTask End.", node->GetName().c_str()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -65,7 +65,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||||
| } | } | ||||
| TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | ||||
| rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); | rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); | ||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| if (rt_ret != RT_ERROR_NONE || is_single_op_) { | |||||
| void *bin_handle = nullptr; | void *bin_handle = nullptr; | ||||
| if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { | if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { | ||||
| GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); | GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); | ||||
| @@ -50,6 +50,8 @@ class AiCoreOpTask { | |||||
| uint32_t GetBlockDim() const {return block_dim_;} | uint32_t GetBlockDim() const {return block_dim_;} | ||||
| void SetSingleOp(bool is_single_op) {is_single_op_ = is_single_op;}; | |||||
| protected: | protected: | ||||
| Status UpdateTilingInfo(TaskContext &context); | Status UpdateTilingInfo(TaskContext &context); | ||||
| virtual std::string GetKeyForOpParamSize() const; | virtual std::string GetKeyForOpParamSize() const; | ||||
| @@ -72,6 +74,7 @@ class AiCoreOpTask { | |||||
| uint32_t args_size_ = 0; | uint32_t args_size_ = 0; | ||||
| uint32_t block_dim_ = 1; | uint32_t block_dim_ = 1; | ||||
| bool clear_atomic_ = true; | bool clear_atomic_ = true; | ||||
| bool is_single_op_ = false; | |||||
| std::vector<int> output_indices_to_skip_; | std::vector<int> output_indices_to_skip_; | ||||
| }; | }; | ||||
| @@ -37,7 +37,9 @@ AiCoreTaskBuilder::AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector | |||||
| : op_desc_(op_desc), task_defs_(task_defs) { | : op_desc_(op_desc), task_defs_(task_defs) { | ||||
| } | } | ||||
| Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic) { | |||||
| Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, | |||||
| bool ignore_failure_on_atomic, | |||||
| bool is_single_op) { | |||||
| GE_CHECK_NOTNULL(op_desc_); | GE_CHECK_NOTNULL(op_desc_); | ||||
| if (task_defs_.size() > kNumTaskWithAtomicAddrCleanTask) { | if (task_defs_.size() > kNumTaskWithAtomicAddrCleanTask) { | ||||
| GELOGE(INTERNAL_ERROR, | GELOGE(INTERNAL_ERROR, | ||||
| @@ -68,6 +70,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool i | |||||
| auto atomic_task = | auto atomic_task = | ||||
| std::unique_ptr<AtomicAddrCleanOpTask>(new(std::nothrow)AtomicAddrCleanOpTask()); | std::unique_ptr<AtomicAddrCleanOpTask>(new(std::nothrow)AtomicAddrCleanOpTask()); | ||||
| GE_CHECK_NOTNULL(atomic_task); | GE_CHECK_NOTNULL(atomic_task); | ||||
| atomic_task->SetSingleOp(is_single_op); | |||||
| GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()), | GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()), | ||||
| "[%s] Failed to init task for AtomicAddrClean", | "[%s] Failed to init task for AtomicAddrClean", | ||||
| op_desc_->GetName().c_str()); | op_desc_->GetName().c_str()); | ||||
| @@ -77,6 +80,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool i | |||||
| // build aicore task | // build aicore task | ||||
| auto aicore_task = std::unique_ptr<AiCoreOpTask>(new(std::nothrow)AiCoreOpTask()); | auto aicore_task = std::unique_ptr<AiCoreOpTask>(new(std::nothrow)AiCoreOpTask()); | ||||
| GE_CHECK_NOTNULL(aicore_task); | GE_CHECK_NOTNULL(aicore_task); | ||||
| aicore_task->SetSingleOp(is_single_op); | |||||
| GE_CHK_STATUS_RET(aicore_task->Init(*op_desc_, task_defs_.back()), | GE_CHK_STATUS_RET(aicore_task->Init(*op_desc_, task_defs_.back()), | ||||
| "[%s] Failed to init task for AtomicAddrClean", | "[%s] Failed to init task for AtomicAddrClean", | ||||
| op_desc_->GetName().c_str()); | op_desc_->GetName().c_str()); | ||||
| @@ -47,7 +47,7 @@ class AiCoreTaskBuilder { | |||||
| AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector<domi::TaskDef> &task_defs); | AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector<domi::TaskDef> &task_defs); | ||||
| ~AiCoreTaskBuilder() = default; | ~AiCoreTaskBuilder() = default; | ||||
| Status BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic); | |||||
| Status BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic, bool is_single_op = false); | |||||
| private: | private: | ||||
| bool ExpectAtomicAddrCleanTask(); | bool ExpectAtomicAddrCleanTask(); | ||||
| @@ -27,7 +27,7 @@ namespace ge { | |||||
| namespace hybrid { | namespace hybrid { | ||||
| REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::GE_LOCAL, GeLocalNodeExecutor); | REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::GE_LOCAL, GeLocalNodeExecutor); | ||||
| const std::unordered_map<std::string, std::vector<uint32_t>> | |||||
| const std::map<std::string, std::vector<uint32_t>> | |||||
| RefInputTask::out_ref_input_index_ = {{DATA, {}}, | RefInputTask::out_ref_input_index_ = {{DATA, {}}, | ||||
| {AIPPDATA, {}}, | {AIPPDATA, {}}, | ||||
| {RESHAPE, {}}, | {RESHAPE, {}}, | ||||
| @@ -36,7 +36,7 @@ const std::unordered_map<std::string, std::vector<uint32_t>> | |||||
| {BROADCASTGRADIENTARGS, {}} | {BROADCASTGRADIENTARGS, {}} | ||||
| }; | }; | ||||
| const std::unordered_set<std::string> DependInputShapeTask::depend_input_shape_ops_ = {SHAPE, SHAPEN, RANK, SIZE}; | |||||
| const std::set<std::string> DependInputShapeTask::depend_input_shape_ops_ = {SHAPE, SHAPEN, RANK, SIZE}; | |||||
| Status RefInputTask::UpdateArgs(TaskContext &) { | Status RefInputTask::UpdateArgs(TaskContext &) { | ||||
| // no need update args | // no need update args | ||||
| @@ -46,7 +46,7 @@ class RefInputTask : public NodeTask { | |||||
| // key is op type, value is output ref input index, | // key is op type, value is output ref input index, | ||||
| // e.g. {1,0} means out[0] ref input[1], out[1] ref input[0], if vector is empty, it means ref input one by one | // e.g. {1,0} means out[0] ref input[1], out[1] ref input[0], if vector is empty, it means ref input one by one | ||||
| static const std::unordered_map<std::string, std::vector<uint32_t>> out_ref_input_index_; | |||||
| static const std::map<std::string, std::vector<uint32_t>> out_ref_input_index_; | |||||
| }; | }; | ||||
| class DependInputShapeTask : public NodeTask { | class DependInputShapeTask : public NodeTask { | ||||
| @@ -65,7 +65,7 @@ class DependInputShapeTask : public NodeTask { | |||||
| const NodePtr node_; | const NodePtr node_; | ||||
| // ops depend input shape | // ops depend input shape | ||||
| static const std::unordered_set<std::string> depend_input_shape_ops_; | |||||
| static const std::set<std::string> depend_input_shape_ops_; | |||||
| }; | }; | ||||
| class ConstantNodeTask : public NodeTask { | class ConstantNodeTask : public NodeTask { | ||||
| @@ -31,7 +31,7 @@ using std::map; | |||||
| using std::vector; | using std::vector; | ||||
| namespace ge { | namespace ge { | ||||
| class GELib { | |||||
| class GE_FUNC_VISIBILITY GELib { | |||||
| public: | public: | ||||
| GELib() = default; | GELib() = default; | ||||
| ~GELib() = default; | ~GELib() = default; | ||||
| @@ -77,7 +77,7 @@ Status CheckInputFormat(const string &input_format) { | |||||
| return ge::SUCCESS; | return ge::SUCCESS; | ||||
| } | } | ||||
| bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map, | |||||
| bool CheckDynamicBatchSizeInputShapeValid(map<string, vector<int64_t>> shape_map, | |||||
| std::string &dynamic_batch_size) { | std::string &dynamic_batch_size) { | ||||
| int32_t size = 0; | int32_t size = 0; | ||||
| for (auto iter = shape_map.begin(); iter != shape_map.end(); ++iter) { | for (auto iter = shape_map.begin(); iter != shape_map.end(); ++iter) { | ||||
| @@ -119,7 +119,7 @@ bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> | |||||
| return true; | return true; | ||||
| } | } | ||||
| bool CheckDynamicImagesizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map, | |||||
| bool CheckDynamicImagesizeInputShapeValid(map<string, vector<int64_t>> shape_map, | |||||
| const std::string input_format, std::string &dynamic_image_size) { | const std::string input_format, std::string &dynamic_image_size) { | ||||
| if (!input_format.empty() && !ge::TypeUtils::IsFormatValid(input_format.c_str())) { | if (!input_format.empty() && !ge::TypeUtils::IsFormatValid(input_format.c_str())) { | ||||
| GELOGE(ge::PARAM_INVALID, "user input format [%s] is not found!", input_format.c_str()); | GELOGE(ge::PARAM_INVALID, "user input format [%s] is not found!", input_format.c_str()); | ||||
| @@ -177,7 +177,7 @@ bool CheckDynamicImagesizeInputShapeValid(unordered_map<string, vector<int64_t>> | |||||
| return true; | return true; | ||||
| } | } | ||||
| bool CheckDynamicDimsInputShapeValid(const unordered_map<string, vector<int64_t>> &shape_map, | |||||
| bool CheckDynamicDimsInputShapeValid(const map<string, vector<int64_t>> &shape_map, | |||||
| string input_format, string &dynamic_dims) { | string input_format, string &dynamic_dims) { | ||||
| if (input_format != "ND") { | if (input_format != "ND") { | ||||
| ErrorManager::GetInstance().ATCReportErrMessage( | ErrorManager::GetInstance().ATCReportErrMessage( | ||||
| @@ -272,7 +272,7 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i | |||||
| return ge::SUCCESS; | return ge::SUCCESS; | ||||
| } | } | ||||
| unordered_map<string, vector<int64_t>> shape_map; | |||||
| map<string, vector<int64_t>> shape_map; | |||||
| vector<pair<string, vector<int64_t>>> user_shape_map; | vector<pair<string, vector<int64_t>>> user_shape_map; | ||||
| is_dynamic_input = true; | is_dynamic_input = true; | ||||
| if (input_shape.empty()) { | if (input_shape.empty()) { | ||||
| @@ -310,7 +310,7 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i | |||||
| return ge::SUCCESS; | return ge::SUCCESS; | ||||
| } | } | ||||
| bool ParseInputShape(const string &input_shape, unordered_map<string, vector<int64_t>> &shape_map, | |||||
| bool ParseInputShape(const string &input_shape, map<string, vector<int64_t>> &shape_map, | |||||
| vector<pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input) { | vector<pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input) { | ||||
| vector<string> shape_vec = StringUtils::Split(input_shape, ';'); | vector<string> shape_vec = StringUtils::Split(input_shape, ';'); | ||||
| const int DEFAULT_SHAPE_PAIR_SIZE = 2; | const int DEFAULT_SHAPE_PAIR_SIZE = 2; | ||||
| @@ -46,13 +46,13 @@ static std::map<std::string, domiTensorFormat_t> input_format_str_to_geformat = | |||||
| static const std::string kEnableCompressWeightTrue = "1"; | static const std::string kEnableCompressWeightTrue = "1"; | ||||
| static const std::string kEnableCompressWeightFalse = "0"; | static const std::string kEnableCompressWeightFalse = "0"; | ||||
| bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map, | |||||
| bool CheckDynamicBatchSizeInputShapeValid(map<string, vector<int64_t>> shape_map, | |||||
| std::string &dynamic_batch_size); | std::string &dynamic_batch_size); | ||||
| bool CheckDynamicImagesizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map, | |||||
| bool CheckDynamicImagesizeInputShapeValid(map<string, vector<int64_t>> shape_map, | |||||
| const std::string input_format, std::string &dynamic_image_size); | const std::string input_format, std::string &dynamic_image_size); | ||||
| bool CheckDynamicDimsInputShapeValid(const std::unordered_map<std::string, std::vector<int64_t>> &shape_map, | |||||
| bool CheckDynamicDimsInputShapeValid(const std::map<std::string, std::vector<int64_t>> &shape_map, | |||||
| std::string input_format, std::string &dynamic_dims); | std::string input_format, std::string &dynamic_dims); | ||||
| bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims); | bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims); | ||||
| @@ -61,7 +61,7 @@ Status CheckDynamicInputParamValid(std::string &dynamic_batch_size, std::string | |||||
| std::string &dynamic_dims, const std::string input_shape, | std::string &dynamic_dims, const std::string input_shape, | ||||
| const std::string input_format, bool &is_dynamic_input); | const std::string input_format, bool &is_dynamic_input); | ||||
| bool ParseInputShape(const std::string &input_shape, std::unordered_map<string, std::vector<int64_t>> &shape_map, | |||||
| bool ParseInputShape(const std::string &input_shape, std::map<string, std::vector<int64_t>> &shape_map, | |||||
| std::vector<std::pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input = false); | std::vector<std::pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input = false); | ||||
| Status CheckOutputTypeParamValid(const std::string output_type); | Status CheckOutputTypeParamValid(const std::string output_type); | ||||
| @@ -268,7 +268,7 @@ graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { | |||||
| if (options_.find(kInputShape) == options_.end()) { | if (options_.find(kInputShape) == options_.end()) { | ||||
| return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
| } | } | ||||
| unordered_map<string, vector<int64_t>> shape_map; | |||||
| map<string, vector<int64_t>> shape_map; | |||||
| vector<pair<string, vector<int64_t>>> user_shape_map; | vector<pair<string, vector<int64_t>>> user_shape_map; | ||||
| GE_CHK_BOOL_EXEC(ParseInputShape(options_[kInputShape], shape_map, user_shape_map, true), | GE_CHK_BOOL_EXEC(ParseInputShape(options_[kInputShape], shape_map, user_shape_map, true), | ||||
| return GRAPH_PARAM_INVALID, "parse input shape failed!"); | return GRAPH_PARAM_INVALID, "parse input shape failed!"); | ||||
| @@ -23,6 +23,7 @@ target_compile_options(atc_atc.bin PRIVATE | |||||
| -O2 | -O2 | ||||
| -Wno-deprecated-declarations | -Wno-deprecated-declarations | ||||
| -fno-common | -fno-common | ||||
| -fvisibility=hidden | |||||
| ) | ) | ||||
| target_compile_definitions(atc_atc.bin PRIVATE | target_compile_definitions(atc_atc.bin PRIVATE | ||||
| @@ -30,6 +31,7 @@ target_compile_definitions(atc_atc.bin PRIVATE | |||||
| COMPILE_OMG_PACKAGE | COMPILE_OMG_PACKAGE | ||||
| google=ascend_private | google=ascend_private | ||||
| LOG_CPP | LOG_CPP | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_include_directories(atc_atc.bin PRIVATE | target_include_directories(atc_atc.bin PRIVATE | ||||
| @@ -58,6 +60,10 @@ target_include_directories(atc_atc.bin PRIVATE | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ||||
| ) | ) | ||||
| target_link_options(atc_atc.bin PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(atc_atc.bin PRIVATE | target_link_libraries(atc_atc.bin PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| ascend_protobuf | ascend_protobuf | ||||
| @@ -90,6 +96,7 @@ target_compile_options(fwk_atc.bin PRIVATE | |||||
| -O2 | -O2 | ||||
| -Wno-deprecated-declarations | -Wno-deprecated-declarations | ||||
| -fno-common | -fno-common | ||||
| -fvisibility=hidden | |||||
| ) | ) | ||||
| target_compile_definitions(fwk_atc.bin PRIVATE | target_compile_definitions(fwk_atc.bin PRIVATE | ||||
| @@ -97,6 +104,7 @@ target_compile_definitions(fwk_atc.bin PRIVATE | |||||
| COMPILE_OMG_PACKAGE | COMPILE_OMG_PACKAGE | ||||
| google=ascend_private | google=ascend_private | ||||
| LOG_CPP | LOG_CPP | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_include_directories(fwk_atc.bin PRIVATE | target_include_directories(fwk_atc.bin PRIVATE | ||||
| @@ -125,6 +133,10 @@ target_include_directories(fwk_atc.bin PRIVATE | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ||||
| ) | ) | ||||
| target_link_options(fwk_atc.bin PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(fwk_atc.bin PRIVATE | target_link_libraries(fwk_atc.bin PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| ascend_protobuf | ascend_protobuf | ||||
| @@ -23,7 +23,7 @@ | |||||
| namespace ge { | namespace ge { | ||||
| using OpsKernelBuilderPtr = std::shared_ptr<OpsKernelBuilder>; | using OpsKernelBuilderPtr = std::shared_ptr<OpsKernelBuilder>; | ||||
| class OpsKernelBuilderManager { | |||||
| class GE_FUNC_VISIBILITY OpsKernelBuilderManager { | |||||
| public: | public: | ||||
| ~OpsKernelBuilderManager(); | ~OpsKernelBuilderManager(); | ||||
| @@ -41,7 +41,7 @@ using std::vector; | |||||
| namespace ge { | namespace ge { | ||||
| using OpsKernelInfoStorePtr = std::shared_ptr<OpsKernelInfoStore>; | using OpsKernelInfoStorePtr = std::shared_ptr<OpsKernelInfoStore>; | ||||
| class OpsKernelManager { | |||||
| class GE_FUNC_VISIBILITY OpsKernelManager { | |||||
| public: | public: | ||||
| friend class GELib; | friend class GELib; | ||||
| @@ -9,11 +9,13 @@ add_library(engine SHARED ${SRC_LIST}) | |||||
| target_compile_options(engine PRIVATE | target_compile_options(engine PRIVATE | ||||
| -Werror | -Werror | ||||
| -fno-common | -fno-common | ||||
| -fvisibility=hidden | |||||
| ) | ) | ||||
| target_compile_definitions(engine PRIVATE | target_compile_definitions(engine PRIVATE | ||||
| REUSE_MEMORY=1 | REUSE_MEMORY=1 | ||||
| PROTOBUF_INLINE_NOT_IN_HEADERS=0 | PROTOBUF_INLINE_NOT_IN_HEADERS=0 | ||||
| FUNC_VISIBILITY | |||||
| ) | ) | ||||
| target_include_directories(engine PRIVATE | target_include_directories(engine PRIVATE | ||||
| @@ -32,6 +34,10 @@ target_include_directories(engine PRIVATE | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
| ) | ) | ||||
| target_link_options(engine PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(engine PRIVATE | target_link_libraries(engine PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| @@ -25,7 +25,7 @@ | |||||
| #include "plugin/engine/engine_manage.h" | #include "plugin/engine/engine_manage.h" | ||||
| namespace ge { | namespace ge { | ||||
| class AICoreDNNEngine : public DNNEngine { | |||||
| class GE_FUNC_VISIBILITY AICoreDNNEngine : public DNNEngine { | |||||
| public: | public: | ||||
| AICoreDNNEngine() = default; | AICoreDNNEngine() = default; | ||||
| explicit AICoreDNNEngine(const std::string &engine_name); | explicit AICoreDNNEngine(const std::string &engine_name); | ||||
| @@ -40,7 +40,7 @@ class AICoreDNNEngine : public DNNEngine { | |||||
| DNNEngineAttribute engine_attribute_; | DNNEngineAttribute engine_attribute_; | ||||
| }; | }; | ||||
| class VectorCoreDNNEngine : public DNNEngine { | |||||
| class GE_FUNC_VISIBILITY VectorCoreDNNEngine : public DNNEngine { | |||||
| public: | public: | ||||
| VectorCoreDNNEngine() = default; | VectorCoreDNNEngine() = default; | ||||
| explicit VectorCoreDNNEngine(const std::string &engine_name); | explicit VectorCoreDNNEngine(const std::string &engine_name); | ||||
| @@ -56,7 +56,7 @@ class VectorCoreDNNEngine : public DNNEngine { | |||||
| }; | }; | ||||
| class AICpuDNNEngine : public DNNEngine { | |||||
| class GE_FUNC_VISIBILITY AICpuDNNEngine : public DNNEngine { | |||||
| public: | public: | ||||
| AICpuDNNEngine() = default; | AICpuDNNEngine() = default; | ||||
| explicit AICpuDNNEngine(const std::string &engine_name); | explicit AICpuDNNEngine(const std::string &engine_name); | ||||
| @@ -71,7 +71,7 @@ class AICpuDNNEngine : public DNNEngine { | |||||
| DNNEngineAttribute engine_attribute_; | DNNEngineAttribute engine_attribute_; | ||||
| }; | }; | ||||
| class AICpuTFDNNEngine : public DNNEngine { | |||||
| class GE_FUNC_VISIBILITY AICpuTFDNNEngine : public DNNEngine { | |||||
| public: | public: | ||||
| AICpuTFDNNEngine() = default; | AICpuTFDNNEngine() = default; | ||||
| explicit AICpuTFDNNEngine(const std::string &engine_name); | explicit AICpuTFDNNEngine(const std::string &engine_name); | ||||
| @@ -86,7 +86,7 @@ class AICpuTFDNNEngine : public DNNEngine { | |||||
| DNNEngineAttribute engine_attribute_; | DNNEngineAttribute engine_attribute_; | ||||
| }; | }; | ||||
| class GeLocalDNNEngine : public DNNEngine { | |||||
| class GE_FUNC_VISIBILITY GeLocalDNNEngine : public DNNEngine { | |||||
| public: | public: | ||||
| GeLocalDNNEngine() = default; | GeLocalDNNEngine() = default; | ||||
| explicit GeLocalDNNEngine(const std::string &engine_name); | explicit GeLocalDNNEngine(const std::string &engine_name); | ||||
| @@ -101,7 +101,7 @@ class GeLocalDNNEngine : public DNNEngine { | |||||
| DNNEngineAttribute engine_attribute_; | DNNEngineAttribute engine_attribute_; | ||||
| }; | }; | ||||
| class HostCpuDNNEngine : public DNNEngine { | |||||
| class GE_FUNC_VISIBILITY HostCpuDNNEngine : public DNNEngine { | |||||
| public: | public: | ||||
| HostCpuDNNEngine() = default; | HostCpuDNNEngine() = default; | ||||
| explicit HostCpuDNNEngine(const std::string &engine_name); | explicit HostCpuDNNEngine(const std::string &engine_name); | ||||
| @@ -116,7 +116,7 @@ private: | |||||
| DNNEngineAttribute engine_attribute_; | DNNEngineAttribute engine_attribute_; | ||||
| }; | }; | ||||
| class RtsDNNEngine : public DNNEngine { | |||||
| class GE_FUNC_VISIBILITY RtsDNNEngine : public DNNEngine { | |||||
| public: | public: | ||||
| RtsDNNEngine() = default; | RtsDNNEngine() = default; | ||||
| explicit RtsDNNEngine(const std::string &engine_name); | explicit RtsDNNEngine(const std::string &engine_name); | ||||
| @@ -131,7 +131,7 @@ class RtsDNNEngine : public DNNEngine { | |||||
| DNNEngineAttribute engine_attribute_; | DNNEngineAttribute engine_attribute_; | ||||
| }; | }; | ||||
| class HcclDNNEngine : public DNNEngine { | |||||
| class GE_FUNC_VISIBILITY HcclDNNEngine : public DNNEngine { | |||||
| public: | public: | ||||
| HcclDNNEngine() = default; | HcclDNNEngine() = default; | ||||
| explicit HcclDNNEngine(const std::string &engine_name); | explicit HcclDNNEngine(const std::string &engine_name); | ||||
| @@ -17,6 +17,20 @@ | |||||
| #ifndef GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_ | #ifndef GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_ | ||||
| #define GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_ | #define GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_ | ||||
| #if defined(_MSC_VER) | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #else | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #endif | |||||
| #include <map> | #include <map> | ||||
| #include <memory> | #include <memory> | ||||
| #include <string> | #include <string> | ||||
| @@ -26,7 +40,7 @@ | |||||
| namespace ge { | namespace ge { | ||||
| using DNNEnginePtr = std::shared_ptr<DNNEngine>; | using DNNEnginePtr = std::shared_ptr<DNNEngine>; | ||||
| class EngineManager { | |||||
| class GE_FUNC_VISIBILITY EngineManager { | |||||
| public: | public: | ||||
| static Status RegisterEngine(const std::string &engine_name, DNNEnginePtr engine_ptr); | static Status RegisterEngine(const std::string &engine_name, DNNEnginePtr engine_ptr); | ||||
| static DNNEnginePtr GetEngine(const std::string &engine_name); | static DNNEnginePtr GetEngine(const std::string &engine_name); | ||||
| @@ -34,7 +48,7 @@ class EngineManager { | |||||
| }; | }; | ||||
| extern "C" { | extern "C" { | ||||
| void GetDNNEngineObjs(std::map<std::string, DNNEnginePtr> &engines); | |||||
| GE_FUNC_VISIBILITY void GetDNNEngineObjs(std::map<std::string, DNNEnginePtr> &engines); | |||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_ | #endif // GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_ | ||||
| @@ -77,6 +77,23 @@ Status InnerSession::Initialize() { | |||||
| UpdateThreadContext(std::map<std::string, std::string>{}); | UpdateThreadContext(std::map<std::string, std::string>{}); | ||||
| // session device id set here | |||||
| std::string str_session_device_id; | |||||
| if (GetContext().GetOption("ge.session_device_id", str_session_device_id) == SUCCESS) { | |||||
| GELOGI("Option session device id has set, value is %s.", str_session_device_id.c_str()); | |||||
| uint32_t session_device_id = 0; | |||||
| try { | |||||
| session_device_id = static_cast<uint32_t>(std::stoi(str_session_device_id.c_str())); | |||||
| // session device id has priority | |||||
| GetContext().SetCtxDeviceId(session_device_id); | |||||
| } catch (std::invalid_argument &) { | |||||
| GELOGW("session device id %s transform to int failed.", str_session_device_id.c_str()); | |||||
| } catch (std::out_of_range &) { | |||||
| GELOGW("session device id %s transform to int failed.", str_session_device_id.c_str()); | |||||
| } | |||||
| } | |||||
| GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId())); | GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId())); | ||||
| DumpProperties dump_properties; | DumpProperties dump_properties; | ||||
| @@ -606,7 +606,7 @@ Status InitDomiOmgContext(const string &input_shape, const string &input_format, | |||||
| } | } | ||||
| // Analyze the input shape paramete | // Analyze the input shape paramete | ||||
| unordered_map<string, vector<int64_t>> &shape_map = domi::GetContext().input_dims; | |||||
| map<string, vector<int64_t>> &shape_map = domi::GetContext().input_dims; | |||||
| if (!ge::ParseInputShape(input_shape, domi::GetContext().input_dims, domi::GetContext().user_input_dims, | if (!ge::ParseInputShape(input_shape, domi::GetContext().input_dims, domi::GetContext().user_input_dims, | ||||
| is_dynamic_input) || | is_dynamic_input) || | ||||
| @@ -689,7 +689,7 @@ Status ParseOutNodes(const string &out_nodes) { | |||||
| /// | /// | ||||
| static Status CheckOpNameMap(const ComputeGraphPtr &graph, const std::string &op_conf) { | static Status CheckOpNameMap(const ComputeGraphPtr &graph, const std::string &op_conf) { | ||||
| GE_CHECK_NOTNULL(graph); | GE_CHECK_NOTNULL(graph); | ||||
| unordered_map<string, string> graphNodeTypes; | |||||
| map<string, string> graphNodeTypes; | |||||
| for (const NodePtr &node : graph->GetAllNodes()) { | for (const NodePtr &node : graph->GetAllNodes()) { | ||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
| @@ -256,9 +256,27 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||||
| const vector<DataBuffer> &input_buffers, | const vector<DataBuffer> &input_buffers, | ||||
| vector<GeTensorDesc> &output_desc, | vector<GeTensorDesc> &output_desc, | ||||
| vector<DataBuffer> &output_buffers) { | vector<DataBuffer> &output_buffers) { | ||||
| GE_CHECK_NOTNULL(op_task_); | |||||
| GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); | GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); | ||||
| if (hybrid_model_executor_ != nullptr) { | |||||
| GELOGD("Execute multi-task dynamic single op by hybrid model executor"); | |||||
| hybrid::HybridModelExecutor::ExecuteArgs args; | |||||
| for (auto &input : input_buffers) { | |||||
| args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length)); | |||||
| } | |||||
| for (auto &output : output_buffers) { | |||||
| args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length)); | |||||
| } | |||||
| for (auto &tensor_desc : input_desc) { | |||||
| auto desc = MakeShared<GeTensorDesc>(tensor_desc); | |||||
| GE_CHECK_NOTNULL(desc); | |||||
| args.input_desc.emplace_back(desc); | |||||
| } | |||||
| return hybrid_model_executor_->Execute(args); | |||||
| } | |||||
| std::lock_guard<std::mutex> lk(*stream_mutex_); | std::lock_guard<std::mutex> lk(*stream_mutex_); | ||||
| GE_CHECK_NOTNULL(op_task_); | |||||
| GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | ||||
| GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | ||||
| @@ -28,6 +28,7 @@ | |||||
| #include "runtime/stream.h" | #include "runtime/stream.h" | ||||
| #include "task/op_task.h" | #include "task/op_task.h" | ||||
| #include "cce/aicpu_engine_struct.h" | #include "cce/aicpu_engine_struct.h" | ||||
| #include "hybrid/executor/hybrid_model_executor.h" | |||||
| namespace ge { | namespace ge { | ||||
| class StreamResource; | class StreamResource; | ||||
| @@ -46,7 +47,7 @@ class SingleOp { | |||||
| Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | ||||
| friend class SingleOpModel; | friend class SingleOpModel; | ||||
| StreamResource *stream_resource_; | |||||
| StreamResource *stream_resource_ = nullptr; | |||||
| std::mutex *stream_mutex_; | std::mutex *stream_mutex_; | ||||
| rtStream_t stream_ = nullptr; | rtStream_t stream_ = nullptr; | ||||
| std::vector<void *> input_addr_list_; | std::vector<void *> input_addr_list_; | ||||
| @@ -77,6 +78,8 @@ class DynamicSingleOp { | |||||
| std::vector<DataBuffer> &outputs) const; | std::vector<DataBuffer> &outputs) const; | ||||
| std::unique_ptr<OpTask> op_task_; | std::unique_ptr<OpTask> op_task_; | ||||
| std::unique_ptr<hybrid::HybridModel> hybrid_model_; | |||||
| std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_; | |||||
| uintptr_t resource_id_ = 0; | uintptr_t resource_id_ = 0; | ||||
| std::mutex *stream_mutex_; | std::mutex *stream_mutex_; | ||||
| rtStream_t stream_ = nullptr; | rtStream_t stream_ = nullptr; | ||||
| @@ -31,6 +31,8 @@ | |||||
| #include "task/aicpu_task_builder.h" | #include "task/aicpu_task_builder.h" | ||||
| #include "task/aicpu_kernel_task_builder.h" | #include "task/aicpu_kernel_task_builder.h" | ||||
| #include "task/tbe_task_builder.h" | #include "task/tbe_task_builder.h" | ||||
| #include "hybrid/executor/hybrid_model_executor.h" | |||||
| #include "hybrid/node_executor/node_executor.h" | |||||
| static std::atomic<std::uint64_t> aicpu_kernel_id(0); | static std::atomic<std::uint64_t> aicpu_kernel_id(0); | ||||
| @@ -42,6 +44,20 @@ namespace ge { | |||||
| namespace { | namespace { | ||||
| const size_t kDataOutputNum = 1; | const size_t kDataOutputNum = 1; | ||||
| } // namespace | } // namespace | ||||
| static Status IfInferDepend(GeModelPtr &ge_model, bool &flag) { | |||||
| auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph()); | |||||
| for (const auto &node : comp_graph->GetAllNodes()) { | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| const auto &depends = op_desc->GetOpInferDepends(); | |||||
| if (!depends.empty()) { | |||||
| flag = true; | |||||
| return SUCCESS; | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size) | SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size) | ||||
| : model_name_(model_name), ori_model_data_(model_data), ori_model_size_(model_size) {} | : model_name_(model_name), ori_model_data_(model_data), ori_model_size_(model_size) {} | ||||
| @@ -478,6 +494,30 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & | |||||
| single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); | single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); | ||||
| GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); | GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); | ||||
| model_params_.memory_size = UINT_MAX; | model_params_.memory_size = UINT_MAX; | ||||
| auto ge_model = model_helper_.GetGeModel(); | |||||
| GE_CHECK_NOTNULL(ge_model); | |||||
| bool infer_depend_flag = false; | |||||
| GE_CHK_STATUS_RET_NOLOG(IfInferDepend(ge_model, infer_depend_flag)); | |||||
| if (ge_model->GetModelTaskDefPtr()->task_size() > 1 || infer_depend_flag) { | |||||
| GELOGD("Build single op HybridModel."); | |||||
| GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized()); | |||||
| auto root_model = model_helper_.GetGeRootModel(); | |||||
| GE_CHECK_NOTNULL(root_model); | |||||
| root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph())); | |||||
| root_model->SetSubgraphInstanceNameToModel(root_model->GetRootGraph()->GetName(), ge_model); | |||||
| single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model)); | |||||
| GE_CHECK_NOTNULL(single_op.hybrid_model_); | |||||
| GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "Failed to init hybrid model"); | |||||
| int32_t device_id = 0; | |||||
| GE_CHK_RT_RET(rtGetDevice(&device_id)); | |||||
| single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(), | |||||
| device_id, | |||||
| resource.GetStream())); | |||||
| GE_CHECK_NOTNULL(single_op.hybrid_model_executor_); | |||||
| GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "Failed to init hybrid model"); | |||||
| return SUCCESS; | |||||
| } | |||||
| return BuildTaskListForDynamicOp(single_op); | return BuildTaskListForDynamicOp(single_op); | ||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -61,6 +61,10 @@ DynamicSingleOp *StreamResource::GetDynamicOperator(const void *key) { | |||||
| return it->second.get(); | return it->second.get(); | ||||
| } | } | ||||
| rtStream_t StreamResource::GetStream() const { | |||||
| return stream_; | |||||
| } | |||||
| void StreamResource::SetStream(rtStream_t stream) { | void StreamResource::SetStream(rtStream_t stream) { | ||||
| stream_ = stream; | stream_ = stream; | ||||
| } | } | ||||
| @@ -37,6 +37,7 @@ class StreamResource { | |||||
| StreamResource(StreamResource &&) = delete; | StreamResource(StreamResource &&) = delete; | ||||
| StreamResource &operator=(const StreamResource &) = delete; | StreamResource &operator=(const StreamResource &) = delete; | ||||
| StreamResource &operator=(StreamResource &&) = delete; | StreamResource &operator=(StreamResource &&) = delete; | ||||
| rtStream_t GetStream() const; | |||||
| void SetStream(rtStream_t stream); | void SetStream(rtStream_t stream); | ||||
| SingleOp *GetOperator(const void *key); | SingleOp *GetOperator(const void *key); | ||||
| @@ -16,7 +16,7 @@ logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(leve | |||||
| """ | """ | ||||
| this attr is used for symbol table visible | this attr is used for symbol table visible | ||||
| """ | """ | ||||
| GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY' | |||||
| GE_ATTR = 'GE_FUNC_VISIBILITY' | |||||
| """ | """ | ||||
| generate stub func body by return type | generate stub func body by return type | ||||
| @@ -34,15 +34,15 @@ typedef uint32_t (*pCallBackFunc)(uint32_t graph_id, const std::map<AscendString | |||||
| } | } | ||||
| // Initialize GE | // Initialize GE | ||||
| ATTRIBUTED_DEPRECATED(Status GEInitialize(const std::map<AscendString, AscendString> &)) | |||||
| Status GEInitialize(const std::map<std::string, std::string> &options); | |||||
| ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY Status GEInitialize(const std::map<AscendString, AscendString> &)) | |||||
| GE_FUNC_VISIBILITY Status GEInitialize(const std::map<std::string, std::string> &options); | |||||
| Status GEInitialize(const std::map<AscendString, AscendString> &options); | |||||
| GE_FUNC_VISIBILITY Status GEInitialize(const std::map<AscendString, AscendString> &options); | |||||
| // Finalize GE, release all resources | // Finalize GE, release all resources | ||||
| Status GEFinalize(); | |||||
| GE_FUNC_VISIBILITY Status GEFinalize(); | |||||
| class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Session { | |||||
| class GE_FUNC_VISIBILITY Session { | |||||
| public: | public: | ||||
| ATTRIBUTED_DEPRECATED(Session(const std::map<AscendString, AscendString> &)) | ATTRIBUTED_DEPRECATED(Session(const std::map<AscendString, AscendString> &)) | ||||
| explicit Session(const std::map<std::string, std::string> &options); | explicit Session(const std::map<std::string, std::string> &options); | ||||
| @@ -28,7 +28,7 @@ namespace ge { | |||||
| #define ATTRIBUTED_DEPRECATED(replacement) __declspec(deprecated("Please use " #replacement " instead.")) | #define ATTRIBUTED_DEPRECATED(replacement) __declspec(deprecated("Please use " #replacement " instead.")) | ||||
| #endif | #endif | ||||
| class StatusFactory { | |||||
| class GE_FUNC_VISIBILITY StatusFactory { | |||||
| public: | public: | ||||
| static StatusFactory *Instance() { | static StatusFactory *Instance() { | ||||
| static StatusFactory instance; | static StatusFactory instance; | ||||
| @@ -70,7 +70,7 @@ class StatusFactory { | |||||
| std::map<uint32_t, std::string> err_desc_; | std::map<uint32_t, std::string> err_desc_; | ||||
| }; | }; | ||||
| class ErrorNoRegisterar { | |||||
| class GE_FUNC_VISIBILITY ErrorNoRegisterar { | |||||
| public: | public: | ||||
| ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } | ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } | ||||
| ErrorNoRegisterar(uint32_t err, const char *desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } | ErrorNoRegisterar(uint32_t err, const char *desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } | ||||
| @@ -17,6 +17,20 @@ | |||||
| #ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | #ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | ||||
| #define INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | #define INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | ||||
| #if defined(_MSC_VER) | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #else | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #endif | |||||
| #include <stddef.h> | #include <stddef.h> | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| @@ -17,6 +17,20 @@ | |||||
| #ifndef INC_EXTERNAL_GE_IR_BUILD_H_ | #ifndef INC_EXTERNAL_GE_IR_BUILD_H_ | ||||
| #define INC_EXTERNAL_GE_IR_BUILD_H_ | #define INC_EXTERNAL_GE_IR_BUILD_H_ | ||||
| #if defined(_MSC_VER) | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #else | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #endif | |||||
| #include <string> | #include <string> | ||||
| #include <map> | #include <map> | ||||
| #include <memory> | #include <memory> | ||||
| @@ -44,17 +58,17 @@ struct ModelBufferData { | |||||
| * @retval GRAPH_SUCCESS The function is successfully executed. | * @retval GRAPH_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ATTRIBUTED_DEPRECATED(graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &)) | |||||
| graphStatus aclgrphBuildInitialize(std::map<std::string, std::string> global_options); | |||||
| ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &)) | |||||
| GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map<std::string, std::string> global_options); | |||||
| graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &global_options); | |||||
| GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &global_options); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| * @brief build model.Notice the model is stored in buffer | * @brief build model.Notice the model is stored in buffer | ||||
| * | * | ||||
| */ | */ | ||||
| void aclgrphBuildFinalize(); | |||||
| GE_FUNC_VISIBILITY void aclgrphBuildFinalize(); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -66,12 +80,12 @@ void aclgrphBuildFinalize(); | |||||
| * @retval GRAPH_SUCCESS The function is successfully executed. | * @retval GRAPH_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ATTRIBUTED_DEPRECATED(graphStatus aclgrphBuildModel(const ge::Graph &, const std::map<AscendString, AscendString> &, | |||||
| ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &, const std::map<AscendString, AscendString> &, | |||||
| ModelBufferData &)) | ModelBufferData &)) | ||||
| graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<std::string, std::string> &build_options, | |||||
| GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<std::string, std::string> &build_options, | |||||
| ModelBufferData &model); | ModelBufferData &model); | ||||
| graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<AscendString, AscendString> &build_options, | |||||
| GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<AscendString, AscendString> &build_options, | |||||
| ModelBufferData &model); | ModelBufferData &model); | ||||
| /** | /** | ||||
| @@ -83,10 +97,10 @@ graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<AscendStrin | |||||
| * @retval GRAPH_SUCCESS The function is successfully executed. | * @retval GRAPH_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ATTRIBUTED_DEPRECATED(graphStatus aclgrphSaveModel(const char *, const ModelBufferData &)) | |||||
| graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &model); | |||||
| ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *, const ModelBufferData &)) | |||||
| GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &model); | |||||
| graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &model); | |||||
| GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &model); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -98,7 +112,7 @@ graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &mod | |||||
| * @retval GRAPH_SUCCESS The function is successfully executed. | * @retval GRAPH_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *patch_version); | |||||
| GE_FUNC_VISIBILITY graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *patch_version); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -110,7 +124,7 @@ graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *pat | |||||
| * @retval GRAPH_SUCCESS The function is successfully executed. | * @retval GRAPH_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len); | |||||
| GE_FUNC_VISIBILITY graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -123,7 +137,7 @@ graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const siz | |||||
| * @retval GRAPH_SUCCESS The function is successfully executed. | * @retval GRAPH_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| graphStatus aclgrphGenerateForOp(const AscendString &op_type, const std::vector<TensorDesc> &inputs, | |||||
| GE_FUNC_VISIBILITY graphStatus aclgrphGenerateForOp(const AscendString &op_type, const std::vector<TensorDesc> &inputs, | |||||
| const std::vector<TensorDesc> &outputs, Graph &graph); | const std::vector<TensorDesc> &outputs, Graph &graph); | ||||
| }; // namespace ge | }; // namespace ge | ||||
| @@ -37,7 +37,7 @@ extern "C" { | |||||
| // trace status of log | // trace status of log | ||||
| enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP }; | enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP }; | ||||
| class GeLog { | |||||
| class GE_FUNC_VISIBILITY GeLog { | |||||
| public: | public: | ||||
| static uint64_t GetTid() { | static uint64_t GetTid() { | ||||
| #ifdef __GNUC__ | #ifdef __GNUC__ | ||||
| @@ -278,7 +278,7 @@ | |||||
| } while (0) | } while (0) | ||||
| template <typename T> | template <typename T> | ||||
| std::string FmtToStr(const T &t) { | |||||
| GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) { | |||||
| std::string fmt; | std::string fmt; | ||||
| std::stringstream st; | std::stringstream st; | ||||
| st << "[" << t << "]"; | st << "[" << t << "]"; | ||||
| @@ -17,6 +17,20 @@ | |||||
| #ifndef INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_ | #ifndef INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_ | ||||
| #define INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_ | #define INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_ | ||||
| #if defined(_MSC_VER) | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #else | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #endif | |||||
| #include <map> | #include <map> | ||||
| #include <string> | #include <string> | ||||
| @@ -38,7 +52,7 @@ const int MODID_OME = 2; // OME module ID | |||||
| const int MODID_CALIBRATION = 3; // Calibration module ID | const int MODID_CALIBRATION = 3; // Calibration module ID | ||||
| namespace domi { | namespace domi { | ||||
| class StatusFactory { | |||||
| class GE_FUNC_VISIBILITY StatusFactory { | |||||
| public: | public: | ||||
| static StatusFactory *Instance(); | static StatusFactory *Instance(); | ||||
| @@ -54,7 +68,7 @@ class StatusFactory { | |||||
| std::map<uint32_t, std::string> err_desc_; | std::map<uint32_t, std::string> err_desc_; | ||||
| }; | }; | ||||
| class ErrorNoRegisterar { | |||||
| class GE_FUNC_VISIBILITY ErrorNoRegisterar { | |||||
| public: | public: | ||||
| ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } | ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } | ||||
| ~ErrorNoRegisterar() {} | ~ErrorNoRegisterar() {} | ||||
| @@ -23,7 +23,7 @@ | |||||
| #include "graph/tensor.h" | #include "graph/tensor.h" | ||||
| namespace ge { | namespace ge { | ||||
| class GeFormatUtil { | |||||
| class GE_FUNC_VISIBILITY GeFormatUtil { | |||||
| public: | public: | ||||
| /// | /// | ||||
| /// @name TransShape | /// @name TransShape | ||||
| @@ -215,7 +215,7 @@ struct ModelInfo { | |||||
| }; | }; | ||||
| // Asynchronous callback interface, implemented by the caller | // Asynchronous callback interface, implemented by the caller | ||||
| class ModelListener { | |||||
| class GE_FUNC_VISIBILITY ModelListener { | |||||
| public: | public: | ||||
| virtual ~ModelListener() {} | virtual ~ModelListener() {} | ||||
| /// | /// | ||||
| @@ -17,11 +17,25 @@ | |||||
| #ifndef INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ | #ifndef INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ | ||||
| #define INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ | #define INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ | ||||
| #if defined(_MSC_VER) | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #else | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #endif | |||||
| #include <gflags/gflags.h> | #include <gflags/gflags.h> | ||||
| #include <string> | #include <string> | ||||
| namespace ge { | namespace ge { | ||||
| class GflagsUtils { | |||||
| class GE_FUNC_VISIBILITY GflagsUtils { | |||||
| public: | public: | ||||
| static bool IsSetCommandTrue(const char *name) { | static bool IsSetCommandTrue(const char *name) { | ||||
| std::string out; | std::string out; | ||||
| @@ -28,7 +28,7 @@ | |||||
| #include "model/ge_root_model.h" | #include "model/ge_root_model.h" | ||||
| namespace ge { | namespace ge { | ||||
| class ModelHelper { | |||||
| class GE_FUNC_VISIBILITY ModelHelper { | |||||
| public: | public: | ||||
| ModelHelper() = default; | ModelHelper() = default; | ||||
| ~ModelHelper(); | ~ModelHelper(); | ||||