diff --git a/.gitignore b/.gitignore index 8a4003cf..891c0f87 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ /build /output /prebuilts +/cov *.ir *.out diff --git a/CMakeLists.txt b/CMakeLists.txt index 29be9eda..186ef4e6 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -88,10 +88,8 @@ if (ENABLE_OPEN_SRC) find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR}) if(PLATFORM STREQUAL "train") - find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) - find_module(resource libresource.so ${ASCEND_RUNTIME_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) @@ -101,12 +99,10 @@ if (ENABLE_OPEN_SRC) elseif(PLATFORM STREQUAL "inference") find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) - find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) - find_module(resource libresource.so ${ASCEND_ATC_DIR}) + find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) - #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) if(PRODUCT STREQUAL "flr3") elseif(PRODUCT STREQUAL "flr1") find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) @@ -116,17 +112,14 @@ if (ENABLE_OPEN_SRC) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) endif() elseif(PLATFORM STREQUAL "all") - find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) - find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) - find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) - find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) - find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) - find_module(resource libresource.so ${ASCEND_ATC_DIR}) - find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) - find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) + find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) + find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) + find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) - find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) - #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) + find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) + find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) + find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) + find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) else() message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") endif() diff --git a/build.sh b/build.sh index f2fafd48..7b6da560 100644 --- a/build.sh +++ b/build.sh @@ -166,14 +166,14 @@ build_graphengine() echo "execute command: cmake ${CMAKE_ARGS} .. failed." return 1 fi - COMMON_TARGET="ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt " + COMMON_TARGET="ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt " TARGET=${COMMON_TARGET} if [ "x${PLATFORM}" = "xtrain" ] then - TARGET="ge_runner ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder fwk_atc.bin ${TARGET}" + TARGET="ge_runner fwk_atc.bin ${TARGET}" elif [ "x${PLATFORM}" = "xinference" ] then - TARGET="ge_compiler atc_ge_local_engine atc_ge_local_opskernel_builder atc_host_cpu_engine atc_host_cpu_opskernel_builder atc_atc.bin opensrc_ascendcl ${TARGET}" + TARGET="ge_compiler atc_atc.bin opensrc_ascendcl ${TARGET}" elif [ "X$ENABLE_GE_UT" = "Xon" ] then TARGET="ut_libgraph ut_libge_multiparts_utest ut_libge_others_utest ut_libge_kernel_utest ut_libge_distinct_load_utest" @@ -183,7 +183,7 @@ build_graphengine() elif [ "x${PLATFORM}" = "xall" ] then # build all the target - TARGET="" + TARGET="ge_runner ge_compiler fwk_atc.bin atc_atc.bin opensrc_ascendcl ${TARGET}" fi make ${VERBOSE} ${TARGET} -j${THREAD_NUM} && make install @@ -198,8 +198,6 @@ g++ -v mk_dir ${OUTPUT_PATH} build_graphengine || { echo "GraphEngine build failed."; return; } echo "---------------- GraphEngine build finished ----------------" -#cp -rf "${BUILD_PATH}/graphengine/"*.so "${OUTPUT_PATH}" -#rm -rf "${OUTPUT_PATH}/"libproto* rm -f ${OUTPUT_PATH}/libgmock*.so rm -f ${OUTPUT_PATH}/libgtest*.so rm -f ${OUTPUT_PATH}/lib*_stub.so @@ -209,10 +207,6 @@ find ${OUTPUT_PATH} -name "*.so*" -print0 | xargs -0 chmod 500 echo "---------------- GraphEngine output generated ----------------" -# if [[ "X$ENABLE_GE_ST" = "Xon" ]]; then -# cp ${BUILD_PATH}/graphengine/tests/st/st_resnet50_train ${OUTPUT_PATH} -# fi - if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then cp ${BUILD_PATH}/tests/ut/common/graph/ut_libgraph ${OUTPUT_PATH} cp ${BUILD_PATH}/tests/ut/ge/ut_libge_multiparts_utest ${OUTPUT_PATH} @@ -220,9 +214,6 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then cp ${BUILD_PATH}/tests/ut/ge/ut_libge_others_utest ${OUTPUT_PATH} cp ${BUILD_PATH}/tests/ut/ge/ut_libge_kernel_utest ${OUTPUT_PATH} -# if [[ "X${ENABLE_GE_UT_ONLY_COMPILE}" != "Xon" ]]; then -# export LD_LIBRARY_PATH=${D_LINK_PATH}/x86_64/:${BUILD_PATH}../third_party/prebuild/x86_64/:${BUILD_PATH}/graphengine/:/usr/local/HiAI/driver/lib64:/usr/local/HiAI/runtime/lib64:${LD_LIBRARY_PATH} -# echo ${LD_LIBRARY_PATH} ${OUTPUT_PATH}/ut_libgraph && ${OUTPUT_PATH}/ut_libge_multiparts_utest && ${OUTPUT_PATH}/ut_libge_distinct_load_utest && @@ -232,17 +223,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then echo "!!! UT FAILED, PLEASE CHECK YOUR CHANGES !!!" exit 1; fi -# fi - -# if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then - echo "Generating coverage statistics, please wait..." - cd ${BASEPATH} - rm -rf ${BASEPATH}/cov - mkdir ${BASEPATH}/cov - lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info - lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info - cd ${BASEPATH}/cov - genhtml coverage.info + echo "Generating coverage statistics, please wait..." + cd ${BASEPATH} + rm -rf ${BASEPATH}/cov + mkdir ${BASEPATH}/cov + lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info + lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info + cd ${BASEPATH}/cov + genhtml coverage.info fi # generate output package in tar form, including ut/st libraries/executables @@ -256,6 +244,8 @@ generate_package() ATC_PATH="atc/lib64" ATC_BIN_PATH="atc/bin" FWK_BIN_PATH="fwkacllib/bin" + FWK_INCLUDE_PATH="fwkacllib/include" + ATC_INCLUDE_PATH="atc/include" NNENGINE_PATH="plugin/nnengine/ge_config" OPSKERNEL_PATH="plugin/opskernel" @@ -277,6 +267,8 @@ generate_package() mk_dir "${OUTPUT_PATH}/${ACL_PATH}" mk_dir "${OUTPUT_PATH}/${ATC_BIN_PATH}" mk_dir "${OUTPUT_PATH}/${FWK_BIN_PATH}" + mk_dir "${OUTPUT_PATH}/${FWK_INCLUDE_PATH}" + mk_dir "${OUTPUT_PATH}/${ATC_INCLUDE_PATH}" cd "${OUTPUT_PATH}" @@ -289,10 +281,10 @@ generate_package() find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name libengine.so -exec cp -f {} ${OUTPUT_PATH}/${ATC_PATH}/${NNENGINE_PATH}/../ \; MAX_DEPTH=1 - if [ "x${PLATFORM}" = "xall" ] || [ "x${PLATFORM}" = "xinference" ] - then - MAX_DEPTH=2 - fi +# if [ "x${PLATFORM}" = "xall" ] || [ "x${PLATFORM}" = "xinference" ] +# then +# MAX_DEPTH=2 +# fi for lib in "${PLUGIN_OPSKERNEL[@]}"; do find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth ${MAX_DEPTH} -name "$lib" -exec cp -f {} ${OUTPUT_PATH}/${FWK_PATH}/${OPSKERNEL_PATH} \; @@ -318,7 +310,15 @@ generate_package() find ./lib/atclib -name atc.bin -exec cp {} "${OUTPUT_PATH}/${ATC_BIN_PATH}" \; find ./lib/fwkacl -name atc.bin -exec cp {} "${OUTPUT_PATH}/${FWK_BIN_PATH}" \; find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name "libascendcl.so" -exec cp -f {} ${OUTPUT_PATH}/${ACL_PATH} \; - + + cp -r ${OUTPUT_PATH}/../metadef/inc/external/* ${ATC_INCLUDE_PATH} + cp -r ${OUTPUT_PATH}/../parser/inc/external/* ${ATC_INCLUDE_PATH} + cp -r ${OUTPUT_PATH}/../inc/external/* ${ATC_INCLUDE_PATH} + + cp -r ${OUTPUT_PATH}/../metadef/inc/external/* ${FWK_INCLUDE_PATH} + cp -r ${OUTPUT_PATH}/../parser/inc/external/* ${FWK_INCLUDE_PATH} + cp -r ${OUTPUT_PATH}/../inc/external/* ${FWK_INCLUDE_PATH} + if [ "x${PLATFORM}" = "xtrain" ] then tar -cf graphengine_lib.tar fwkacllib @@ -339,4 +339,4 @@ then find ./ -name graphengine_lib.tar -exec rm {} \; tar -cf graphengine_lib.tar lib fi -echo "---------------- GraphEngine package archive generated ----------------" \ No newline at end of file +echo "---------------- GraphEngine package archive generated ----------------" diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 16494a33..3ae51590 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -639,15 +639,6 @@ set(INFER_SRC_LIST "graph/load/model_manager/task_info/model_exit_task_info.cc" "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" - "single_op/task/op_task.cc" - "single_op/task/build_task_utils.cc" - "single_op/task/tbe_task_builder.cc" - "single_op/task/aicpu_task_builder.cc" - "single_op/task/aicpu_kernel_task_builder.cc" - "single_op/single_op.cc" - "single_op/single_op_model.cc" - "single_op/stream_resource.cc" - "single_op/single_op_manager.cc" "hybrid/hybrid_davinci_model_stub.cc" "ir_build/ge_ir_build.cc" "ir_build/atc_ir_common.cc" @@ -703,11 +694,13 @@ target_compile_definitions(ge_runner PRIVATE FMK_SUPPORT_DUMP DAVINCI_CLOUD google=ascend_private + FUNC_VISIBILITY ) target_compile_options(ge_runner PRIVATE -O2 -fno-common + -fvisibility=hidden $<$:-Werror=unused-variable> $<$:-Werror=unused-const-variable -Werror=format> ) @@ -738,6 +731,10 @@ target_include_directories(ge_runner SYSTEM PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain ) +target_link_options(ge_runner PRIVATE + -Wl,-Bsymbolic +) + target_link_libraries(ge_runner PRIVATE $ adump_server @@ -772,11 +769,13 @@ target_compile_definitions(ge_compiler PRIVATE FMK_HOST_INFER COMPILE_OMG_PACKAGE google=ascend_private + FUNC_VISIBILITY ) target_compile_options(ge_compiler PRIVATE -O2 -fno-common + -fvisibility=hidden $<$:-Werror=unused-variable> $<$:-Werror=unused-const-variable -Werror=format> ) @@ -807,6 +806,10 @@ target_include_directories(ge_compiler SYSTEM PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain ) +target_link_options(ge_compiler PRIVATE + -Wl,-Bsymbolic +) + target_link_libraries(ge_compiler PRIVATE $ static_mmpa @@ -868,6 +871,7 @@ target_compile_options(opensrc_ascendcl PRIVATE -O2 -fvisibility=hidden ) + target_link_options(opensrc_ascendcl PRIVATE -rdynamic -Wl,--allow-multiple-definition @@ -875,6 +879,7 @@ target_link_options(opensrc_ascendcl PRIVATE -Wl,-Bsymbolic -Wl,--exclude-libs,ALL ) + target_link_libraries(opensrc_ascendcl PRIVATE -Wl,--whole-archive ge_executor diff --git a/ge/common/CMakeLists.txt b/ge/common/CMakeLists.txt index 0172628c..22b1a7ea 100755 --- a/ge/common/CMakeLists.txt +++ b/ge/common/CMakeLists.txt @@ -12,7 +12,7 @@ set(PROTO_LIST "${METADEF_DIR}/proto/tensorflow/tensor.proto" "${METADEF_DIR}/proto/tensorflow/tensor_shape.proto" "${METADEF_DIR}/proto/tensorflow/types.proto" - "${METADEF_DIR}/proto/tensorflow/versions.proto" + "${METADEF_DIR}/proto/tensorflow/versions.proto" ) protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) @@ -73,6 +73,7 @@ target_compile_definitions(ge_common PRIVATE FMK_SUPPORT_DUMP OS_CENTOS google=ascend_private + FUNC_VISIBILITY ) target_compile_options(ge_common PRIVATE @@ -105,6 +106,10 @@ target_include_directories(ge_common PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain ) +target_link_options(ge_common PRIVATE + -Wl,-Bsymbolic +) + target_link_libraries(ge_common PRIVATE $ static_mmpa @@ -132,6 +137,7 @@ target_compile_definitions(ge_common_static PRIVATE $,OS_TYPE=WIN,OS_TYPE=0> $<$:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> LOG_CPP + FUNC_VISIBILITY ) target_compile_options(ge_common_static PRIVATE @@ -181,6 +187,7 @@ target_compile_definitions(ge_common PRIVATE OS_CENTOS google=ascend_private LOG_CPP + FUNC_VISIBILITY ) target_compile_options(ge_common PRIVATE @@ -208,6 +215,10 @@ target_include_directories(ge_common PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain ) +target_link_options(ge_common PRIVATE + -Wl,-Bsymbolic +) + target_link_libraries(ge_common PRIVATE $ ascend_protobuf_static diff --git a/ge/common/helper/model_cache_helper.cc b/ge/common/helper/model_cache_helper.cc index bf8c3ce0..67d934df 100755 --- a/ge/common/helper/model_cache_helper.cc +++ b/ge/common/helper/model_cache_helper.cc @@ -598,7 +598,7 @@ bool ModelCacheHelper::IsAllocatedGraphIdSameAsCache(Json &json) const { return false; } // Compare allocated graph id info between json and VarManager - std::unordered_map allocated_graph_id; + std::map allocated_graph_id; auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id); if (ret != SUCCESS) { GELOGW("Fail to parse AllocatedGraphId from Json."); @@ -667,7 +667,7 @@ bool ModelCacheHelper::IsChangedGraphIdSameAsCache(Json &json) const { return false; } // Compare variable changed graph id info between json and VarManager - std::unordered_map changed_graph_id; + std::map changed_graph_id; auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id); if (ret != SUCCESS) { GELOGW("Fail to parse ChangedGraphId from Json."); @@ -732,7 +732,7 @@ bool ModelCacheHelper::IsVarAddrMgrMapSameAsCache(Json &json) const { } // Compare variable address info between json and VarManager std::vector> var_addr_mgr_vector; - std::unordered_set var_offset_set; + std::set var_offset_set; auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set); if (ret != SUCCESS) { GELOGW("Fail to parse VarAddrMgrMap from Json."); @@ -942,7 +942,7 @@ Status ModelCacheHelper::RecoverAllocatedGraphId(const Json &json) const { GELOGW("Input param json type should be null or array."); return PARAM_INVALID; } - std::unordered_map allocated_graph_id; + std::map allocated_graph_id; auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id); if (ret != SUCCESS) { GELOGW("Fail to parse AllocatedGraphId from Json."); @@ -963,7 +963,7 @@ Status ModelCacheHelper::RecoverChangedGraphId(const Json &json) const { GELOGW("Input param json type should be null or array."); return PARAM_INVALID; } - std::unordered_map changed_graph_id; + std::map changed_graph_id; auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id); if (ret != SUCCESS) { GELOGW("Fail to parse AllocatedGraphId from Json."); @@ -985,7 +985,7 @@ Status ModelCacheHelper::RecoverVarAddrAndTensorDesc(const Json &json) const { return PARAM_INVALID; } std::vector> var_addr_mgr_vector; - std::unordered_set var_offset_set; + std::set var_offset_set; auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set); if (ret != SUCCESS) { GELOGW("Fail to parse VarAddrMgrMap from Json."); @@ -1508,7 +1508,7 @@ Status ModelCacheHelper::ParseMemResourceFromJson(const Json &json, map> &var_addr_mgr_vector, - std::unordered_set &var_offset_set) { + std::set &var_offset_set) { if (!(json.is_array() || json.is_null())) { GELOGW("Input param json type should be null or array."); return PARAM_INVALID; @@ -1606,7 +1606,7 @@ Status ModelCacheHelper::ParseTransRoadsFromJson( } Status ModelCacheHelper::ParseChangedGraphIdFromJson(const Json &json, - std::unordered_map &changed_graph_id) { + std::map &changed_graph_id) { if (!(json.is_array() || json.is_null())) { GELOGW("Input param json type should be null or array."); return PARAM_INVALID; @@ -1624,7 +1624,7 @@ Status ModelCacheHelper::ParseChangedGraphIdFromJson(const Json &json, } Status ModelCacheHelper::ParseAllocatedGraphIdFromJson(const Json &json, - std::unordered_map &allocated_graph_id) { + std::map &allocated_graph_id) { if (!(json.is_array() || json.is_null())) { GELOGW("Input param json type should be null or array."); return PARAM_INVALID; diff --git a/ge/common/helper/model_cache_helper.h b/ge/common/helper/model_cache_helper.h index 68381e96..398d6c03 100755 --- a/ge/common/helper/model_cache_helper.h +++ b/ge/common/helper/model_cache_helper.h @@ -95,15 +95,15 @@ class ModelCacheHelper { static Status ParseMemResourceFromJson(const Json &json, map &mem_resource); static Status ParseVarAddrMgrMapFromJson(const Json &json, std::vector> &var_addr_mgr_vector, - std::unordered_set &var_offset_set); + std::set &var_offset_set); static Status ParseCurVarTensorDescMapFromJson( const Json &json, std::unordered_map &cur_var_tensor_desc_map); static Status ParseTransRoadsFromJson(const Json &json, std::unordered_map> &trans_roads); static Status ParseChangedGraphIdFromJson(const Json &json, - std::unordered_map &changed_graph_id); + std::map &changed_graph_id); static Status ParseAllocatedGraphIdFromJson(const Json &json, - std::unordered_map &allocated_graph_id); + std::map &allocated_graph_id); static Status ParseBroadcastInfoFromJson(const Json &json, std::unordered_map &var_broadcast_info); static Status GetVarNameFromVarKey(const string &var_key, const GeTensorDesc &tensor_desc, string &var_name); diff --git a/ge/common/profiling/ge_profiling.cc b/ge/common/profiling/ge_profiling.cc index 9060f82b..08fdc0ae 100644 --- a/ge/common/profiling/ge_profiling.cc +++ b/ge/common/profiling/ge_profiling.cc @@ -88,7 +88,7 @@ bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { return false; } - std::unordered_set record; + std::set record; for (size_t i = 0; i < device_nums; ++i) { uint32_t dev_id = deviceid_list[i]; if (dev_id >= static_cast(dev_count)) { diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index 26e53c7b..8e56dc50 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -167,6 +167,8 @@ target_compile_options(ge_executor PRIVATE $<$,$>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common> $<$,$>:/MTd> $<$,$>:/MT> + $<$:-Werror=unused-variable> + $<$:-Werror=unused-const-variable -Werror=format> ) target_compile_definitions(ge_executor PRIVATE @@ -178,7 +180,7 @@ target_compile_definitions(ge_executor PRIVATE LOG_CPP ) -target_include_directories(ge_executor PRIVATE +target_include_directories(ge_executor SYSTEM PRIVATE ${GE_CODE_DIR}/ge ${GE_CODE_DIR}/inc ${GE_CODE_DIR}/inc/external @@ -212,12 +214,14 @@ target_compile_options(ge_executor_shared PRIVATE -Werror -O2 -Wno-deprecated-declarations + -fvisibility=hidden ) target_compile_definitions(ge_executor_shared PRIVATE PROTOBUF_INLINE_NOT_IN_HEADERS=0 DAVINCI_SUPPORT_PROFILING google=ascend_private + FUNC_VISIBILITY ) target_include_directories(ge_executor_shared PRIVATE @@ -238,6 +242,10 @@ target_include_directories(ge_executor_shared PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc ) +target_link_options(ge_executor_shared PRIVATE + -Wl,-Bsymbolic +) + target_link_libraries(ge_executor_shared PRIVATE $ msprofiler diff --git a/ge/ge_local_engine/CMakeLists.txt b/ge/ge_local_engine/CMakeLists.txt index 7189e8ff..00142cfe 100755 --- a/ge/ge_local_engine/CMakeLists.txt +++ b/ge/ge_local_engine/CMakeLists.txt @@ -27,10 +27,12 @@ add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) target_compile_options(ge_local_engine PRIVATE -Werror -fno-common + -fvisibility=hidden ) target_compile_definitions(ge_local_engine PRIVATE google=ascend_private + FUNC_VISIBILITY ) target_include_directories(ge_local_engine PRIVATE @@ -51,6 +53,10 @@ target_include_directories(ge_local_engine PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc ) +target_link_options(ge_local_engine PRIVATE + -Wl,-Bsymbolic +) + target_link_libraries(ge_local_engine PRIVATE $ -Wl,--no-as-needed @@ -67,11 +73,12 @@ add_library(atc_ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) target_compile_options(atc_ge_local_engine PRIVATE -Werror -fno-common + -fvisibility=hidden ) target_compile_definitions(atc_ge_local_engine PRIVATE - COMPILE_OMG_PACKAGE google=ascend_private + FUNC_VISIBILITY ) target_include_directories(atc_ge_local_engine PRIVATE @@ -92,6 +99,10 @@ target_include_directories(atc_ge_local_engine PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc ) +target_link_options(atc_ge_local_engine PRIVATE + -Wl,-Bsymbolic +) + target_link_libraries(atc_ge_local_engine PRIVATE $ -Wl,--no-as-needed @@ -113,10 +124,12 @@ add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDR target_compile_options(ge_local_opskernel_builder PRIVATE -Werror -fno-common + -fvisibility=hidden ) target_compile_definitions(ge_local_opskernel_builder PRIVATE google=ascend_private + FUNC_VISIBILITY ) target_include_directories(ge_local_opskernel_builder PRIVATE @@ -137,6 +150,10 @@ target_include_directories(ge_local_opskernel_builder PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc ) +target_link_options(ge_local_opskernel_builder PRIVATE + -Wl,-Bsymbolic +) + target_link_libraries(ge_local_opskernel_builder PRIVATE $ -Wl,--no-as-needed @@ -154,10 +171,12 @@ add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO target_compile_options(atc_ge_local_opskernel_builder PRIVATE -Werror -fno-common + -fvisibility=hidden ) target_compile_definitions(atc_ge_local_opskernel_builder PRIVATE google=ascend_private + FUNC_VISIBILITY ) target_include_directories(atc_ge_local_opskernel_builder PRIVATE @@ -178,6 +197,10 @@ target_include_directories(atc_ge_local_opskernel_builder PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc ) +target_link_options(atc_ge_local_opskernel_builder PRIVATE + -Wl,-Bsymbolic +) + target_link_libraries(atc_ge_local_opskernel_builder PRIVATE $ -Wl,--no-as-needed @@ -200,11 +223,13 @@ add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PR target_compile_options(ge_local_opskernel_builder_static PRIVATE -Werror -fno-common + -fvisibility=hidden ) target_compile_definitions(ge_local_opskernel_builder_static PRIVATE google=ascend_private LOG_CPP + FUNC_VISIBILITY ) target_include_directories(ge_local_opskernel_builder_static PRIVATE diff --git a/ge/ge_local_engine/engine/ge_local_engine.h b/ge/ge_local_engine/engine/ge_local_engine.h index 65dfe65b..9eedb533 100644 --- a/ge/ge_local_engine/engine/ge_local_engine.h +++ b/ge/ge_local_engine/engine/ge_local_engine.h @@ -17,6 +17,20 @@ #ifndef GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_ #define GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_ +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY _declspec(dllexport) +#else +#define GE_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_VISIBILITY +#endif +#endif + #include #include #include @@ -32,7 +46,7 @@ namespace ge_local { * ge local engine. * Used for the ops not belong to any engine. eg:netoutput */ -class GeLocalEngine { +class GE_FUNC_VISIBILITY GeLocalEngine { public: /** * get GeLocalEngine instance. @@ -94,25 +108,25 @@ extern "C" { * When Ge start, GE will invoke this interface * @return The status whether initialize successfully */ -ge::Status Initialize(const map &options); +GE_FUNC_VISIBILITY ge::Status Initialize(const map &options); /** * After the initialize, GE will invoke this interface to get the Ops kernel Store * @param ops_kernel_map The ge local's ops kernel info */ -void GetOpsKernelInfoStores(std::map &ops_kernel_map); +GE_FUNC_VISIBILITY void GetOpsKernelInfoStores(std::map &ops_kernel_map); /** * After the initialize, GE will invoke this interface to get the Graph Optimizer * @param graph_optimizers The ge local's Graph Optimizer objs */ -void GetGraphOptimizerObjs(std::map &graph_optimizers); +GE_FUNC_VISIBILITY void GetGraphOptimizerObjs(std::map &graph_optimizers); /** * When the graph finished, GE will invoke this interface * @return The status whether initialize successfully */ -ge::Status Finalize(); +GE_FUNC_VISIBILITY ge::Status Finalize(); } #endif // GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_ diff --git a/ge/ge_local_engine/engine/host_cpu_engine.h b/ge/ge_local_engine/engine/host_cpu_engine.h index 0b99ecac..fdec212e 100644 --- a/ge/ge_local_engine/engine/host_cpu_engine.h +++ b/ge/ge_local_engine/engine/host_cpu_engine.h @@ -16,6 +16,20 @@ #ifndef GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ #define GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY _declspec(dllexport) +#else +#define GE_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_VISIBILITY +#endif +#endif + #include #include "framework/common/ge_inner_error_codes.h" #include "graph/node.h" @@ -23,7 +37,7 @@ #include "external/../register/register.h" namespace ge { -class HostCpuEngine { +class GE_FUNC_VISIBILITY HostCpuEngine { public: ~HostCpuEngine() = default; diff --git a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h index 8a7dafe2..38653554 100644 --- a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h +++ b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h @@ -22,7 +22,7 @@ namespace ge { namespace ge_local { -class GeLocalOpsKernelBuilder : public OpsKernelBuilder { +class GE_FUNC_VISIBILITY GeLocalOpsKernelBuilder : public OpsKernelBuilder { public: ~GeLocalOpsKernelBuilder() override; Status Initialize(const map &options) override; diff --git a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h index cdfbeffa..d35b01c7 100755 --- a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h +++ b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h @@ -17,6 +17,20 @@ #ifndef GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_GE_LOCAL_OPS_KERNEL_INFO_H_ #define GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_GE_LOCAL_OPS_KERNEL_INFO_H_ +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY _declspec(dllexport) +#else +#define GE_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_VISIBILITY +#endif +#endif + #include #include #include @@ -25,7 +39,7 @@ namespace ge { namespace ge_local { -class GeLocalOpsKernelInfoStore : public OpsKernelInfoStore { +class GE_FUNC_VISIBILITY GeLocalOpsKernelInfoStore : public OpsKernelInfoStore { public: GeLocalOpsKernelInfoStore() = default; diff --git a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h index 55587b2e..e9efe0aa 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h +++ b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h @@ -21,7 +21,7 @@ namespace ge { namespace ge_local { -class GeDeletedOp : public Op { +class GE_FUNC_VISIBILITY GeDeletedOp : public Op { public: GeDeletedOp(const Node &node, RunContext &run_context); diff --git a/ge/ge_local_engine/ops_kernel_store/op/no_op.h b/ge/ge_local_engine/ops_kernel_store/op/no_op.h index 40e5766b..85b6bb58 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/no_op.h +++ b/ge/ge_local_engine/ops_kernel_store/op/no_op.h @@ -21,7 +21,7 @@ namespace ge { namespace ge_local { -class NoOp : public Op { +class GE_FUNC_VISIBILITY NoOp : public Op { public: NoOp(const Node &node, RunContext &run_context); diff --git a/ge/ge_local_engine/ops_kernel_store/op/op.h b/ge/ge_local_engine/ops_kernel_store/op/op.h index c5a3df7a..b75a8bed 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/op.h +++ b/ge/ge_local_engine/ops_kernel_store/op/op.h @@ -29,7 +29,7 @@ namespace ge_local { /** * The base class for all op. */ -class Op { +class GE_FUNC_VISIBILITY Op { public: Op(const Node &node, RunContext &run_context); diff --git a/ge/ge_local_engine/ops_kernel_store/op/op_factory.h b/ge/ge_local_engine/ops_kernel_store/op/op_factory.h index 0faab508..8dd411b6 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/op_factory.h +++ b/ge/ge_local_engine/ops_kernel_store/op/op_factory.h @@ -32,7 +32,7 @@ using OP_CREATOR_FUNC = std::function(const Node &, RunConte /** * manage all the op, support create op. */ -class OpFactory { +class GE_FUNC_VISIBILITY OpFactory { public: static OpFactory &Instance(); @@ -72,7 +72,7 @@ class OpFactory { std::vector all_ops_; }; -class OpRegistrar { +class GE_FUNC_VISIBILITY OpRegistrar { public: OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) { OpFactory::Instance().RegisterCreator(type, func); diff --git a/ge/ge_runtime/CMakeLists.txt b/ge/ge_runtime/CMakeLists.txt index 56b5ab41..b00dd5b3 100644 --- a/ge/ge_runtime/CMakeLists.txt +++ b/ge/ge_runtime/CMakeLists.txt @@ -27,7 +27,7 @@ target_compile_options(ge_runtime PRIVATE -fno-common ) -target_compile_definitions(ge_runtime PRIVATE +target_compile_definitions(ge_runtime PRIVATE PROTOBUF_INLINE_NOT_IN_HEADERS=0 LOG_CPP ) @@ -53,6 +53,10 @@ target_include_directories(ge_runtime PRIVATE ${CMAKE_BINARY_DIR}/proto/ge ) +target_link_options(ge_runtime PRIVATE + -Wl,-Bsymbolic +) + target_link_libraries(ge_runtime PRIVATE $ -Wl,--no-as-needed diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index ebd23948..41f24b94 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -1121,7 +1121,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, } } reusable_block->continuous_block_ = continuous; - reusable_block->ref_count_++; reusable_blocks_[memory_type][stream_id].erase((++it).base()); return reusable_block; } @@ -1136,7 +1135,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size); block->stream_id_ = node_op_desc->GetStreamId(); - block->ref_count_++; block->continuous_block_ = continuous; block->batch_label_ = batch_label; if (mem_type == kOutput) { @@ -1266,6 +1264,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vectorfirst_continuous_block_ = true; block->last_continuous_block_ = true; + ++(block->ref_count_); } else { GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str()); return INTERNAL_ERROR; @@ -1289,6 +1288,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, return nullptr, "Get no align size failed"); std::string symbol; + bool reuse_input = false; if (IsSymbolExist(node_index_io, symbol)) { block = symbol_blocks_[symbol]; GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str()); @@ -1303,6 +1303,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, block->SetLifeTimeEnd(life_time_); block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size); block->ref_count_++; + reuse_input = true; // add new size align_size = block_size; @@ -1336,7 +1337,6 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type); } GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "Block is nullptr."); - int out_count_reuse_input = block->ref_count_; int out_count = 0; GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(), GELOGE(FAILED, "index is out of range."); return nullptr); auto out_data_anchor = n->GetOutDataAnchor(index); @@ -1351,28 +1351,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, out_count++; } } - bool reuse_input = false; - for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) { - auto owner_node = in_anchor->GetOwnerNode(); - GE_IF_BOOL_EXEC(owner_node == nullptr, continue); - auto op_desc = owner_node->GetOpDesc(); - GE_IF_BOOL_EXEC(op_desc == nullptr, continue); - for (uint32_t i = 0; i < static_cast(op_desc->GetOutputsSize()); i++) { - bool dst_reuse_input = false; - uint32_t dst_reuse_input_index = 0; - auto owner_node_op_desc = op_desc->GetOutputDescPtr(i); - GE_IF_BOOL_EXEC(owner_node_op_desc == nullptr, continue); - GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInput(*owner_node_op_desc, dst_reuse_input) != SUCCESS, - GELOGI("Get dst_reuse_input failed")); - GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS, - GELOGI("Get dst_reuse_input_index failed")); - if (dst_reuse_input && (dst_reuse_input_index == static_cast(in_anchor->GetIdx()))) { - out_count_reuse_input += 1; - reuse_input = true; - } - } - } - block->ref_count_ = reuse_input ? out_count_reuse_input + out_count - 1 : out_count; + block->ref_count_ = (reuse_input && out_count != 0) ? (block->ref_count_ + out_count - 1) + : (block->ref_count_ + out_count); return block; } @@ -1484,12 +1464,25 @@ void BlockMemAssigner::ReleaseInputNodeOutMemory(const unordered_mapGetName().c_str()); - if ((node_type_indexs.back().node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) && - (node_type_indexs.back().index == static_cast(in_anchor->GetPeerOutAnchor()->GetIdx()))) { + bool is_block_matched = false; + for (auto &node_type_index : node_type_indexs) { + is_block_matched = (node_type_index.node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) && + (node_type_index.index == static_cast(in_anchor->GetPeerOutAnchor()->GetIdx())); + if (is_block_matched) { + GELOGI("Block of peer out is matched. Peer node:%s, output index:%u, " + "current node:%s, input index:%d, block ref_count:%d.", + node_type_index.node->GetName().c_str(), node_type_index.index, + node->GetName().c_str(), in_anchor->GetIdx(), block->ref_count_); + break; + } + } + + if (is_block_matched) { ReleaseMemory(block, reusable_memory, (node->GetOpDesc()->GetStreamId() == block->stream_id_)); if (block->ref_count_ == 0 && block->same_stream_) { SetLastUsedInputMemAttr(node, in_anchor->GetIdx()); } + break; } } } @@ -1530,6 +1523,21 @@ void CheckAndGetOpReuseEnv(const string &env, vector &env_vec, bool &op_ return; } +void BlockMemAssigner::CheckAndReleaseSuspendedBlock(const NodePtr &node, uint32_t idx, MemoryBlock *block) { + if (node == nullptr || node->GetOpDesc() == nullptr || block == nullptr) { + return; + } + int64_t stream_id = node->GetOpDesc()->GetStreamId(); + auto out_data_anchor = node->GetOutDataAnchor(static_cast(idx)); + bool is_suspended = (out_data_anchor != nullptr) && (out_data_anchor->GetPeerInDataNodesSize() == 0); + if (is_suspended) { + block->ref_count_ = (block->ref_count_ != 0) ? (block->ref_count_) : (1); + stream_workspace_blocks_[block->memory_type_][stream_id].emplace_back(block); + GELOGI("The output is suspended, and will be released in allocation of next node. Name:%s, index:%u, " + "size:%zu, ref_count:%d.", node->GetName().c_str(), idx, block->Size(), block->ref_count_); + } +} + Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector &ranges) { auto op_desc = node->GetOpDesc(); int64_t stream_id = op_desc->GetStreamId(); @@ -1560,7 +1568,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector // Allocate memory for the current node and release node memory of the same size in the workspace GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); - ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); }); + ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); + iter->second[stream_id].clear();}); if (IsContinuousOutput(node)) { return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); } @@ -1621,6 +1630,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector continue; } symbol_blocks_[iter->second] = mem_block; + // The output is suspended, and will be released in allocation of next node. + CheckAndReleaseSuspendedBlock(node, i, mem_block); } } return SUCCESS; @@ -1648,9 +1659,6 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { if (AssignOutputMemoryWithReuse(n, ranges) != SUCCESS) { return; } - for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); ++iter) { - iter->second[stream_id].clear(); - } vector temp; int64_t tatal_size = 0; GetNodeWorkSpaceSize(n, temp, tatal_size); @@ -1692,6 +1700,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { kWorkspace, n, static_cast(i), workspace_reuse_flag, is_op_reuse_mem_, false, memory_type); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mem_block == nullptr, continue, "failed to apply memory block."); + ++(mem_block->ref_count_); CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block, memory_type); } for (auto it = reusable_blocks_.begin(); it != reusable_blocks_.end(); ++it) { diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index 4401108d..199a84f9 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -454,6 +454,8 @@ class BlockMemAssigner : public MemAssigner { void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node); + void CheckAndReleaseSuspendedBlock(const NodePtr &node, uint32_t idx, MemoryBlock *block); + std::unordered_map>> reusable_blocks_; std::unordered_map>> stream_workspace_blocks_; @@ -464,7 +466,7 @@ class BlockMemAssigner : public MemAssigner { std::unordered_map> node_continuous_input_blocks_; - std::unordered_map node_continuous_input_counts_; + std::map node_continuous_input_counts_; // reuse memory vector op_no_reuse_mem_vec_; diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index 8c5d8940..ca64c869 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -528,7 +528,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] " "size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), - node->GetType().c_str(), peer_op_desc->GetName().c_str(),peer_out_data_anchor->GetIdx(), + peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type, is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding); } @@ -618,7 +618,7 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node } GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]" " size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), - node->GetType().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), + out_op_desc->GetName().c_str(), node->GetType().c_str(), out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL, is_nopadding ? nopadding_size : tensor_desc_size, is_nopadding); } diff --git a/ge/graph/build/run_context.cc b/ge/graph/build/run_context.cc index 50094cf3..ba328840 100644 --- a/ge/graph/build/run_context.cc +++ b/ge/graph/build/run_context.cc @@ -90,7 +90,7 @@ Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t even // Create rt label for (uint32_t i = 0; i < label_num; ++i) { rtLabel_t label = nullptr; - rt_ret = rtLabelCreate(&label); + rt_ret = rtLabelCreateV2(&label, rt_model_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "rtLabelCreate failed. rt_ret = %d, index = %u", static_cast(rt_ret), i); return RT_FAILED; diff --git a/ge/graph/build/stream_allocator.cc b/ge/graph/build/stream_allocator.cc index 88ffda02..d90d1f40 100644 --- a/ge/graph/build/stream_allocator.cc +++ b/ge/graph/build/stream_allocator.cc @@ -1226,7 +1226,7 @@ Status StreamAllocator::InsertSyncEventNodes() { } } - Status status = ReorderEventNodes(); + Status status = whole_graph_->InsertGraphEvents(); if (status != SUCCESS) { GELOGE(status, "Graph ReorderEventNodes failed"); return status; @@ -1235,22 +1235,6 @@ Status StreamAllocator::InsertSyncEventNodes() { return SUCCESS; } -Status StreamAllocator::ReorderEventNodes() const { - Status status = whole_graph_->InsertEventNodes(); - if (status != SUCCESS) { - GELOGE(status, "Whole graph InsertEventNodes failed"); - return status; - } - for (const auto &subgraph : whole_graph_->GetAllSubgraphs()) { - status = subgraph->InsertEventNodes(); - if (status != SUCCESS) { - GELOGE(status, "Subgraph %s InsertEventNodes failed", subgraph->GetName().c_str()); - return status; - } - } - return SUCCESS; -} - void StreamAllocator::DumpEvents() { map> after_refresh_stream_nodes; for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { diff --git a/ge/graph/build/stream_allocator.h b/ge/graph/build/stream_allocator.h index a21b2f77..dd82700d 100644 --- a/ge/graph/build/stream_allocator.h +++ b/ge/graph/build/stream_allocator.h @@ -74,7 +74,6 @@ class StreamAllocator { Status RefreshContinuousEvents(); Status InsertSyncEventNodes(); - Status ReorderEventNodes() const; void DumpEvents(); diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index 8bd7d32e..3f4cd1bc 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -211,7 +211,7 @@ Status TaskGenerator::SaveFusionNodes(map> &fusion // and it have no attr or group attr different // which means bad case, return error bool call_check = true; - std::unordered_set input_group_ids; + std::set input_group_ids; for (const auto &input_node : node->GetInNodes()) { auto iter = nodes_with_group_attr.find(input_node); if (iter == nodes_with_group_attr.end()) { @@ -533,13 +533,6 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { return GE_GRAPH_GRAPH_NODE_NULL; } - int64_t node_index = 0; - for (auto &node : all_nodes) { - OpDescPtr op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - op_desc->SetId(node_index++); - } - map> all_stream_ops; for (auto &node : all_nodes) { OpDescPtr op_desc = node->GetOpDesc(); @@ -784,7 +777,7 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin } if (graph->GetNeedIteration()) { - if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") { + if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD) { profiling_point.end_index.insert(current_idx); GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive", op_desc->GetName().c_str(), current_idx); diff --git a/ge/graph/common/transop_util.h b/ge/graph/common/transop_util.h index 3332e1fb..883ae41b 100644 --- a/ge/graph/common/transop_util.h +++ b/ge/graph/common/transop_util.h @@ -44,7 +44,7 @@ class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY TransOpUtil { static TransOpUtil &Instance(); - typedef std::unordered_map transop_index_op; + typedef std::map transop_index_op; transop_index_op transop_index_map_; }; } // namespace ge diff --git a/ge/graph/load/model_manager/cpu_queue_schedule.cc b/ge/graph/load/model_manager/cpu_queue_schedule.cc index d9b716ea..6807043a 100644 --- a/ge/graph/load/model_manager/cpu_queue_schedule.cc +++ b/ge/graph/load/model_manager/cpu_queue_schedule.cc @@ -99,7 +99,7 @@ Status CpuTaskModelDequeue::Distribute() { /// @param [in] outside_addrs: model input/output memory addr /// @return: 0 for success / others for failed /// -Status CpuTaskZeroCopy::Init(std::vector &mbuf_list, std::map &outside_addrs) { +Status CpuTaskZeroCopy::Init(std::vector &mbuf_list, const map &outside_addrs) { if ((args_ != nullptr) || (args_size_ > 0)) { GELOGE(FAILED, "Task already initialized, size: %u", args_size_); return FAILED; @@ -110,32 +110,22 @@ Status CpuTaskZeroCopy::Init(std::vector &mbuf_list, std::map> virtual_args_addrs = addrs_mapping_list[0]; - for (const auto &virtual_args_addr : virtual_args_addrs) { - addr_map_info.addr_num += virtual_args_addr.second.size(); - } - } - GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num); - // init src_addrs/dst_addrs - size_t index = 0; vector src_addrs; vector dst_addrs; - for (auto &addrs : outside_addrs) { - auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); + for (const auto &addrs : outside_addrs) { + const auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs"); std::map> virtual_args_addrs = addrs_mapping_list[0]; for (const auto &virtual_args_addr : virtual_args_addrs) { + addr_map_info.addr_num += virtual_args_addr.second.size(); for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) { - src_addrs.push_back(mbuf_list.at(index)); + src_addrs.emplace_back(mbuf_list.at(addrs.first)); dst_addrs.push_back(static_cast(reinterpret_cast(virtual_args_addr.second.at(i)))); } } - index++; } + GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num); // malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM)); diff --git a/ge/graph/load/model_manager/cpu_queue_schedule.h b/ge/graph/load/model_manager/cpu_queue_schedule.h index de4c5327..8dc44538 100644 --- a/ge/graph/load/model_manager/cpu_queue_schedule.h +++ b/ge/graph/load/model_manager/cpu_queue_schedule.h @@ -93,7 +93,7 @@ class CpuTaskZeroCopy : public CpuTaskInfo { ~CpuTaskZeroCopy() override; Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; } - Status Init(std::vector &mbuf_list, std::map &outside_addrs); + Status Init(std::vector &mbuf_list, const map &outside_addrs); Status Distribute() override; private: diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 95fd8392..ed2428d9 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -842,6 +842,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { }; vector output_op_list; + set input_outside_addrs; + set output_outside_addrs; map data_by_index; map variable_by_name; auto nodes = compute_graph->GetAllNodes(); @@ -858,7 +860,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc); if (IsDataOp(op_desc->GetType())) { - if (InitDataOp(compute_graph, node, data_op_index, data_by_index) != SUCCESS) { + if (InitDataOp(compute_graph, node, data_op_index, data_by_index, input_outside_addrs) != SUCCESS) { GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str()); return PARAM_INVALID; } @@ -867,7 +869,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { } if (op_desc->GetType() == NETOUTPUT) { - if (InitNetOutput(compute_graph, node, output_op_list) != SUCCESS) { + if (InitNetOutput(compute_graph, node, output_op_list, output_outside_addrs) != SUCCESS) { GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); return PARAM_INVALID; } @@ -961,7 +963,7 @@ void DavinciModel::SetLabelForDynamic(const NodePtr &node) { /// @return Status /// Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, - map &data_by_index) { + map &data_by_index, set &input_outside_addrs) { // op_desc Checked by Init: Data, valid. auto op_desc = node->GetOpDesc(); if (node->GetOwnerComputeGraph() != graph) { @@ -1000,16 +1002,12 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str()); return PARAM_INVALID; } - new_input_data_info_[data_index] = zero_copy_offset; - - for (size_t index = 0; index < virtual_addr_list.size(); ++index) { - void *addr = virtual_addr_list.at(index); - if (new_input_outside_addrs_.find(addr) != new_input_outside_addrs_.end()) { - continue; - } - zero_copy_offset.SetInputOutsideAddrs(output_offset_list, addr, index, fusion_flag, real_virtual_addrs_); - new_input_outside_addrs_[addr] = zero_copy_offset; + if (input_outside_addrs.count(virtual_addr) == 0) { + int64_t output_offset = output_offset_list.at(kDataIndex); + zero_copy_offset.SetInputOutsideAddrs(output_offset, virtual_addr, fusion_flag, real_virtual_addrs_); + input_outside_addrs.insert(virtual_addr); } + input_data_info_[data_index] = zero_copy_offset; return SUCCESS; } @@ -1085,7 +1083,7 @@ bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { /// @param [in/out] vector: All NetOutput node in model. /// @return Status Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, - vector &output_op_list) { + vector &output_op_list, set &output_outside_addrs) { // node->GetOpDesc Checked by Init: NetOutput, valid. auto op_desc = node->GetOpDesc(); // excludes the function op sub graph, e.g. case,if @@ -1117,7 +1115,7 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr & return PARAM_INVALID; } - size_t num = new_output_data_info_.size(); + size_t num = output_data_info_.size(); bool fusion_flag = false; size_t input_count = input_size_list.size(); @@ -1131,22 +1129,22 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr & Status ret = zero_copy_offset.InitOutputDataInfo(input_size_list, virtual_addr_list, op_desc, idx, fusion_flag); GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str()); return PARAM_INVALID;); - new_output_data_info_[num + idx] = zero_copy_offset; void *addr = virtual_addr_list.at(idx); int64_t input_offset = input_offset_list.at(idx); - vector tensor_addrs; - zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs); - auto rslt = new_output_outside_addrs_.insert(std::pair(addr, zero_copy_offset)); - if (!rslt.second) { + if (output_outside_addrs.count(addr) == 0) { + vector tensor_addrs; + zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs); + output_outside_addrs.insert(addr); + for (size_t i = 0; i < tensor_addrs.size(); ++i) { + void *real_addr = tensor_addrs.at(i); + DisableZeroCopy(real_addr); + real_virtual_addrs_.insert(real_addr); + } + } else { GELOGI("same output_tensor_addr %p to different input_tensor of %s", addr, op_desc->GetName().c_str()); DisableZeroCopy(addr); } - - for (size_t i = 0; i < tensor_addrs.size(); ++i) { - void *real_addr = tensor_addrs.at(i); - DisableZeroCopy(real_addr); - real_virtual_addrs_.insert(real_addr); - } + output_data_info_[num + idx] = zero_copy_offset; } return SUCCESS; } @@ -1402,7 +1400,7 @@ Status DavinciModel::InitLabelSet(const OpDescPtr &op_desc) { } rtLabel_t rt_label = nullptr; - rtError_t rt_error = rtLabelCreateEx(&rt_label, stream); + rtError_t rt_error = rtLabelCreateExV2(&rt_label, rt_model_handle_, stream); if (rt_error != RT_ERROR_NONE || rt_label == nullptr) { GELOGE(INTERNAL_ERROR, "InitLabelSet: %s create label failed, error=0x%x.", op_desc->GetName().c_str(), rt_error); return INTERNAL_ERROR; @@ -1463,27 +1461,27 @@ Status DavinciModel::LoadWithQueue() { return SUCCESS; } - if (input_queue_ids_.size() != new_input_data_info_.size()) { + if (input_queue_ids_.size() != input_data_info_.size()) { GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Input queue ids not match model: input_queue=%zu input_data=%zu", - input_queue_ids_.size(), new_input_data_info_.size()); + input_queue_ids_.size(), input_data_info_.size()); return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; } - if (output_queue_ids_.size() != new_output_data_info_.size()) { + if (output_queue_ids_.size() != output_data_info_.size()) { GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Output queue ids not match model: output_queue=%zu output_data=%zu", - output_queue_ids_.size(), new_output_data_info_.size()); + output_queue_ids_.size(), output_data_info_.size()); return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; } GE_CHK_STATUS_RET(AddHeadStream(), "Add head stream failed."); // Binding input_queue and Data Op. GE_CHK_STATUS_RET(BindInputQueue(), "Launch bind input queue failed."); - GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, new_input_outside_addrs_), "Launch zero copy failed."); + GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, input_data_info_), "Launch zero copy failed."); // Binding output_queue and NetOutput Op. GE_CHK_STATUS_RET(BindOutputQueue(), "Launch bind output queue failed."); - GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, new_output_outside_addrs_), "Launch zero copy failed."); + GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, output_data_info_), "Launch zero copy failed."); GE_CHK_STATUS_RET(CpuActiveStream(), "Launch active entry stream failed."); GE_CHK_STATUS_RET(CpuWaitEndGraph(), "Launch wait end graph failed."); @@ -1499,9 +1497,9 @@ Status DavinciModel::LoadWithQueue() { Status DavinciModel::BindInputQueue() { // Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size() for (size_t i = 0; i < input_queue_ids_.size(); ++i) { - auto it = new_input_data_info_.find(i); - if (it == new_input_data_info_.end()) { - GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", new_input_data_info_.size(), i); + auto it = input_data_info_.find(i); + if (it == input_data_info_.end()) { + GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", input_data_info_.size(), i); return FAILED; } @@ -1555,7 +1553,7 @@ Status DavinciModel::CpuModelDequeue(uint32_t queue_id) { } Status DavinciModel::CpuTaskModelZeroCopy(std::vector &mbuf_list, - std::map &outside_addrs) { + const map &outside_addrs) { GELOGI("Set CpuKernel model zero_copy task enter."); std::shared_ptr zero_copy = MakeShared(rt_entry_stream_); if (zero_copy == nullptr) { @@ -1579,9 +1577,9 @@ Status DavinciModel::CpuTaskModelZeroCopy(std::vector &mbuf_list, Status DavinciModel::BindOutputQueue() { // Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size() for (size_t i = 0; i < output_queue_ids_.size(); ++i) { - auto it = new_output_data_info_.find(i); - if (it == new_output_data_info_.end()) { - GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i); + auto it = output_data_info_.find(i); + if (it == output_data_info_.end()) { + GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i); return FAILED; } @@ -1685,9 +1683,9 @@ Status DavinciModel::CpuWaitEndGraph() { Status DavinciModel::BindEnqueue() { for (size_t i = 0; i < output_queue_ids_.size(); ++i) { - auto it = new_output_data_info_.find(i); - if (it == new_output_data_info_.end()) { - GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i); + auto it = output_data_info_.find(i); + if (it == output_data_info_.end()) { + GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i); return FAILED; } @@ -2103,10 +2101,10 @@ Status DavinciModel::GetOutputDescInfo(vector &output_descs Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) { rtMemcpyKind_t kind = device_data ? RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE; const std::vector &blobs = input_data.blobs; - for (const auto &data : new_input_data_info_) { + for (const auto &data : input_data_info_) { if (data.first >= blobs.size()) { GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld, op_name(%s)", blobs.size(), - new_input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first, + input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first, data.second.GetOpName().c_str()); return FAILED; } @@ -2427,18 +2425,18 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r output_data.index = data_id; output_data.model_id = model_id_; - if (output_data.blobs.size() != new_output_data_info_.size()) { + if (output_data.blobs.size() != output_data_info_.size()) { GELOGE(FAILED, "Output data buffer num=%zu not equal model data num=%zu", output_data.blobs.size(), - new_output_data_info_.size()); + output_data_info_.size()); return FAILED; } std::vector &blobs = output_data.blobs; size_t idx = 0; - for (const auto &output : new_output_data_info_) { + for (const auto &output : output_data_info_) { if (output.first >= blobs.size()) { GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", blobs.size(), - new_input_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first); + input_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first); return FAILED; } @@ -3166,8 +3164,11 @@ void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) { /// @return None. /// void DavinciModel::SetCopyOnlyOutput() { - for (const auto &output_outside_addrs : new_output_outside_addrs_) { + for (const auto &output_outside_addrs : output_data_info_) { ZeroCopyOffset output_outside = output_outside_addrs.second; + if (!output_outside.IsRelativeOffsetValid()) { + return; + } for (uint32_t out_count = 0; out_count < output_outside.GetAddrCount(); ++out_count) { auto &addrs_mapping_list = output_outside.GetOutsideAddrs(); std::map> virtual_args_addrs = addrs_mapping_list[out_count]; @@ -3219,12 +3220,12 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector lock(outside_addrs_mutex_); - for (auto &input_outside_addrs : new_input_outside_addrs_) { + for (auto &input_outside_addrs : input_data_info_) { ZeroCopyOffset &input_outside = input_outside_addrs.second; input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); } - for (auto &output_outside_addrs : new_output_outside_addrs_) { + for (auto &output_outside_addrs : output_data_info_) { ZeroCopyOffset &output_outside = output_outside_addrs.second; output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); } @@ -3293,12 +3294,12 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 /// @return SUCCESS handle successfully / PARAM_INVALID for failed /// Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &output_data, bool is_dynamic) { - if (UpdateIoTaskArgs(new_input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { + if (UpdateIoTaskArgs(input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update input data to model failed."); return ACL_ERROR_GE_PARAM_INVALID; } - if (UpdateIoTaskArgs(new_output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) != + if (UpdateIoTaskArgs(output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update output data to model failed."); return ACL_ERROR_GE_PARAM_INVALID; diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h index 53e9cd4d..8ed82912 100755 --- a/ge/graph/load/model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -675,7 +675,7 @@ class DavinciModel { /// @return Status /// Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, - map &data_by_index); + map &data_by_index, set &input_outside_addrs); /// /// @ingroup ge @@ -694,7 +694,8 @@ class DavinciModel { /// @param [in/out] vector: All NetOutput node in model. /// @return Status /// - Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector &output_op_list); + Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector &output_op_list, + set &output_outside_addrs); /// /// @ingroup ge @@ -764,7 +765,7 @@ class DavinciModel { /// Status BindInputQueue(); - Status CpuTaskModelZeroCopy(vector &mbuf_list, map &outside_addrs); + Status CpuTaskModelZeroCopy(vector &mbuf_list, const map &outside_addrs); /// /// @ingroup ge @@ -897,10 +898,8 @@ class DavinciModel { void *global_step_addr_{nullptr}; uint64_t global_step_size_{0}; - map new_input_data_info_; - map new_output_data_info_; - map new_input_outside_addrs_; - map new_output_outside_addrs_; + map input_data_info_; + map output_data_info_; set real_virtual_addrs_; diff --git a/ge/graph/load/model_manager/ts_mem_mall.h b/ge/graph/load/model_manager/ts_mem_mall.h index 64a64930..74ce5a16 100644 --- a/ge/graph/load/model_manager/ts_mem_mall.h +++ b/ge/graph/load/model_manager/ts_mem_mall.h @@ -100,8 +100,8 @@ class TsMemMall { private: std::mutex mem_mutex_; - std::unordered_map mem_store_size_; - std::unordered_map mem_store_addr_; + std::map mem_store_size_; + std::map mem_store_addr_; rtMemType_t mem_type_; }; } // namespace ge diff --git a/ge/graph/load/model_manager/zero_copy_offset.cc b/ge/graph/load/model_manager/zero_copy_offset.cc index 3f8555bb..4a448869 100644 --- a/ge/graph/load/model_manager/zero_copy_offset.cc +++ b/ge/graph/load/model_manager/zero_copy_offset.cc @@ -127,8 +127,8 @@ void ZeroCopyOffset::IsL2Fusion(const vector &fusion_basic_addrs, const } } -void ZeroCopyOffset::SetInputOutsideAddrs(const vector &output_offset_list, void *addr, const size_t &index, - bool fusion_flag, std::set &real_virtual_addrs) { +void ZeroCopyOffset::SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag, + set &real_virtual_addrs) { uint32_t out_count = 0; if (!fusion_flag) { out_count++; @@ -138,7 +138,6 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector &output_offset_l real_virtual_addrs.insert(addr); } else { GELOGI("[ZCPY] set l2-fusion for virtual_addr %p.", addr); - int64_t output_offset = output_offset_list.at(index); for (size_t i = 0; i < zero_copy_basic_offset_.size(); ++i) { if (zero_copy_basic_offset_.at(i) == output_offset) { out_count++; @@ -153,6 +152,7 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector &output_offset_l } } addr_count_ = out_count; + valid_relative_offset_ = true; } void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr, @@ -181,9 +181,13 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo } } addr_count_ = out_count; + valid_relative_offset_ = true; } void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { + if (!valid_relative_offset_) { + return; + } const auto addr_val = reinterpret_cast(outside_addr); for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) { auto args_addrs = outside_addrs_[out_count].find(outside_addr); diff --git a/ge/graph/load/model_manager/zero_copy_offset.h b/ge/graph/load/model_manager/zero_copy_offset.h index fc63fced..82e1bb6d 100644 --- a/ge/graph/load/model_manager/zero_copy_offset.h +++ b/ge/graph/load/model_manager/zero_copy_offset.h @@ -43,8 +43,7 @@ class ZeroCopyOffset { ~ZeroCopyOffset(); Status InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, bool &fusion_flag); - void SetInputOutsideAddrs(const vector &output_offset_list, void *addr, const size_t &index, - bool fusion_flag, std::set &real_virtual_addrs); + void SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag, set &real_virtual_addrs); void IsL2Fusion(const vector &fusion_basic_addrs, const int64_t &tensor_addr, bool &fusion_flag); Status InitOutputDataInfo(const vector &input_size_list, const vector &virtual_addr_list, @@ -65,9 +64,10 @@ class ZeroCopyOffset { // data_size of Data/Netoutput int64_t GetDataSize() const { return data_size_; } // value of *outside_addrs_ from davinci_model - const std::vector>> &GetOutsideAddrs() { return outside_addrs_; } + const std::vector>> &GetOutsideAddrs() const { return outside_addrs_; } // name of op std::string GetOpName() const { return op_name_; } + const bool IsRelativeOffsetValid() const { return valid_relative_offset_; } private: void *basic_addr_ = nullptr; @@ -81,6 +81,7 @@ class ZeroCopyOffset { std::vector zero_copy_basic_offset_; std::vector zero_copy_relative_offset_; + bool valid_relative_offset_ = false; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_ diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index b6598f11..8b57858d 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -131,7 +131,7 @@ bool IsTailingOptimization() { } ge::Status CheckFpCeilingMode() { - static const std::unordered_set kValidFpCeilingMode = {"0", "1", "2"}; + static const std::set kValidFpCeilingMode = {"0", "1", "2"}; string mode; auto ret = ge::GetContext().GetOption("ge.fpCeilingMode", mode); if (ret == ge::GRAPH_SUCCESS) { diff --git a/ge/graph/manager/graph_var_manager.h b/ge/graph/manager/graph_var_manager.h index 924ddcb7..0da12f9c 100755 --- a/ge/graph/manager/graph_var_manager.h +++ b/ge/graph/manager/graph_var_manager.h @@ -170,8 +170,8 @@ class VarResource { std::unordered_map var_addr_mgr_map_; std::unordered_map cur_var_tensor_desc_map_; std::unordered_map> var_to_trans_road_; - std::unordered_map var_names_to_changed_graph_id_; - std::unordered_map var_names_to_allocated_graph_id_; + std::map var_names_to_changed_graph_id_; + std::map var_names_to_allocated_graph_id_; std::map> var_broad_cast_info_; }; diff --git a/ge/graph/partition/graph_partition.cc b/ge/graph/partition/graph_partition.cc index fbc13920..d584337e 100755 --- a/ge/graph/partition/graph_partition.cc +++ b/ge/graph/partition/graph_partition.cc @@ -843,7 +843,7 @@ bool ge::GraphPartitioner::HasSecondPath(size_t src, size_t dst, size_t upper_bo /// Avoid recursion since stack space might be limited. /// We instead keep a stack of nodes to visit. std::vector temp_stack; - std::unordered_set visited; + std::set visited; temp_stack.push_back(src); while (!temp_stack.empty()) { size_t cluster = temp_stack.back(); diff --git a/ge/graph/partition/graph_partition.h b/ge/graph/partition/graph_partition.h index 9c22d40c..f34c67e6 100644 --- a/ge/graph/partition/graph_partition.h +++ b/ge/graph/partition/graph_partition.h @@ -36,7 +36,7 @@ using PartitionMap = std::unordered_map; using NodetoNodeMap = std::unordered_map; using EnginetoGraphMap = std::unordered_map; using EdgeMap = std::set>; -using ClusterSet = std::unordered_set; +using ClusterSet = std::set; class Cluster { public: size_t index_; // corresponding to rank of node diff --git a/ge/graph/passes/constant_folding_pass.cc b/ge/graph/passes/constant_folding_pass.cc index 4db14fc3..8a0c6c3c 100644 --- a/ge/graph/passes/constant_folding_pass.cc +++ b/ge/graph/passes/constant_folding_pass.cc @@ -50,12 +50,12 @@ Status RunOpKernelWithCheck(NodePtr &node, return FoldingPass::RunOpKernel(node, inputs, outputs); } -const std::unordered_map> +const std::map> &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { return statistic_of_ge_constant_folding_; } -const std::unordered_map> +const std::map> &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { return statistic_of_op_constant_folding_; } diff --git a/ge/graph/passes/constant_folding_pass.h b/ge/graph/passes/constant_folding_pass.h index c977157e..703e6edd 100644 --- a/ge/graph/passes/constant_folding_pass.h +++ b/ge/graph/passes/constant_folding_pass.h @@ -26,11 +26,11 @@ namespace ge { class ConstantFoldingPass : public FoldingPass { public: Status Run(ge::NodePtr &node) override; - const std::unordered_map> &GetGeConstantFoldingPerfStatistic() const; - const std::unordered_map> &GetOpConstantFoldingPerfStatistic() const; + const std::map> &GetGeConstantFoldingPerfStatistic() const; + const std::map> &GetOpConstantFoldingPerfStatistic() const; private: - std::unordered_map> statistic_of_op_constant_folding_; - std::unordered_map> statistic_of_ge_constant_folding_; + std::map> statistic_of_op_constant_folding_; + std::map> statistic_of_ge_constant_folding_; }; } // namespace ge diff --git a/ge/graph/passes/hccl_continuous_memcpy_pass.cc b/ge/graph/passes/hccl_continuous_memcpy_pass.cc index 7dd2fb06..cc928479 100644 --- a/ge/graph/passes/hccl_continuous_memcpy_pass.cc +++ b/ge/graph/passes/hccl_continuous_memcpy_pass.cc @@ -372,6 +372,11 @@ NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, } GELOGI("Create Assign op:%s.", op_desc->GetName().c_str()); + if (!AttrUtils::SetBool(op_desc, ATTR_NEED_COMPILE, true)) { + GELOGE(INTERNAL_ERROR, "Set ATTR_NEED_COMPILE Attr for node:%s fail.", op_desc->GetName().c_str()); + return nullptr; + } + graphStatus ret = op_desc->AddInputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); if (ret != GRAPH_SUCCESS) { GELOGE(INTERNAL_ERROR, "Create Assign op: add ref input desc fail."); diff --git a/ge/graph/passes/hccl_continuous_memcpy_pass.h b/ge/graph/passes/hccl_continuous_memcpy_pass.h index 0a21c896..538e89e9 100644 --- a/ge/graph/passes/hccl_continuous_memcpy_pass.h +++ b/ge/graph/passes/hccl_continuous_memcpy_pass.h @@ -52,7 +52,7 @@ class HcclContinuousMemcpyPass : public GraphPass { bool IsDataNode(const std::string& node_type); - std::unordered_map node_num_map_; + std::map node_num_map_; }; } // namespace ge diff --git a/ge/graph/passes/hccl_memcpy_pass.h b/ge/graph/passes/hccl_memcpy_pass.h index feea82d9..7ab63c59 100755 --- a/ge/graph/passes/hccl_memcpy_pass.h +++ b/ge/graph/passes/hccl_memcpy_pass.h @@ -50,7 +50,7 @@ class HcclMemcpyPass : public GraphPass { bool IsDataNode(const std::string& node_type); - std::unordered_map node_num_map_; + std::map node_num_map_; }; } // namespace ge diff --git a/ge/graph/passes/multi_batch_clone_pass.cc b/ge/graph/passes/multi_batch_clone_pass.cc index 17a1e3bb..b8fb6bde 100755 --- a/ge/graph/passes/multi_batch_clone_pass.cc +++ b/ge/graph/passes/multi_batch_clone_pass.cc @@ -92,8 +92,7 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { } // parser data dynamic info from atc parameter --input_shape - if (multibatch::ParserDataToDynmaicInfo(batch_shapes_, GetLocalOmgContext().user_input_dims, - data_to_dynamic_info_) != SUCCESS) { + if (CheckAndParseDynamicData() != SUCCESS) { GELOGE(PARAM_INVALID, "Parse each data's own dynamic info failed"); return PARAM_INVALID; } @@ -177,6 +176,58 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { return SUCCESS; } +Status MultiBatchClonePass::CheckAndParseDynamicData() { + size_t unknown_shape_count = 0; + auto data_name_and_shape = GetLocalOmgContext().user_input_dims; + std::vector data_name_order; + for (auto &item : data_name_and_shape) { + data_name_order.push_back(item.first); + } + if (!getnext_sink_dynamic_dims_) { + for (const auto &node : all_data_nodes_) { + auto data_desc = NodeUtils::GetOutputDesc(*node, kDataOutIndex); + auto data_shape = data_desc.GetShape(); + auto data_format = data_desc.GetFormat() == Format::FORMAT_NCHW ? "NCHW" : + data_desc.GetFormat() == Format::FORMAT_NHWC ? "NHWC" : "Others"; + auto data_name = node->GetName(); + + const auto &data_shape_dims = data_shape.GetDims(); + if (std::all_of(data_shape_dims.begin(), data_shape_dims.end(), [](int64_t val) { return val >= 0; })) { + continue; + } + ++unknown_shape_count; + auto iter = find(data_name_order.begin(), data_name_order.end(), data_name); + if (iter == data_name_order.end()) { + if (!GetLocalOmgContext().dynamic_batch_size.empty()) { + auto ret = multibatch::CheckDynamicBatchShape(data_shape_dims, data_name); + GE_IF_BOOL_EXEC(ret == false, GELOGE(PARAM_INVALID, "Failed to check dynamic batch shape of %s.", + data_name.c_str()); return PARAM_INVALID); + } else if (!GetLocalOmgContext().dynamic_image_size.empty()) { + auto ret = multibatch::CheckDynamicImageSizeShape(data_shape_dims, data_name, data_format); + GE_IF_BOOL_EXEC(ret == false, GELOGE(PARAM_INVALID, "Failed to check dynamic image size shape of %s.", + data_name.c_str()); return PARAM_INVALID); + } else if (!GetLocalOmgContext().dynamic_dims.empty()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "reason"}, + {"--input_shape", "all dynamic data must be set in --input_shape"}); + GELOGE(INTERNAL_ERROR, "data: %s shape:%s must be set int --input_shape", + node->GetName().c_str(), data_shape.ToString().c_str()); + return INTERNAL_ERROR; + } + data_name_and_shape.emplace_back(data_name, data_shape_dims); + } + } + } + auto ret = multibatch::ParserDataToDynamicInfo(batch_shapes_, data_name_and_shape, data_to_dynamic_info_); + GE_CHK_STATUS_RET(ret, "Failed to parse data to dynamic info."); + if (!getnext_sink_dynamic_dims_ && unknown_shape_count == 0) { + ErrorManager::GetInstance().ATCReportErrMessage("E10040"); + GELOGE(PARAM_INVALID, + "Need unknow shape data when user set --dynamic_batch_size, --dynamic_image_size or --dynamic_dims"); + return PARAM_INVALID; + } + return SUCCESS; +} + Status MultiBatchClonePass::InitParamsOfGetNext(const NodePtr &node) { data_count_from_getnext_ = 0; getnext_sink_dynamic_dims_ = false; diff --git a/ge/graph/passes/multi_batch_clone_pass.h b/ge/graph/passes/multi_batch_clone_pass.h index 66e92892..0dae88ca 100755 --- a/ge/graph/passes/multi_batch_clone_pass.h +++ b/ge/graph/passes/multi_batch_clone_pass.h @@ -175,6 +175,8 @@ class MultiBatchClonePass : public GraphPass { /// @return 0: SUCCESS / others: FAILED /// Status UpdateOutputTensor(uint32_t parent_index, uint32_t unused_num); + + Status CheckAndParseDynamicData(); std::string session_graph_id_; std::vector> batch_shapes_; diff --git a/ge/graph/passes/switch_to_stream_switch_pass.h b/ge/graph/passes/switch_to_stream_switch_pass.h index 05628871..e82ec17f 100644 --- a/ge/graph/passes/switch_to_stream_switch_pass.h +++ b/ge/graph/passes/switch_to_stream_switch_pass.h @@ -235,7 +235,7 @@ class SwitchToStreamSwitchPass : public GraphPass { std::vector stream_switch_nodes_; std::unordered_map>>> cond_node_map_; std::unordered_map> switch_node_map_; - std::unordered_map node_num_map_; + std::map node_num_map_; }; } // namespace ge #endif // GE_GRAPH_PASSES_SWITCH_TO_STREAM_SWITCH_PASS_H_ diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc index e43c5dd2..215b31ee 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -738,7 +738,7 @@ Status MultiBatchGraphCopyer::CheckAndParseDynamicData(){ } } } - auto ret = ParserDataToDynmaicInfo(shapes_, data_name_and_shape, data_to_dynamic_info_); + auto ret = ParserDataToDynamicInfo(shapes_, data_name_and_shape, data_to_dynamic_info_); GE_CHK_STATUS_RET(ret, "Failed to parse data to dynamic info."); if (!getnext_sink_dynamic_dims_ && unknown_shape_count == 0) { ErrorManager::GetInstance().ATCReportErrMessage("E10040"); diff --git a/ge/graph/preprocess/multi_batch_options.cc b/ge/graph/preprocess/multi_batch_options.cc index 84f38fa6..3bde0efb 100644 --- a/ge/graph/preprocess/multi_batch_options.cc +++ b/ge/graph/preprocess/multi_batch_options.cc @@ -377,7 +377,7 @@ bool InitDynamicParams(vector> &shapes) { /// @param [out] map>> &data_to_dynamic_info: key:data_name. value:dynamic dims. /// @return true: Configed for Multi batch / false: Not configed for Multi batch. /// -Status ParserDataToDynmaicInfo(const vector> &shapes, +Status ParserDataToDynamicInfo(const vector> &shapes, vector>> &data_name_and_shape, map> > &data_to_dynamic_info) { size_t cur_data_index = 0; diff --git a/ge/graph/preprocess/multi_batch_options.h b/ge/graph/preprocess/multi_batch_options.h index 9baf4f43..0ddaea0d 100644 --- a/ge/graph/preprocess/multi_batch_options.h +++ b/ge/graph/preprocess/multi_batch_options.h @@ -74,7 +74,7 @@ Status CalcShape(const std::vector &batch_shape, GeShape &data_shape); /// @param [out] map>> &data_to_dynamic_info: key:data_name. value:dynamic dims. /// @return SUCCESS / PARAM_INVALID /// -Status ParserDataToDynmaicInfo(const vector> &shapes, +Status ParserDataToDynamicInfo(const vector> &shapes, vector>> &data_name_and_shape, map>> &data_to_dynamic_info); @@ -93,7 +93,7 @@ Status StampDynamicType(const OpDescPtr &op_desc); /// @param [in] const string &data_name: cur data name. /// @return 0: true/false /// -bool CheckDynamicBatchShape(const vector &shape, const string &data_name); +GE_FUNC_VISIBILITY bool CheckDynamicBatchShape(const vector &shape, const string &data_name); /// /// @ingroup ge @@ -104,7 +104,7 @@ bool CheckDynamicBatchShape(const vector &shape, const string &data_nam /// @param [in] const std::string &input_format: format of input. /// @return 0: true/false /// -bool CheckDynamicImageSizeShape(const vector &shape, const string &data_name, +GE_FUNC_VISIBILITY bool CheckDynamicImageSizeShape(const vector &shape, const string &data_name, const std::string &input_format); } // namespace multibatch diff --git a/ge/host_cpu_engine/CMakeLists.txt b/ge/host_cpu_engine/CMakeLists.txt index cbd0bd8b..13cb7434 100644 --- a/ge/host_cpu_engine/CMakeLists.txt +++ b/ge/host_cpu_engine/CMakeLists.txt @@ -21,10 +21,12 @@ add_library(host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) target_compile_options(host_cpu_engine PRIVATE -Werror -fno-common + -fvisibility=hidden ) target_compile_definitions(host_cpu_engine PRIVATE google=ascend_private + FUNC_VISIBILITY ) target_include_directories(host_cpu_engine PRIVATE @@ -44,6 +46,10 @@ target_include_directories(host_cpu_engine PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc ) +target_link_options(host_cpu_engine PRIVATE + -Wl,-Bsymbolic +) + target_link_libraries(host_cpu_engine PRIVATE $ -Wl,--no-as-needed @@ -60,11 +66,12 @@ add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) target_compile_options(atc_host_cpu_engine PRIVATE -Werror -fno-common + -fvisibility=hidden ) target_compile_definitions(atc_host_cpu_engine PRIVATE - COMPILE_OMG_PACKAGE google=ascend_private + FUNC_VISIBILITY ) target_include_directories(atc_host_cpu_engine PRIVATE @@ -84,6 +91,10 @@ target_include_directories(atc_host_cpu_engine PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc ) +target_link_options(atc_host_cpu_engine PRIVATE + -Wl,-Bsymbolic +) + target_link_libraries(atc_host_cpu_engine PRIVATE $ -Wl,--no-as-needed @@ -105,10 +116,12 @@ add_library(host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST}) target_compile_options(host_cpu_opskernel_builder PRIVATE -Werror -fno-common + -fvisibility=hidden ) target_compile_definitions(host_cpu_opskernel_builder PRIVATE google=ascend_private + FUNC_VISIBILITY ) target_include_directories(host_cpu_opskernel_builder PRIVATE @@ -128,6 +141,10 @@ target_include_directories(host_cpu_opskernel_builder PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc ) +target_link_options(host_cpu_opskernel_builder PRIVATE + -Wl,-Bsymbolic +) + target_link_libraries(host_cpu_opskernel_builder PRIVATE $ -Wl,--no-as-needed @@ -145,10 +162,12 @@ add_library(atc_host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST}) target_compile_options(atc_host_cpu_opskernel_builder PRIVATE -Werror -fno-common + -fvisibility=hidden ) target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE google=ascend_private + FUNC_VISIBILITY ) target_include_directories(atc_host_cpu_opskernel_builder PRIVATE @@ -168,6 +187,10 @@ target_include_directories(atc_host_cpu_opskernel_builder PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc ) +target_link_options(atc_host_cpu_opskernel_builder PRIVATE + -Wl,-Bsymbolic +) + target_link_libraries(atc_host_cpu_opskernel_builder PRIVATE $ -Wl,--no-as-needed @@ -190,11 +213,13 @@ add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST}) target_compile_options(host_cpu_opskernel_builder_static PRIVATE -Werror -fno-common + -fvisibility=hidden ) target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE google=ascend_private LOG_CPP + FUNC_VISIBILITY ) target_include_directories(host_cpu_opskernel_builder_static PRIVATE diff --git a/ge/host_cpu_engine/engine/host_cpu_engine.h b/ge/host_cpu_engine/engine/host_cpu_engine.h index c8d5608f..c29df00c 100644 --- a/ge/host_cpu_engine/engine/host_cpu_engine.h +++ b/ge/host_cpu_engine/engine/host_cpu_engine.h @@ -17,6 +17,20 @@ #ifndef GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ #define GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY _declspec(dllexport) +#else +#define GE_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_VISIBILITY +#endif +#endif + #include #include #include @@ -32,7 +46,7 @@ namespace host_cpu { * host cpu engine. * Used for the ops which executes on host. */ -class HostCpuEngine { +class GE_FUNC_VISIBILITY HostCpuEngine { public: /** * get HostCpuEngine instance. @@ -87,25 +101,25 @@ extern "C" { * When Ge start, GE will invoke this interface * @return The status whether initialize successfully */ -ge::Status Initialize(const map &options); +GE_FUNC_VISIBILITY ge::Status Initialize(const map &options); /** * After the initialize, GE will invoke this interface to get the Ops kernel Store * @param ops_kernel_map The host cpu's ops kernel info */ -void GetOpsKernelInfoStores(std::map &ops_kernel_map); +GE_FUNC_VISIBILITY void GetOpsKernelInfoStores(std::map &ops_kernel_map); /** * After the initialize, GE will invoke this interface to get the Graph Optimizer * @param graph_optimizers The host cpu's Graph Optimizer objs */ -void GetGraphOptimizerObjs(std::map &graph_optimizers); +GE_FUNC_VISIBILITY void GetGraphOptimizerObjs(std::map &graph_optimizers); /** * When the graph finished, GE will invoke this interface * @return The status whether initialize successfully */ -ge::Status Finalize(); +GE_FUNC_VISIBILITY ge::Status Finalize(); } #endif // GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ diff --git a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h index 82375b9f..066d943c 100644 --- a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h +++ b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h @@ -17,11 +17,25 @@ #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY _declspec(dllexport) +#else +#define GE_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_VISIBILITY +#endif +#endif + #include "common/opskernel/ops_kernel_builder.h" namespace ge { namespace host_cpu { -class HostCpuOpsKernelBuilder : public OpsKernelBuilder { +class GE_FUNC_VISIBILITY HostCpuOpsKernelBuilder : public OpsKernelBuilder { public: Status Initialize(const map &options) override; diff --git a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h index f7539f8e..e3667d61 100644 --- a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h +++ b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h @@ -17,6 +17,20 @@ #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_ #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_ +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY _declspec(dllexport) +#else +#define GE_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_VISIBILITY +#endif +#endif + #include #include #include @@ -25,7 +39,7 @@ namespace ge { namespace host_cpu { -class HostCpuOpsKernelInfoStore : public OpsKernelInfoStore { +class GE_FUNC_VISIBILITY HostCpuOpsKernelInfoStore : public OpsKernelInfoStore { public: HostCpuOpsKernelInfoStore() {} ~HostCpuOpsKernelInfoStore() override = default; diff --git a/ge/host_cpu_engine/ops_kernel_store/op/host_op.h b/ge/host_cpu_engine/ops_kernel_store/op/host_op.h index 0f560485..023eb957 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/host_op.h +++ b/ge/host_cpu_engine/ops_kernel_store/op/host_op.h @@ -21,7 +21,7 @@ namespace ge { namespace host_cpu { -class HostOp : public Op { +class GE_FUNC_VISIBILITY HostOp : public Op { public: HostOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} ~HostOp() override = default; diff --git a/ge/host_cpu_engine/ops_kernel_store/op/op.h b/ge/host_cpu_engine/ops_kernel_store/op/op.h index c094f080..b4c8b33e 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/op.h +++ b/ge/host_cpu_engine/ops_kernel_store/op/op.h @@ -29,7 +29,7 @@ namespace host_cpu { /** * The base class for all op. */ -class Op { +class GE_FUNC_VISIBILITY Op { public: Op(const Node &node, RunContext &run_context) : run_context_(run_context), node_(node) {} virtual ~Op() = default; diff --git a/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h b/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h index 3a235ffd..73174860 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h +++ b/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h @@ -32,7 +32,7 @@ using OP_CREATOR_FUNC = std::function(const Node &, RunConte /** * manage all the op, support create op. */ -class OpFactory { +class GE_FUNC_VISIBILITY OpFactory { public: static OpFactory &Instance(); @@ -70,7 +70,7 @@ class OpFactory { std::vector all_ops_; }; -class OpRegistrar { +class GE_FUNC_VISIBILITY OpRegistrar { public: OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) { OpFactory::Instance().RegisterCreator(type, func); diff --git a/ge/hybrid/common/tensor_value.cc b/ge/hybrid/common/tensor_value.cc index 16ecfaa4..c691c6f3 100644 --- a/ge/hybrid/common/tensor_value.cc +++ b/ge/hybrid/common/tensor_value.cc @@ -71,7 +71,7 @@ TensorValue::TensorValue(void *buffer, size_t size) : ref_buffer_(buffer), ref_s TensorValue::~TensorValue() { Destroy(); } void TensorValue::Destroy() { - if (buffer_ != nullptr || ref_buffer_ != nullptr) { + if (buffer_ != nullptr) { GELOGD("Unref tensor: %s", DebugString().c_str()); buffer_.reset(); } diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index c47dafc1..9c4bb217 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -71,12 +71,14 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_)); RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End"); - HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc), "Failed to execute partitioned call."); + HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc, args.outputs), + "Failed to execute partitioned call."); RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); + args.outputs.clear(); HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); RECORD_MODEL_EXECUTION_EVENT(&context_, "[GetOutput] End"); return SUCCESS; diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index f8f122b1..8b194233 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -131,10 +131,14 @@ Status SubgraphExecutor::InitInputsForKnownShape(const std::vector } Status SubgraphExecutor::ExecuteAsync(const std::vector &inputs, - const std::vector &input_desc) { + const std::vector &input_desc, + const std::vector &outputs) { GELOGD("[%s] is dynamic = %s", graph_item_->GetName().c_str(), graph_item_->IsDynamic() ? "true" : "false"); GE_CHK_STATUS_RET(Init(inputs, input_desc), "[%s] Failed to init executor.", graph_item_->GetName().c_str()); - + if (!outputs.empty()) { + GE_CHK_STATUS_RET(EnableOutputZeroCopy(outputs), + "Failed to enable output zero copy by user provided outputs."); + } if (!graph_item_->IsDynamic()) { return ExecuteAsyncForKnownShape(inputs); } @@ -144,6 +148,11 @@ Status SubgraphExecutor::ExecuteAsync(const std::vector &inputs, return SUCCESS; } +Status SubgraphExecutor::ExecuteAsync(const std::vector &inputs, + const std::vector &input_desc) { + return ExecuteAsync(inputs, input_desc, {}); +} + Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vector &inputs) { GELOGD("[%s] subgraph is not dynamic.", graph_item_->GetName().c_str()); if (graph_item_->GetAllNodes().size() != 1) { @@ -440,5 +449,37 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) { return SUCCESS; } + +Status SubgraphExecutor::EnableOutputZeroCopy(const vector &outputs) { + GELOGD("To enable zero copy, output number = %zu", outputs.size()); + const auto &output_edges = graph_item_->GetOutputEdges(); + // Op -> MetOutput, set the output tensor of Op that output to the NetOutput node + if (outputs.size() != output_edges.size()) { + GELOGE(PARAM_INVALID, "Output number mismatches, expect = %zu, but given = %zu", + output_edges.size(), + outputs.size()); + return PARAM_INVALID; + } + + for (size_t i = 0; i < outputs.size(); ++i) { + auto &output_tensor = outputs[i]; + auto &output_node = output_edges[i].first; + int output_idx = output_edges[i].second; + GELOGD("[%s] Set output tensor[%zu] to [%s]'s output[%d], tensor = %s", + graph_item_->GetName().c_str(), + i, + output_node->NodeName().c_str(), + output_idx, + output_tensor.DebugString().c_str()); + + GE_CHK_STATUS_RET(subgraph_context_->SetOutput(*output_node, output_idx, output_tensor), + "[%s] Failed to set input tensor[%zu]", + graph_item_->GetName().c_str(), + i); + } + + GELOGD("Done enabling zero copy for outputs successfully."); + return SUCCESS; +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/executor/subgraph_executor.h b/ge/hybrid/executor/subgraph_executor.h index 4523e2c4..2b7e9371 100644 --- a/ge/hybrid/executor/subgraph_executor.h +++ b/ge/hybrid/executor/subgraph_executor.h @@ -43,7 +43,19 @@ class SubgraphExecutor { * @param input_desc input tensor descriptions * @return SUCCESS on success, error code otherwise */ - Status ExecuteAsync(const std::vector &inputs, const std::vector &input_desc); + Status ExecuteAsync(const std::vector &inputs, + const std::vector &input_desc); + + /** + * Execute subgraph async, output tensor address(not data) and output tensor descriptions are + * valid after this method returned + * @param inputs input tensors + * @param input_desc input tensor descriptions + * @return SUCCESS on success, error code otherwise + */ + Status ExecuteAsync(const std::vector &inputs, + const std::vector &input_desc, + const std::vector &outputs); /** * Execute subgraph async, output tensor address(not data) and output tensor descriptions are @@ -76,6 +88,7 @@ class SubgraphExecutor { private: Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state); + Status EnableOutputZeroCopy(const std::vector &outputs); static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state); Status Init(const std::vector &inputs, const std::vector &input_desc); diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc index 7e5d8fe5..4511c2b9 100644 --- a/ge/hybrid/model/hybrid_model.cc +++ b/ge/hybrid/model/hybrid_model.cc @@ -40,9 +40,14 @@ HybridModel::~HybridModel() { GELOGD("[%s] HybridModel destroyed.", model_name_.c_str()); } -Status HybridModel::Init() { +Status HybridModel::Init(bool is_single_op) { GELOGD("Start to init hybrid model."); - GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "Failed to build hybrid model."); + is_single_op_ = is_single_op; + if (is_single_op) { + GE_CHK_STATUS_RET(HybridModelBuilder(*this).BuildForSingleOp(), "Failed to build hybrid model."); + } else { + GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "Failed to build hybrid model."); + } GELOGD("HybridModel initialized successfully."); return SUCCESS; } diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h index 72495cad..1f973d1e 100644 --- a/ge/hybrid/model/hybrid_model.h +++ b/ge/hybrid/model/hybrid_model.h @@ -37,7 +37,7 @@ class HybridModel { ~HybridModel(); - Status Init(); + Status Init(bool is_single_op = false); const NodeItem *GetNodeItem(const NodePtr &node) const; @@ -69,6 +69,10 @@ class HybridModel { return model_id_; } + bool IsSingleOp() const { + return is_single_op_; + } + TensorValue* GetVariable(const string &name) const; NodePtr GetVariableNode(const string &name) const; @@ -131,11 +135,13 @@ class HybridModel { std::map> node_items_; bool is_new_model_desc_ = false; // support aipp + bool is_single_op_ = false; // runtime fields uint32_t device_id_ = 0; uint32_t model_id_ = 0; uint8_t *var_mem_base_ = nullptr; + std::unique_ptr weight_buffer_; RuntimeParam root_runtime_param_; }; } // namespace hybrid diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index b314c6a7..03e76bc9 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -147,6 +147,21 @@ Status HybridModelBuilder::Build() { return SUCCESS; } +Status HybridModelBuilder::BuildForSingleOp() { + GE_CHK_STATUS_RET(ValidateParams(), "Failed to validate GeRootModel"); + hybrid_model_.model_name_ = ge_root_model_->GetRootGraph()->GetName(); + GELOGI("[%s] Start to build hybrid model.", GetGraphName()); + auto ret = ge_root_model_->GetSubgraphInstanceNameToModel(); + const GeModelPtr ge_model = ret[ge_root_model_->GetRootGraph()->GetName()]; + GE_CHK_STATUS_RET(IndexTaskDefs(ge_root_model_->GetRootGraph(), ge_model), + "[%s] Failed to index task defs", GetGraphName()); + GE_CHK_STATUS_RET(LoadGraph(), "[%s] Failed to load graph", GetGraphName()); + GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName()); + GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName()); + GELOGI("[%s] Done building hybrid model for single op successfully.", GetGraphName()); + return SUCCESS; +} + Status HybridModelBuilder::ValidateParams() { GE_CHECK_NOTNULL(ge_root_model_); GE_CHECK_NOTNULL(ge_root_model_->GetRootGraph()); @@ -951,46 +966,71 @@ Status HybridModelBuilder::InitVariableTensors() { } Status HybridModelBuilder::InitWeights() { + // For constant in root graph + const auto &root_graph = ge_root_model_->GetRootGraph(); + const auto &subgraph_models = ge_root_model_->GetSubgraphInstanceNameToModel(); + auto iter = subgraph_models.find(root_graph->GetName()); + if (iter == subgraph_models.end()) { + GELOGD("Root graph model not found"); + return SUCCESS; + } + + auto &root_model = iter->second; + const auto &weight_buffer = root_model->GetWeight(); + if (weight_buffer.GetSize() == 0) { + GELOGD("weight is empty"); + return SUCCESS; + } + auto allocator = NpuMemoryAllocator::GetAllocator(); GE_CHECK_NOTNULL(allocator); - - for (auto &it : hybrid_model_.node_items_) { - auto &node_item = it.second; - if (node_item->node_type != CONSTANT) { + hybrid_model_.weight_buffer_ = TensorBuffer::Create(allocator, weight_buffer.size()); + GE_CHECK_NOTNULL(hybrid_model_.weight_buffer_); + auto weight_base = reinterpret_cast(hybrid_model_.weight_buffer_->GetData()); + GE_CHK_RT_RET(rtMemcpy(weight_base, + hybrid_model_.weight_buffer_->GetSize(), + weight_buffer.GetData(), + weight_buffer.GetSize(), + RT_MEMCPY_HOST_TO_DEVICE)); + + GELOGI("Init weight mem successfully, weight base %p, weight size = %zu", + weight_base, + hybrid_model_.weight_buffer_->GetSize()); + for (auto &node : root_graph->GetDirectNode()) { + if (node->GetType() != CONSTANT) { continue; } - const auto &constant_node = node_item->node; - auto op_desc = constant_node->GetOpDesc(); + auto op_desc = node->GetOpDesc(); auto v_weights = ModelUtils::GetWeights(op_desc); if (v_weights.empty()) { - GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", constant_node->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", node->GetName().c_str()); return INTERNAL_ERROR; } auto *ge_tensor = const_cast(v_weights[0].get()); - auto output_desc = op_desc->MutableOutputDesc(0); - GE_CHECK_NOTNULL(output_desc); - auto tensor_size = ge_tensor->GetData().GetSize(); - GELOGD("[%s] Start to init Constant node [%s], size = %ld", + GE_CHECK_NOTNULL(ge_tensor); + const GeTensorDesc &tensor_desc = ge_tensor->GetTensorDesc(); + int64_t tensor_size = 0; + GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*op_desc->MutableOutputDesc(0), tensor_size), + "[%s] Failed to get tensor size", + node->GetName().c_str()); + int64_t data_offset = 0; + GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetDataOffset(tensor_desc, data_offset), + "[%s] Failed to get data offset", + node->GetName().c_str()); + GELOGD("[%s] Start to init Constant node [%s], size = %ld, offset = %ld", GetGraphName(), - constant_node->GetName().c_str(), - tensor_size); + node->GetName().c_str(), + tensor_size, + data_offset); - auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size); + auto tensor_buffer = TensorBuffer::Create(weight_base + data_offset, tensor_size); GE_CHECK_NOTNULL(tensor_buffer); std::unique_ptr constant_tensor(new (std::nothrow)TensorValue(std::move(tensor_buffer))); GE_CHECK_NOTNULL(constant_tensor); constant_tensor->SetName("Constant_" + op_desc->GetName()); - if (tensor_size > 0) { - GE_CHK_RT_RET(rtMemcpy(constant_tensor->MutableData(), - constant_tensor->GetSize(), - ge_tensor->GetData().data(), - ge_tensor->GetData().size(), - RT_MEMCPY_HOST_TO_DEVICE)); - } - - hybrid_model_.constant_tensors_.emplace(constant_node, std::move(constant_tensor)); - GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), constant_node->GetName().c_str(), tensor_size); + hybrid_model_.constant_tensors_.emplace(node, std::move(constant_tensor)); + GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), node->GetName().c_str(), tensor_size); } return SUCCESS; } @@ -1038,6 +1078,53 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr return SUCCESS; } +Status HybridModelBuilder::IndexTaskDefs(const ComputeGraphPtr &sub_graph, const GeModelPtr &ge_model) { + // index task defs + GELOGD("To index tasks for subgraph: %s", sub_graph->GetName().c_str()); + std::unordered_map node_map; + for (const auto &node : sub_graph->GetDirectNode()) { + GE_CHECK_NOTNULL(node); + GE_CHECK_NOTNULL(node->GetOpDesc()); + auto node_id = node->GetOpDesc()->GetId(); + GELOGD("op_index = %ld, node_name = %s", node_id, node->GetName().c_str()); + node_map.emplace(node_id, node); + } + + auto tasks = ge_model->GetModelTaskDefPtr()->task(); + for (int i = 0; i < tasks.size(); ++i) { + const domi::TaskDef &task_def = tasks[i]; + GELOGI("Task id = %d, task type = %d", i, task_def.type()); + auto task_type = static_cast(task_def.type()); + uint32_t op_index = -1; + if (task_type == RT_MODEL_TASK_KERNEL) { + op_index = task_def.kernel().context().op_index(); + } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { + op_index = task_def.kernel_ex().op_index(); + } else if (task_type == RT_MODEL_TASK_HCCL) { + op_index = task_def.kernel_hccl().op_index(); + } else { + GELOGD("Skip task type: %d", static_cast(task_type)); + continue; + } + + auto iter = node_map.find(op_index); + if (iter == node_map.end()) { + GELOGE(INTERNAL_ERROR, "Failed to get node by index = %u", op_index); + return INTERNAL_ERROR; + } + + auto &node = iter->second; + if (task_type == RT_MODEL_TASK_KERNEL) { + ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc()); + } + + GELOGD("Task loaded for node: %s, task type = %d, op_index = %u", node->GetName().c_str(), task_type, op_index); + hybrid_model_.task_defs_[node].emplace_back(task_def); + } + + return SUCCESS; +} + Status HybridModelBuilder::IndexTaskDefs() { const auto &root_graph = ge_root_model_->GetRootGraph(); if (SetOutputNameAttr(*root_graph) != SUCCESS) { diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h index 045bf3ef..71663a6e 100644 --- a/ge/hybrid/model/hybrid_model_builder.h +++ b/ge/hybrid/model/hybrid_model_builder.h @@ -35,6 +35,7 @@ class HybridModelBuilder { explicit HybridModelBuilder(HybridModel &hybrid_model); ~HybridModelBuilder() = default; Status Build(); + Status BuildForSingleOp(); private: static Status UpdateAnchorStatus(const NodePtr &node); @@ -64,6 +65,7 @@ class HybridModelBuilder { Status ParseDependentInputNodes(NodeItem &node_item, const std::vector &dependencies); Status ParseDependentForFusedSubgraph(NodeItem &node_item); Status IndexTaskDefs(); + Status IndexTaskDefs(const ComputeGraphPtr &sub_graph, const GeModelPtr &ge_model); Status IndexSpecialNodes(); Status InitRuntimeParams(); Status InitModelMem(); diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc index cb5a7d4c..3174df80 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc @@ -49,6 +49,7 @@ Status AiCoreNodeExecutor::Initialize() { Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr &task) const { GE_CHECK_NOTNULL(node); GELOGI("AiCoreNodeExecutor(%s) LoadTask Start.", node->GetName().c_str()); + bool is_single_op = model.IsSingleOp(); auto *task_defs = model.GetTaskDefs(node); if (task_defs == nullptr || task_defs->empty()) { @@ -66,7 +67,8 @@ Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &nod AiCoreTaskBuilder builder(node->GetOpDesc(), *task_defs); std::unique_ptr node_task; - GE_CHK_STATUS_RET(builder.BuildTask(node_task, true), "[%s] Failed to build op tasks.", node->GetName().c_str()); + GE_CHK_STATUS_RET(builder.BuildTask(node_task, true, is_single_op), + "[%s] Failed to build op tasks.", node->GetName().c_str()); task = std::move(node_task); GELOGI("AiCoreNodeExecutor(%s) LoadTask End.", node->GetName().c_str()); return SUCCESS; diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index f1bd6466..a34bba22 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -65,7 +65,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { } TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); - if (rt_ret != RT_ERROR_NONE) { + if (rt_ret != RT_ERROR_NONE || is_single_op_) { void *bin_handle = nullptr; if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h index 3f350531..69a74ea9 100755 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -50,6 +50,8 @@ class AiCoreOpTask { uint32_t GetBlockDim() const {return block_dim_;} + void SetSingleOp(bool is_single_op) {is_single_op_ = is_single_op;}; + protected: Status UpdateTilingInfo(TaskContext &context); virtual std::string GetKeyForOpParamSize() const; @@ -72,6 +74,7 @@ class AiCoreOpTask { uint32_t args_size_ = 0; uint32_t block_dim_ = 1; bool clear_atomic_ = true; + bool is_single_op_ = false; std::vector output_indices_to_skip_; }; diff --git a/ge/hybrid/node_executor/aicore/aicore_task_builder.cc b/ge/hybrid/node_executor/aicore/aicore_task_builder.cc index b2996435..2bf2cb36 100755 --- a/ge/hybrid/node_executor/aicore/aicore_task_builder.cc +++ b/ge/hybrid/node_executor/aicore/aicore_task_builder.cc @@ -37,7 +37,9 @@ AiCoreTaskBuilder::AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector : op_desc_(op_desc), task_defs_(task_defs) { } -Status AiCoreTaskBuilder::BuildTask(std::unique_ptr &node_task, bool ignore_failure_on_atomic) { +Status AiCoreTaskBuilder::BuildTask(std::unique_ptr &node_task, + bool ignore_failure_on_atomic, + bool is_single_op) { GE_CHECK_NOTNULL(op_desc_); if (task_defs_.size() > kNumTaskWithAtomicAddrCleanTask) { GELOGE(INTERNAL_ERROR, @@ -68,6 +70,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr &node_task, bool i auto atomic_task = std::unique_ptr(new(std::nothrow)AtomicAddrCleanOpTask()); GE_CHECK_NOTNULL(atomic_task); + atomic_task->SetSingleOp(is_single_op); GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()), "[%s] Failed to init task for AtomicAddrClean", op_desc_->GetName().c_str()); @@ -77,6 +80,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr &node_task, bool i // build aicore task auto aicore_task = std::unique_ptr(new(std::nothrow)AiCoreOpTask()); GE_CHECK_NOTNULL(aicore_task); + aicore_task->SetSingleOp(is_single_op); GE_CHK_STATUS_RET(aicore_task->Init(*op_desc_, task_defs_.back()), "[%s] Failed to init task for AtomicAddrClean", op_desc_->GetName().c_str()); diff --git a/ge/hybrid/node_executor/aicore/aicore_task_builder.h b/ge/hybrid/node_executor/aicore/aicore_task_builder.h index 92db809d..8f95df15 100755 --- a/ge/hybrid/node_executor/aicore/aicore_task_builder.h +++ b/ge/hybrid/node_executor/aicore/aicore_task_builder.h @@ -47,7 +47,7 @@ class AiCoreTaskBuilder { AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector &task_defs); ~AiCoreTaskBuilder() = default; - Status BuildTask(std::unique_ptr &node_task, bool ignore_failure_on_atomic); + Status BuildTask(std::unique_ptr &node_task, bool ignore_failure_on_atomic, bool is_single_op = false); private: bool ExpectAtomicAddrCleanTask(); diff --git a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc index 50890d6a..d7d0f547 100755 --- a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc +++ b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc @@ -27,7 +27,7 @@ namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::GE_LOCAL, GeLocalNodeExecutor); -const std::unordered_map> +const std::map> RefInputTask::out_ref_input_index_ = {{DATA, {}}, {AIPPDATA, {}}, {RESHAPE, {}}, @@ -36,7 +36,7 @@ const std::unordered_map> {BROADCASTGRADIENTARGS, {}} }; -const std::unordered_set DependInputShapeTask::depend_input_shape_ops_ = {SHAPE, SHAPEN, RANK, SIZE}; +const std::set DependInputShapeTask::depend_input_shape_ops_ = {SHAPE, SHAPEN, RANK, SIZE}; Status RefInputTask::UpdateArgs(TaskContext &) { // no need update args diff --git a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.h b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.h index 9de8d0f9..c8d64d09 100644 --- a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.h +++ b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.h @@ -46,7 +46,7 @@ class RefInputTask : public NodeTask { // key is op type, value is output ref input index, // e.g. {1,0} means out[0] ref input[1], out[1] ref input[0], if vector is empty, it means ref input one by one - static const std::unordered_map> out_ref_input_index_; + static const std::map> out_ref_input_index_; }; class DependInputShapeTask : public NodeTask { @@ -65,7 +65,7 @@ class DependInputShapeTask : public NodeTask { const NodePtr node_; // ops depend input shape - static const std::unordered_set depend_input_shape_ops_; + static const std::set depend_input_shape_ops_; }; class ConstantNodeTask : public NodeTask { diff --git a/ge/init/gelib.h b/ge/init/gelib.h index e52b8dd6..885ae867 100644 --- a/ge/init/gelib.h +++ b/ge/init/gelib.h @@ -31,7 +31,7 @@ using std::map; using std::vector; namespace ge { -class GELib { +class GE_FUNC_VISIBILITY GELib { public: GELib() = default; ~GELib() = default; diff --git a/ge/ir_build/atc_ir_common.cc b/ge/ir_build/atc_ir_common.cc index 5b82f8f2..42a78dde 100755 --- a/ge/ir_build/atc_ir_common.cc +++ b/ge/ir_build/atc_ir_common.cc @@ -77,7 +77,7 @@ Status CheckInputFormat(const string &input_format) { return ge::SUCCESS; } -bool CheckDynamicBatchSizeInputShapeValid(unordered_map> shape_map, +bool CheckDynamicBatchSizeInputShapeValid(map> shape_map, std::string &dynamic_batch_size) { int32_t size = 0; for (auto iter = shape_map.begin(); iter != shape_map.end(); ++iter) { @@ -119,7 +119,7 @@ bool CheckDynamicBatchSizeInputShapeValid(unordered_map> return true; } -bool CheckDynamicImagesizeInputShapeValid(unordered_map> shape_map, +bool CheckDynamicImagesizeInputShapeValid(map> shape_map, const std::string input_format, std::string &dynamic_image_size) { if (!input_format.empty() && !ge::TypeUtils::IsFormatValid(input_format.c_str())) { GELOGE(ge::PARAM_INVALID, "user input format [%s] is not found!", input_format.c_str()); @@ -177,7 +177,7 @@ bool CheckDynamicImagesizeInputShapeValid(unordered_map> return true; } -bool CheckDynamicDimsInputShapeValid(const unordered_map> &shape_map, +bool CheckDynamicDimsInputShapeValid(const map> &shape_map, string input_format, string &dynamic_dims) { if (input_format != "ND") { ErrorManager::GetInstance().ATCReportErrMessage( @@ -272,7 +272,7 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i return ge::SUCCESS; } - unordered_map> shape_map; + map> shape_map; vector>> user_shape_map; is_dynamic_input = true; if (input_shape.empty()) { @@ -310,7 +310,7 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i return ge::SUCCESS; } -bool ParseInputShape(const string &input_shape, unordered_map> &shape_map, +bool ParseInputShape(const string &input_shape, map> &shape_map, vector>> &user_shape_map, bool is_dynamic_input) { vector shape_vec = StringUtils::Split(input_shape, ';'); const int DEFAULT_SHAPE_PAIR_SIZE = 2; diff --git a/ge/ir_build/atc_ir_common.h b/ge/ir_build/atc_ir_common.h index 2580a206..2ad4efa8 100644 --- a/ge/ir_build/atc_ir_common.h +++ b/ge/ir_build/atc_ir_common.h @@ -46,13 +46,13 @@ static std::map input_format_str_to_geformat = static const std::string kEnableCompressWeightTrue = "1"; static const std::string kEnableCompressWeightFalse = "0"; -bool CheckDynamicBatchSizeInputShapeValid(unordered_map> shape_map, +bool CheckDynamicBatchSizeInputShapeValid(map> shape_map, std::string &dynamic_batch_size); -bool CheckDynamicImagesizeInputShapeValid(unordered_map> shape_map, +bool CheckDynamicImagesizeInputShapeValid(map> shape_map, const std::string input_format, std::string &dynamic_image_size); -bool CheckDynamicDimsInputShapeValid(const std::unordered_map> &shape_map, +bool CheckDynamicDimsInputShapeValid(const std::map> &shape_map, std::string input_format, std::string &dynamic_dims); bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims); @@ -61,7 +61,7 @@ Status CheckDynamicInputParamValid(std::string &dynamic_batch_size, std::string std::string &dynamic_dims, const std::string input_shape, const std::string input_format, bool &is_dynamic_input); -bool ParseInputShape(const std::string &input_shape, std::unordered_map> &shape_map, +bool ParseInputShape(const std::string &input_shape, std::map> &shape_map, std::vector>> &user_shape_map, bool is_dynamic_input = false); Status CheckOutputTypeParamValid(const std::string output_type); diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 3d00ff7f..9197d52f 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -268,7 +268,7 @@ graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { if (options_.find(kInputShape) == options_.end()) { return GRAPH_SUCCESS; } - unordered_map> shape_map; + map> shape_map; vector>> user_shape_map; GE_CHK_BOOL_EXEC(ParseInputShape(options_[kInputShape], shape_map, user_shape_map, true), return GRAPH_PARAM_INVALID, "parse input shape failed!"); diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt index 3f8d43dc..0079576a 100644 --- a/ge/offline/CMakeLists.txt +++ b/ge/offline/CMakeLists.txt @@ -23,6 +23,7 @@ target_compile_options(atc_atc.bin PRIVATE -O2 -Wno-deprecated-declarations -fno-common + -fvisibility=hidden ) target_compile_definitions(atc_atc.bin PRIVATE @@ -30,6 +31,7 @@ target_compile_definitions(atc_atc.bin PRIVATE COMPILE_OMG_PACKAGE google=ascend_private LOG_CPP + FUNC_VISIBILITY ) target_include_directories(atc_atc.bin PRIVATE @@ -58,6 +60,10 @@ target_include_directories(atc_atc.bin PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain ) +target_link_options(atc_atc.bin PRIVATE + -Wl,-Bsymbolic +) + target_link_libraries(atc_atc.bin PRIVATE $ ascend_protobuf @@ -90,6 +96,7 @@ target_compile_options(fwk_atc.bin PRIVATE -O2 -Wno-deprecated-declarations -fno-common + -fvisibility=hidden ) target_compile_definitions(fwk_atc.bin PRIVATE @@ -97,6 +104,7 @@ target_compile_definitions(fwk_atc.bin PRIVATE COMPILE_OMG_PACKAGE google=ascend_private LOG_CPP + FUNC_VISIBILITY ) target_include_directories(fwk_atc.bin PRIVATE @@ -125,6 +133,10 @@ target_include_directories(fwk_atc.bin PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain ) +target_link_options(fwk_atc.bin PRIVATE + -Wl,-Bsymbolic +) + target_link_libraries(fwk_atc.bin PRIVATE $ ascend_protobuf diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.h b/ge/opskernel_manager/ops_kernel_builder_manager.h index 7a95ddfa..8e1dec28 100644 --- a/ge/opskernel_manager/ops_kernel_builder_manager.h +++ b/ge/opskernel_manager/ops_kernel_builder_manager.h @@ -23,7 +23,7 @@ namespace ge { using OpsKernelBuilderPtr = std::shared_ptr; -class OpsKernelBuilderManager { +class GE_FUNC_VISIBILITY OpsKernelBuilderManager { public: ~OpsKernelBuilderManager(); diff --git a/ge/opskernel_manager/ops_kernel_manager.h b/ge/opskernel_manager/ops_kernel_manager.h index b34c483e..19d703e3 100644 --- a/ge/opskernel_manager/ops_kernel_manager.h +++ b/ge/opskernel_manager/ops_kernel_manager.h @@ -41,7 +41,7 @@ using std::vector; namespace ge { using OpsKernelInfoStorePtr = std::shared_ptr; -class OpsKernelManager { +class GE_FUNC_VISIBILITY OpsKernelManager { public: friend class GELib; diff --git a/ge/plugin/engine/CMakeLists.txt b/ge/plugin/engine/CMakeLists.txt index f6353231..e5736b51 100644 --- a/ge/plugin/engine/CMakeLists.txt +++ b/ge/plugin/engine/CMakeLists.txt @@ -9,11 +9,13 @@ add_library(engine SHARED ${SRC_LIST}) target_compile_options(engine PRIVATE -Werror -fno-common + -fvisibility=hidden ) target_compile_definitions(engine PRIVATE REUSE_MEMORY=1 PROTOBUF_INLINE_NOT_IN_HEADERS=0 + FUNC_VISIBILITY ) target_include_directories(engine PRIVATE @@ -32,6 +34,10 @@ target_include_directories(engine PRIVATE ${GE_CODE_DIR}/third_party/fwkacllib/inc ) +target_link_options(engine PRIVATE + -Wl,-Bsymbolic +) + target_link_libraries(engine PRIVATE $ -Wl,--no-as-needed diff --git a/ge/plugin/engine/dnnengines.h b/ge/plugin/engine/dnnengines.h index 4a2a9df5..0633c104 100644 --- a/ge/plugin/engine/dnnengines.h +++ b/ge/plugin/engine/dnnengines.h @@ -25,7 +25,7 @@ #include "plugin/engine/engine_manage.h" namespace ge { -class AICoreDNNEngine : public DNNEngine { +class GE_FUNC_VISIBILITY AICoreDNNEngine : public DNNEngine { public: AICoreDNNEngine() = default; explicit AICoreDNNEngine(const std::string &engine_name); @@ -40,7 +40,7 @@ class AICoreDNNEngine : public DNNEngine { DNNEngineAttribute engine_attribute_; }; -class VectorCoreDNNEngine : public DNNEngine { +class GE_FUNC_VISIBILITY VectorCoreDNNEngine : public DNNEngine { public: VectorCoreDNNEngine() = default; explicit VectorCoreDNNEngine(const std::string &engine_name); @@ -56,7 +56,7 @@ class VectorCoreDNNEngine : public DNNEngine { }; -class AICpuDNNEngine : public DNNEngine { +class GE_FUNC_VISIBILITY AICpuDNNEngine : public DNNEngine { public: AICpuDNNEngine() = default; explicit AICpuDNNEngine(const std::string &engine_name); @@ -71,7 +71,7 @@ class AICpuDNNEngine : public DNNEngine { DNNEngineAttribute engine_attribute_; }; -class AICpuTFDNNEngine : public DNNEngine { +class GE_FUNC_VISIBILITY AICpuTFDNNEngine : public DNNEngine { public: AICpuTFDNNEngine() = default; explicit AICpuTFDNNEngine(const std::string &engine_name); @@ -86,7 +86,7 @@ class AICpuTFDNNEngine : public DNNEngine { DNNEngineAttribute engine_attribute_; }; -class GeLocalDNNEngine : public DNNEngine { +class GE_FUNC_VISIBILITY GeLocalDNNEngine : public DNNEngine { public: GeLocalDNNEngine() = default; explicit GeLocalDNNEngine(const std::string &engine_name); @@ -101,7 +101,7 @@ class GeLocalDNNEngine : public DNNEngine { DNNEngineAttribute engine_attribute_; }; -class HostCpuDNNEngine : public DNNEngine { +class GE_FUNC_VISIBILITY HostCpuDNNEngine : public DNNEngine { public: HostCpuDNNEngine() = default; explicit HostCpuDNNEngine(const std::string &engine_name); @@ -116,7 +116,7 @@ private: DNNEngineAttribute engine_attribute_; }; -class RtsDNNEngine : public DNNEngine { +class GE_FUNC_VISIBILITY RtsDNNEngine : public DNNEngine { public: RtsDNNEngine() = default; explicit RtsDNNEngine(const std::string &engine_name); @@ -131,7 +131,7 @@ class RtsDNNEngine : public DNNEngine { DNNEngineAttribute engine_attribute_; }; -class HcclDNNEngine : public DNNEngine { +class GE_FUNC_VISIBILITY HcclDNNEngine : public DNNEngine { public: HcclDNNEngine() = default; explicit HcclDNNEngine(const std::string &engine_name); diff --git a/ge/plugin/engine/engine_manage.h b/ge/plugin/engine/engine_manage.h index 5203ad3a..7eb88805 100644 --- a/ge/plugin/engine/engine_manage.h +++ b/ge/plugin/engine/engine_manage.h @@ -17,6 +17,20 @@ #ifndef GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_ #define GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_ +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY _declspec(dllexport) +#else +#define GE_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_VISIBILITY +#endif +#endif + #include #include #include @@ -26,7 +40,7 @@ namespace ge { using DNNEnginePtr = std::shared_ptr; -class EngineManager { +class GE_FUNC_VISIBILITY EngineManager { public: static Status RegisterEngine(const std::string &engine_name, DNNEnginePtr engine_ptr); static DNNEnginePtr GetEngine(const std::string &engine_name); @@ -34,7 +48,7 @@ class EngineManager { }; extern "C" { -void GetDNNEngineObjs(std::map &engines); +GE_FUNC_VISIBILITY void GetDNNEngineObjs(std::map &engines); } } // namespace ge #endif // GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_ diff --git a/ge/session/inner_session.cc b/ge/session/inner_session.cc index 5a67f7cd..6a56fc05 100755 --- a/ge/session/inner_session.cc +++ b/ge/session/inner_session.cc @@ -77,6 +77,23 @@ Status InnerSession::Initialize() { UpdateThreadContext(std::map{}); + // session device id set here + std::string str_session_device_id; + if (GetContext().GetOption("ge.session_device_id", str_session_device_id) == SUCCESS) { + GELOGI("Option session device id has set, value is %s.", str_session_device_id.c_str()); + + uint32_t session_device_id = 0; + try { + session_device_id = static_cast(std::stoi(str_session_device_id.c_str())); + // session device id has priority + GetContext().SetCtxDeviceId(session_device_id); + } catch (std::invalid_argument &) { + GELOGW("session device id %s transform to int failed.", str_session_device_id.c_str()); + } catch (std::out_of_range &) { + GELOGW("session device id %s transform to int failed.", str_session_device_id.c_str()); + } + } + GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId())); DumpProperties dump_properties; diff --git a/ge/session/omg.cc b/ge/session/omg.cc index 47073fc0..368b4bec 100755 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -606,7 +606,7 @@ Status InitDomiOmgContext(const string &input_shape, const string &input_format, } // Analyze the input shape paramete - unordered_map> &shape_map = domi::GetContext().input_dims; + map> &shape_map = domi::GetContext().input_dims; if (!ge::ParseInputShape(input_shape, domi::GetContext().input_dims, domi::GetContext().user_input_dims, is_dynamic_input) || @@ -689,7 +689,7 @@ Status ParseOutNodes(const string &out_nodes) { /// static Status CheckOpNameMap(const ComputeGraphPtr &graph, const std::string &op_conf) { GE_CHECK_NOTNULL(graph); - unordered_map graphNodeTypes; + map graphNodeTypes; for (const NodePtr &node : graph->GetAllNodes()) { auto op_desc = node->GetOpDesc(); if (op_desc == nullptr) { diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 4f32bd6b..168ca2c5 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -256,9 +256,27 @@ Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, const vector &input_buffers, vector &output_desc, vector &output_buffers) { - GE_CHECK_NOTNULL(op_task_); GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); + if (hybrid_model_executor_ != nullptr) { + GELOGD("Execute multi-task dynamic single op by hybrid model executor"); + hybrid::HybridModelExecutor::ExecuteArgs args; + for (auto &input : input_buffers) { + args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length)); + } + for (auto &output : output_buffers) { + args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length)); + } + for (auto &tensor_desc : input_desc) { + auto desc = MakeShared(tensor_desc); + GE_CHECK_NOTNULL(desc); + args.input_desc.emplace_back(desc); + } + + return hybrid_model_executor_->Execute(args); + } + std::lock_guard lk(*stream_mutex_); + GE_CHECK_NOTNULL(op_task_); GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); diff --git a/ge/single_op/single_op.h b/ge/single_op/single_op.h index d677f94a..b350b684 100755 --- a/ge/single_op/single_op.h +++ b/ge/single_op/single_op.h @@ -28,6 +28,7 @@ #include "runtime/stream.h" #include "task/op_task.h" #include "cce/aicpu_engine_struct.h" +#include "hybrid/executor/hybrid_model_executor.h" namespace ge { class StreamResource; @@ -46,7 +47,7 @@ class SingleOp { Status GetArgs(const std::vector &inputs, const std::vector &outputs); friend class SingleOpModel; - StreamResource *stream_resource_; + StreamResource *stream_resource_ = nullptr; std::mutex *stream_mutex_; rtStream_t stream_ = nullptr; std::vector input_addr_list_; @@ -77,6 +78,8 @@ class DynamicSingleOp { std::vector &outputs) const; std::unique_ptr op_task_; + std::unique_ptr hybrid_model_; + std::unique_ptr hybrid_model_executor_; uintptr_t resource_id_ = 0; std::mutex *stream_mutex_; rtStream_t stream_ = nullptr; diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 7d092091..1b776cc8 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -31,6 +31,8 @@ #include "task/aicpu_task_builder.h" #include "task/aicpu_kernel_task_builder.h" #include "task/tbe_task_builder.h" +#include "hybrid/executor/hybrid_model_executor.h" +#include "hybrid/node_executor/node_executor.h" static std::atomic aicpu_kernel_id(0); @@ -42,6 +44,20 @@ namespace ge { namespace { const size_t kDataOutputNum = 1; } // namespace +static Status IfInferDepend(GeModelPtr &ge_model, bool &flag) { + auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph()); + for (const auto &node : comp_graph->GetAllNodes()) { + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + const auto &depends = op_desc->GetOpInferDepends(); + if (!depends.empty()) { + flag = true; + return SUCCESS; + } + } + return SUCCESS; +} + SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size) : model_name_(model_name), ori_model_data_(model_data), ori_model_size_(model_size) {} @@ -478,6 +494,30 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); model_params_.memory_size = UINT_MAX; + + auto ge_model = model_helper_.GetGeModel(); + GE_CHECK_NOTNULL(ge_model); + bool infer_depend_flag = false; + GE_CHK_STATUS_RET_NOLOG(IfInferDepend(ge_model, infer_depend_flag)); + if (ge_model->GetModelTaskDefPtr()->task_size() > 1 || infer_depend_flag) { + GELOGD("Build single op HybridModel."); + GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized()); + auto root_model = model_helper_.GetGeRootModel(); + GE_CHECK_NOTNULL(root_model); + root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph())); + root_model->SetSubgraphInstanceNameToModel(root_model->GetRootGraph()->GetName(), ge_model); + single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model)); + GE_CHECK_NOTNULL(single_op.hybrid_model_); + GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "Failed to init hybrid model"); + int32_t device_id = 0; + GE_CHK_RT_RET(rtGetDevice(&device_id)); + single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(), + device_id, + resource.GetStream())); + GE_CHECK_NOTNULL(single_op.hybrid_model_executor_); + GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "Failed to init hybrid model"); + return SUCCESS; + } return BuildTaskListForDynamicOp(single_op); } } // namespace ge diff --git a/ge/single_op/stream_resource.cc b/ge/single_op/stream_resource.cc index db6b7c47..a3acf6b7 100755 --- a/ge/single_op/stream_resource.cc +++ b/ge/single_op/stream_resource.cc @@ -61,6 +61,10 @@ DynamicSingleOp *StreamResource::GetDynamicOperator(const void *key) { return it->second.get(); } +rtStream_t StreamResource::GetStream() const { + return stream_; +} + void StreamResource::SetStream(rtStream_t stream) { stream_ = stream; } diff --git a/ge/single_op/stream_resource.h b/ge/single_op/stream_resource.h index d5bc941a..d2c1ca36 100755 --- a/ge/single_op/stream_resource.h +++ b/ge/single_op/stream_resource.h @@ -37,6 +37,7 @@ class StreamResource { StreamResource(StreamResource &&) = delete; StreamResource &operator=(const StreamResource &) = delete; StreamResource &operator=(StreamResource &&) = delete; + rtStream_t GetStream() const; void SetStream(rtStream_t stream); SingleOp *GetOperator(const void *key); diff --git a/ge/stub/gen_stubapi.py b/ge/stub/gen_stubapi.py index 1476d505..f20d23a8 100644 --- a/ge/stub/gen_stubapi.py +++ b/ge/stub/gen_stubapi.py @@ -16,7 +16,7 @@ logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(leve """ this attr is used for symbol table visible """ -GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY' +GE_ATTR = 'GE_FUNC_VISIBILITY' """ generate stub func body by return type diff --git a/inc/external/ge/ge_api.h b/inc/external/ge/ge_api.h index 9c26ebf8..cd4ca323 100644 --- a/inc/external/ge/ge_api.h +++ b/inc/external/ge/ge_api.h @@ -34,15 +34,15 @@ typedef uint32_t (*pCallBackFunc)(uint32_t graph_id, const std::map &)) -Status GEInitialize(const std::map &options); +ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY Status GEInitialize(const std::map &)) +GE_FUNC_VISIBILITY Status GEInitialize(const std::map &options); -Status GEInitialize(const std::map &options); +GE_FUNC_VISIBILITY Status GEInitialize(const std::map &options); // Finalize GE, release all resources -Status GEFinalize(); +GE_FUNC_VISIBILITY Status GEFinalize(); -class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Session { +class GE_FUNC_VISIBILITY Session { public: ATTRIBUTED_DEPRECATED(Session(const std::map &)) explicit Session(const std::map &options); diff --git a/inc/external/ge/ge_api_error_codes.h b/inc/external/ge/ge_api_error_codes.h index e77f817c..274a9784 100644 --- a/inc/external/ge/ge_api_error_codes.h +++ b/inc/external/ge/ge_api_error_codes.h @@ -28,7 +28,7 @@ namespace ge { #define ATTRIBUTED_DEPRECATED(replacement) __declspec(deprecated("Please use " #replacement " instead.")) #endif -class StatusFactory { +class GE_FUNC_VISIBILITY StatusFactory { public: static StatusFactory *Instance() { static StatusFactory instance; @@ -70,7 +70,7 @@ class StatusFactory { std::map err_desc_; }; -class ErrorNoRegisterar { +class GE_FUNC_VISIBILITY ErrorNoRegisterar { public: ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } ErrorNoRegisterar(uint32_t err, const char *desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } diff --git a/inc/external/ge/ge_error_codes.h b/inc/external/ge/ge_error_codes.h index 041fc7ae..b477a18c 100644 --- a/inc/external/ge/ge_error_codes.h +++ b/inc/external/ge/ge_error_codes.h @@ -17,6 +17,20 @@ #ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_ #define INC_EXTERNAL_GE_GE_ERROR_CODES_H_ +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY _declspec(dllexport) +#else +#define GE_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_VISIBILITY +#endif +#endif + #include #ifdef __cplusplus diff --git a/inc/external/ge/ge_ir_build.h b/inc/external/ge/ge_ir_build.h index afaf42ac..889e2bea 100644 --- a/inc/external/ge/ge_ir_build.h +++ b/inc/external/ge/ge_ir_build.h @@ -17,6 +17,20 @@ #ifndef INC_EXTERNAL_GE_IR_BUILD_H_ #define INC_EXTERNAL_GE_IR_BUILD_H_ +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY _declspec(dllexport) +#else +#define GE_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_VISIBILITY +#endif +#endif + #include #include #include @@ -44,17 +58,17 @@ struct ModelBufferData { * @retval GRAPH_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ATTRIBUTED_DEPRECATED(graphStatus aclgrphBuildInitialize(std::map &)) -graphStatus aclgrphBuildInitialize(std::map global_options); +ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map &)) +GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map global_options); -graphStatus aclgrphBuildInitialize(std::map &global_options); +GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map &global_options); /** * @ingroup AscendCL * @brief build model.Notice the model is stored in buffer * */ -void aclgrphBuildFinalize(); +GE_FUNC_VISIBILITY void aclgrphBuildFinalize(); /** * @ingroup AscendCL @@ -66,12 +80,12 @@ void aclgrphBuildFinalize(); * @retval GRAPH_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ATTRIBUTED_DEPRECATED(graphStatus aclgrphBuildModel(const ge::Graph &, const std::map &, +ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &, const std::map &, ModelBufferData &)) -graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map &build_options, +GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map &build_options, ModelBufferData &model); -graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map &build_options, +GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map &build_options, ModelBufferData &model); /** @@ -83,10 +97,10 @@ graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map &inputs, +GE_FUNC_VISIBILITY graphStatus aclgrphGenerateForOp(const AscendString &op_type, const std::vector &inputs, const std::vector &outputs, Graph &graph); }; // namespace ge diff --git a/inc/framework/common/debug/ge_log.h b/inc/framework/common/debug/ge_log.h index 07cd1664..c1359a20 100644 --- a/inc/framework/common/debug/ge_log.h +++ b/inc/framework/common/debug/ge_log.h @@ -37,7 +37,7 @@ extern "C" { // trace status of log enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP }; -class GeLog { +class GE_FUNC_VISIBILITY GeLog { public: static uint64_t GetTid() { #ifdef __GNUC__ diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h index 31281cd6..58cb3693 100644 --- a/inc/framework/common/debug/log.h +++ b/inc/framework/common/debug/log.h @@ -278,7 +278,7 @@ } while (0) template -std::string FmtToStr(const T &t) { +GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) { std::string fmt; std::stringstream st; st << "[" << t << "]"; diff --git a/inc/framework/common/fmk_error_codes.h b/inc/framework/common/fmk_error_codes.h index 358fca04..e910e346 100644 --- a/inc/framework/common/fmk_error_codes.h +++ b/inc/framework/common/fmk_error_codes.h @@ -17,6 +17,20 @@ #ifndef INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_ #define INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_ +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY _declspec(dllexport) +#else +#define GE_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_VISIBILITY +#endif +#endif + #include #include @@ -38,7 +52,7 @@ const int MODID_OME = 2; // OME module ID const int MODID_CALIBRATION = 3; // Calibration module ID namespace domi { -class StatusFactory { +class GE_FUNC_VISIBILITY StatusFactory { public: static StatusFactory *Instance(); @@ -54,7 +68,7 @@ class StatusFactory { std::map err_desc_; }; -class ErrorNoRegisterar { +class GE_FUNC_VISIBILITY ErrorNoRegisterar { public: ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } ~ErrorNoRegisterar() {} diff --git a/inc/framework/common/ge_format_util.h b/inc/framework/common/ge_format_util.h index 9b1d7786..dfceefb8 100644 --- a/inc/framework/common/ge_format_util.h +++ b/inc/framework/common/ge_format_util.h @@ -23,7 +23,7 @@ #include "graph/tensor.h" namespace ge { -class GeFormatUtil { +class GE_FUNC_VISIBILITY GeFormatUtil { public: /// /// @name TransShape diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index 9ca77f1c..ec5adcba 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -215,7 +215,7 @@ struct ModelInfo { }; // Asynchronous callback interface, implemented by the caller -class ModelListener { +class GE_FUNC_VISIBILITY ModelListener { public: virtual ~ModelListener() {} /// diff --git a/inc/framework/common/gflags_util.h b/inc/framework/common/gflags_util.h index 94d66ffb..6e9ea41b 100644 --- a/inc/framework/common/gflags_util.h +++ b/inc/framework/common/gflags_util.h @@ -17,11 +17,25 @@ #ifndef INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ #define INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY _declspec(dllexport) +#else +#define GE_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_VISIBILITY +#endif +#endif + #include #include namespace ge { -class GflagsUtils { +class GE_FUNC_VISIBILITY GflagsUtils { public: static bool IsSetCommandTrue(const char *name) { std::string out; diff --git a/inc/framework/common/helper/model_helper.h b/inc/framework/common/helper/model_helper.h index 4a169dda..e25d5d6f 100644 --- a/inc/framework/common/helper/model_helper.h +++ b/inc/framework/common/helper/model_helper.h @@ -28,7 +28,7 @@ #include "model/ge_root_model.h" namespace ge { -class ModelHelper { +class GE_FUNC_VISIBILITY ModelHelper { public: ModelHelper() = default; ~ModelHelper(); diff --git a/inc/framework/common/helper/om_file_helper.h b/inc/framework/common/helper/om_file_helper.h index 98ad55d7..34509b39 100644 --- a/inc/framework/common/helper/om_file_helper.h +++ b/inc/framework/common/helper/om_file_helper.h @@ -51,7 +51,7 @@ struct SaveParam { std::string model_name; }; -class OmFileLoadHelper { +class GE_FUNC_VISIBILITY OmFileLoadHelper { public: Status Init(const ge::ModelData &model); @@ -77,7 +77,7 @@ class OmFileLoadHelper { bool is_inited_{false}; }; -class OmFileSaveHelper { +class GE_FUNC_VISIBILITY OmFileSaveHelper { public: ModelFileHeader &GetModelFileHeader() { return model_header_; } diff --git a/inc/framework/common/l2_cache_optimize.h b/inc/framework/common/l2_cache_optimize.h index c65f67b3..fdb1c8b5 100644 --- a/inc/framework/common/l2_cache_optimize.h +++ b/inc/framework/common/l2_cache_optimize.h @@ -69,7 +69,7 @@ struct RCMemoryBlock { }; // L2Cache optimizer -class L2CacheOptimize { +class GE_FUNC_VISIBILITY L2CacheOptimize { public: explicit L2CacheOptimize(ge::ComputeGraphPtr &graph); ~L2CacheOptimize(); diff --git a/inc/framework/common/op/attr_value_util.h b/inc/framework/common/op/attr_value_util.h index e3803b78..28d48c1d 100644 --- a/inc/framework/common/op/attr_value_util.h +++ b/inc/framework/common/op/attr_value_util.h @@ -17,6 +17,20 @@ #ifndef INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ #define INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY _declspec(dllexport) +#else +#define GE_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_VISIBILITY +#endif +#endif + #include #include #include @@ -34,127 +48,127 @@ namespace ge { using AttrDefMap = ::google::protobuf::Map<::std::string, ::domi::AttrDef>; using AttrDefPair = ::google::protobuf::MapPair; -void AddOpAttr(const std::string &key, AttrDef &attr, OpDef *opdef); +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, AttrDef &attr, OpDef *opdef); // DEFINE_ADD_ATTR_VALUE -void AddOpAttr(const std::string &key, const std::string &value, AttrDefMap *attrs); -void AddOpAttr(const std::string &key, const char *value, AttrDefMap *attrs); -void AddOpAttr(const char *key, const char *value, AttrDefMap *attrs); -void AddOpAttr(const std::string &key, const uint32_t value, AttrDefMap *attrs); -void AddOpAttr(const std::string &key, const int32_t value, AttrDefMap *attrs); -void AddOpAttr(const std::string &key, const int64_t value, AttrDefMap *attrs); -void AddOpAttr(const std::string &key, const float value, AttrDefMap *attrs); -void AddOpAttr(const std::string &key, const double value, AttrDefMap *attrs); -void AddOpAttr(const std::string &key, const bool value, AttrDefMap *attrs); - -void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, AttrDefMap *attrs); +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const std::string &value, AttrDefMap *attrs); +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const char *value, AttrDefMap *attrs); +GE_FUNC_VISIBILITY void AddOpAttr(const char *key, const char *value, AttrDefMap *attrs); +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const uint32_t value, AttrDefMap *attrs); +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int32_t value, AttrDefMap *attrs); +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int64_t value, AttrDefMap *attrs); +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const float value, AttrDefMap *attrs); +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const double value, AttrDefMap *attrs); +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const bool value, AttrDefMap *attrs); + +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, AttrDefMap *attrs); // DEFINE_ADD_ATTR_VALUE -void AddOpAttr(const std::string &key, const std::string &value, OpDef *opdef); -void AddOpAttr(const std::string &key, const char *value, OpDef *opdef); -void AddOpAttr(const char *key, const char *value, OpDef *opdef); -void AddOpAttr(const std::string &key, const uint32_t value, OpDef *opdef); -void AddOpAttr(const std::string &key, const int32_t value, OpDef *opdef); -void AddOpAttr(const std::string &key, const int64_t value, OpDef *opdef); -void AddOpAttr(const std::string &key, const float value, OpDef *opdef); -void AddOpAttr(const std::string &key, const double value, OpDef *opdef); -void AddOpAttr(const std::string &key, const bool value, OpDef *opdef); +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const std::string &value, OpDef *opdef); +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const char *value, OpDef *opdef); +GE_FUNC_VISIBILITY void AddOpAttr(const char *key, const char *value, OpDef *opdef); +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const uint32_t value, OpDef *opdef); +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int32_t value, OpDef *opdef); +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int64_t value, OpDef *opdef); +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const float value, OpDef *opdef); +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const double value, OpDef *opdef); +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const bool value, OpDef *opdef); -void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, OpDef *opdef); +GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, OpDef *opdef); -void AddOpBytesAttr(const std::string &key, const void *value, size_t size, OpDef *opdef); +GE_FUNC_VISIBILITY void AddOpBytesAttr(const std::string &key, const void *value, size_t size, OpDef *opdef); // DEFINE_ADD_ATTR_VALUE_LIST -void AddOpAttrList(const std::string &key, const double value, AttrDefMap *attrs); -void AddOpAttrList(const std::string &key, const float value, AttrDefMap *attrs); -void AddOpAttrList(const std::string &key, const uint32_t value, AttrDefMap *attrs); -void AddOpAttrList(const std::string &key, const int32_t value, AttrDefMap *attrs); -void AddOpAttrList(const std::string &key, const std::string value, AttrDefMap *attrs); -void AddOpAttrList(const std::string &key, const double value, OpDef *opdef); -void AddOpAttrList(const std::string &key, const float value, OpDef *opdef); -void AddOpAttrList(const std::string &key, const uint32_t value, OpDef *opdef); -void AddOpAttrList(const std::string &key, const int32_t value, OpDef *opdef); -void AddOpAttrList(const std::string &key, const bool value, OpDef *opdef); -void AddOpAttrList(const std::string &key, const int64_t value, OpDef *opdef); - -void AddOpAttrList(const std::string &key, const std::string &value, OpDef *opdef); - -bool GetOpAttr(const std::string &key, std::string *value, const OpDef *opdef); -bool GetOpAttr(const std::string &key, int32_t *value, const OpDef *opdef); -bool GetOpAttr(const std::string &key, int64_t *value, const OpDef *opdef); -bool GetOpAttr(const std::string &key, uint32_t *value, const OpDef *opdef); -bool GetOpAttr(const std::string &key, float *value, const OpDef *opdef); -bool GetOpAttr(const std::string &key, double *value, const OpDef *opdef); -bool GetOpAttr(const std::string &key, bool *value, const OpDef *opdef); -bool GetOpAttr(const std::string &key, AttrDef_ListValue *value, const OpDef *opdef); - -uint32_t GetOpAttrListSize(const std::string &key, std::string value, const OpDef *opdef); -uint32_t GetOpAttrListSize(const std::string &key, int32_t value, const OpDef *opdef); -uint32_t GetOpAttrListSize(const std::string &key, int64_t value, const OpDef *opdef); -uint32_t GetOpAttrListSize(const std::string &key, uint32_t value, const OpDef *opdef); -uint32_t GetOpAttrListSize(const std::string &key, float value, const OpDef *opdef); -uint32_t GetOpAttrListSize(const std::string &key, double value, const OpDef *opdef); -uint32_t GetOpAttrListSize(const std::string &key, bool value, const OpDef *opdef); - -bool GetBytesAttr(const std::string &key, std::string *value, const OpDef *opdef); -bool GetBytesAttr(const std::string &key, std::string *value, const ModelDef *model_def); - -void AddModelAttr(const std::string &key, const std::string &value, ModelDef *model_def); -void AddModelAttr(const std::string &key, const char *value, ModelDef *model_def); -void AddModelAttr(const char *key, const char *value, ModelDef *model_def); -void AddModelAttr(const std::string &key, const uint32_t value, ModelDef *model_def); -void AddModelAttr(const std::string &key, const int32_t value, ModelDef *model_def); -void AddModelAttr(const std::string &key, const int64_t value, ModelDef *model_def); -void AddModelAttr(const std::string &key, const float value, ModelDef *model_def); -void AddModelAttr(const std::string &key, const double value, ModelDef *model_def); -void AddModelAttr(const std::string &key, const bool value, ModelDef *model_def); -void AddModelAttr(const std::string &key, const void *value, size_t size, ModelDef *model_def); -void AddModelAttr(const std::string &key, const AttrDef_ListValue &value, ModelDef *model_def); - -void AddModelAttrList(const std::string &key, const double value, ModelDef *model_def); -void AddModelAttrList(const std::string &key, const float value, ModelDef *model_def); -void AddModelAttrList(const std::string &key, const uint32_t value, ModelDef *model_def); -void AddModelAttrList(const std::string &key, const int32_t value, ModelDef *model_def); -void AddModelAttrList(const std::string &key, const std::string &value, ModelDef *model_def); - -bool GetModelAttr(const std::string &key, std::string *value, const ModelDef *model_def); -bool GetModelAttr(const std::string &key, int32_t *value, const ModelDef *model_def); -bool GetModelAttr(const std::string &key, int64_t *value, const ModelDef *model_def); -bool GetModelAttr(const std::string &key, uint32_t *value, const ModelDef *model_def); -bool GetModelAttr(const std::string &key, float *value, const ModelDef *model_def); -bool GetModelAttr(const std::string &key, double *value, const ModelDef *model_def); -bool GetModelAttr(const std::string &key, bool *value, const ModelDef *model_def); -bool GetModelAttr(const std::string &key, AttrDef_ListValue *value, const ModelDef *model_def); - -bool HasOpAttr(const OpDef *opdef, const std::string &attr_name); - -void SetAttrDef(const std::string &value, AttrDef *out); -void SetAttrDef(const char *value, AttrDef *out); -void SetAttrDef(const uint32_t value, AttrDef *out); -void SetAttrDef(const int32_t value, AttrDef *out); -void SetAttrDef(const float value, AttrDef *out); -void SetAttrDef(const double value, AttrDef *out); -void SetAttrDef(const bool value, AttrDef *out); -void SetAttrList(const std::string &value, AttrDef *out); -void SetAttrList(const bool value, AttrDef *out); -void SetAttrList(const float value, AttrDef *out); -void SetAttrList(const double value, AttrDef *out); -void SetAttrList(const uint32_t value, AttrDef *out); - -bool GetAttrDefValue(const std::string &key, std::string *value, const AttrDefMap &attr); -bool GetAttrDefValue(const std::string &key, int32_t *value, const AttrDefMap &attr); -bool GetAttrDefValue(const std::string &key, int64_t *value, const AttrDefMap &attr); -bool GetAttrDefValue(const std::string &key, uint32_t *value, const AttrDefMap &attr); -bool GetAttrDefValue(const std::string &key, float *value, const AttrDefMap &attr); -bool GetAttrDefValue(const std::string &key, double *value, const AttrDefMap &attr); -bool GetAttrDefValue(const std::string &key, bool *value, const AttrDefMap &attr); -bool GetAttrDefValue(const std::string &key, AttrDef_ListValue *value, const AttrDefMap &attr); -bool GetAttrDefValue(const std::string &key, NamedAttrs *&value, AttrDefMap *attr); -bool GetAttrDefValue(const std::string &key, const NamedAttrs *&value, const AttrDefMap &attr); - -bool GetAttrDefListValue(const std::string &key, int idx, int32_t *value, const AttrDefMap &attr); -bool GetAttrDefListValue(const std::string &key, int idx, uint32_t *value, const AttrDefMap &attr); -bool GetAttrDefListValue(const std::string &key, int idx, float *value, const AttrDefMap &attr); -bool GetAttrDefListValue(const std::string &key, int idx, double *value, const AttrDefMap &attr); +GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const double value, AttrDefMap *attrs); +GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const float value, AttrDefMap *attrs); +GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const uint32_t value, AttrDefMap *attrs); +GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int32_t value, AttrDefMap *attrs); +GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const std::string value, AttrDefMap *attrs); +GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const double value, OpDef *opdef); +GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const float value, OpDef *opdef); +GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const uint32_t value, OpDef *opdef); +GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int32_t value, OpDef *opdef); +GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const bool value, OpDef *opdef); +GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int64_t value, OpDef *opdef); + +GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const std::string &value, OpDef *opdef); + +GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, std::string *value, const OpDef *opdef); +GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, int32_t *value, const OpDef *opdef); +GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, int64_t *value, const OpDef *opdef); +GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, uint32_t *value, const OpDef *opdef); +GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, float *value, const OpDef *opdef); +GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, double *value, const OpDef *opdef); +GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, bool *value, const OpDef *opdef); +GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, AttrDef_ListValue *value, const OpDef *opdef); + +GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, std::string value, const OpDef *opdef); +GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, int32_t value, const OpDef *opdef); +GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, int64_t value, const OpDef *opdef); +GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, uint32_t value, const OpDef *opdef); +GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, float value, const OpDef *opdef); +GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, double value, const OpDef *opdef); +GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, bool value, const OpDef *opdef); + +GE_FUNC_VISIBILITY bool GetBytesAttr(const std::string &key, std::string *value, const OpDef *opdef); +GE_FUNC_VISIBILITY bool GetBytesAttr(const std::string &key, std::string *value, const ModelDef *model_def); + +GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const std::string &value, ModelDef *model_def); +GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const char *value, ModelDef *model_def); +GE_FUNC_VISIBILITY void AddModelAttr(const char *key, const char *value, ModelDef *model_def); +GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const uint32_t value, ModelDef *model_def); +GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const int32_t value, ModelDef *model_def); +GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const int64_t value, ModelDef *model_def); +GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const float value, ModelDef *model_def); +GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const double value, ModelDef *model_def); +GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const bool value, ModelDef *model_def); +GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const void *value, size_t size, ModelDef *model_def); +GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const AttrDef_ListValue &value, ModelDef *model_def); + +GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const double value, ModelDef *model_def); +GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const float value, ModelDef *model_def); +GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const uint32_t value, ModelDef *model_def); +GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const int32_t value, ModelDef *model_def); +GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const std::string &value, ModelDef *model_def); + +GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, std::string *value, const ModelDef *model_def); +GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, int32_t *value, const ModelDef *model_def); +GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, int64_t *value, const ModelDef *model_def); +GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, uint32_t *value, const ModelDef *model_def); +GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, float *value, const ModelDef *model_def); +GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, double *value, const ModelDef *model_def); +GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, bool *value, const ModelDef *model_def); +GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, AttrDef_ListValue *value, const ModelDef *model_def); + +GE_FUNC_VISIBILITY bool HasOpAttr(const OpDef *opdef, const std::string &attr_name); + +GE_FUNC_VISIBILITY void SetAttrDef(const std::string &value, AttrDef *out); +GE_FUNC_VISIBILITY void SetAttrDef(const char *value, AttrDef *out); +GE_FUNC_VISIBILITY void SetAttrDef(const uint32_t value, AttrDef *out); +GE_FUNC_VISIBILITY void SetAttrDef(const int32_t value, AttrDef *out); +GE_FUNC_VISIBILITY void SetAttrDef(const float value, AttrDef *out); +GE_FUNC_VISIBILITY void SetAttrDef(const double value, AttrDef *out); +GE_FUNC_VISIBILITY void SetAttrDef(const bool value, AttrDef *out); +GE_FUNC_VISIBILITY void SetAttrList(const std::string &value, AttrDef *out); +GE_FUNC_VISIBILITY void SetAttrList(const bool value, AttrDef *out); +GE_FUNC_VISIBILITY void SetAttrList(const float value, AttrDef *out); +GE_FUNC_VISIBILITY void SetAttrList(const double value, AttrDef *out); +GE_FUNC_VISIBILITY void SetAttrList(const uint32_t value, AttrDef *out); + +GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, std::string *value, const AttrDefMap &attr); +GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, int32_t *value, const AttrDefMap &attr); +GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, int64_t *value, const AttrDefMap &attr); +GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, uint32_t *value, const AttrDefMap &attr); +GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, float *value, const AttrDefMap &attr); +GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, double *value, const AttrDefMap &attr); +GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, bool *value, const AttrDefMap &attr); +GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, AttrDef_ListValue *value, const AttrDefMap &attr); +GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, NamedAttrs *&value, AttrDefMap *attr); +GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, const NamedAttrs *&value, const AttrDefMap &attr); + +GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, int32_t *value, const AttrDefMap &attr); +GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, uint32_t *value, const AttrDefMap &attr); +GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, float *value, const AttrDefMap &attr); +GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, double *value, const AttrDefMap &attr); } #endif // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ diff --git a/inc/framework/common/op/ge_op_utils.h b/inc/framework/common/op/ge_op_utils.h index aa50c8a1..89529520 100644 --- a/inc/framework/common/op/ge_op_utils.h +++ b/inc/framework/common/op/ge_op_utils.h @@ -34,36 +34,36 @@ namespace ge { using domi::Status; // Add Sub Mul -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t ADD_INPUT_NUM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SUB_INPUT_NUM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MUL_INPUT_NUM; +GE_FUNC_VISIBILITY extern const uint32_t ADD_INPUT_NUM; +GE_FUNC_VISIBILITY extern const uint32_t SUB_INPUT_NUM; +GE_FUNC_VISIBILITY extern const uint32_t MUL_INPUT_NUM; // Permute -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int32_t PERMUTE_ORDER_NUM; +GE_FUNC_VISIBILITY extern const int32_t PERMUTE_ORDER_NUM; // Ssd PriroBox -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const double SSD_PRIORBOX_ASPECT_RATIO_VALUE; +GE_FUNC_VISIBILITY extern const double SSD_PRIORBOX_ASPECT_RATIO_VALUE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t STRIDEDSLICE_INPUT_NUM; +GE_FUNC_VISIBILITY extern const uint32_t STRIDEDSLICE_INPUT_NUM; // Switch -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SWITCH_INPUT_NUM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SWITCH_OUTPUT_NUM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SWITCH_FALSE_OUTPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SWITCH_TRUE_OUTPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SWITCH_DATA_INPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t SWITCH_PRED_INPUT; +GE_FUNC_VISIBILITY extern const uint32_t SWITCH_INPUT_NUM; +GE_FUNC_VISIBILITY extern const uint32_t SWITCH_OUTPUT_NUM; +GE_FUNC_VISIBILITY extern const uint32_t SWITCH_FALSE_OUTPUT; +GE_FUNC_VISIBILITY extern const uint32_t SWITCH_TRUE_OUTPUT; +GE_FUNC_VISIBILITY extern const uint32_t SWITCH_DATA_INPUT; +GE_FUNC_VISIBILITY extern const uint32_t SWITCH_PRED_INPUT; // FunctionOp -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t IF_COND_INPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t FOR_START_INPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t FOR_LIMIT_INPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t FOR_DELTA_INPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t FOR_DATA_INPUT; +GE_FUNC_VISIBILITY extern const uint32_t IF_COND_INPUT; +GE_FUNC_VISIBILITY extern const uint32_t FOR_START_INPUT; +GE_FUNC_VISIBILITY extern const uint32_t FOR_LIMIT_INPUT; +GE_FUNC_VISIBILITY extern const uint32_t FOR_DELTA_INPUT; +GE_FUNC_VISIBILITY extern const uint32_t FOR_DATA_INPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int NORMAL_TENSOR_SIZE; +GE_FUNC_VISIBILITY extern const int NORMAL_TENSOR_SIZE; -class OpUtils { +class GE_FUNC_VISIBILITY OpUtils { public: /// /// @ingroup domi_ome diff --git a/inc/framework/common/op_types.h b/inc/framework/common/op_types.h index 4555d5c3..fa41c1b6 100644 --- a/inc/framework/common/op_types.h +++ b/inc/framework/common/op_types.h @@ -21,7 +21,7 @@ #include namespace ge { -class OpTypeContainer { +class GE_FUNC_VISIBILITY OpTypeContainer { public: static OpTypeContainer *Instance() { static OpTypeContainer instance; @@ -43,7 +43,7 @@ class OpTypeContainer { std::set op_type_list_; }; -class OpTypeRegistrar { +class GE_FUNC_VISIBILITY OpTypeRegistrar { public: explicit OpTypeRegistrar(const std::string &op_type) { OpTypeContainer::Instance()->Register(op_type); } ~OpTypeRegistrar() {} diff --git a/inc/framework/common/profiling/ge_profiling.h b/inc/framework/common/profiling/ge_profiling.h index 83699754..7017aca3 100644 --- a/inc/framework/common/profiling/ge_profiling.h +++ b/inc/framework/common/profiling/ge_profiling.h @@ -38,9 +38,9 @@ struct ProfCommandHandleData { uint32_t modelId; }; -ge::Status RegProfCtrlCallback(MsprofCtrlCallback func); -ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func); -ge::Status RegProfReporterCallback(MsprofReporterCallback func); -ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len); +GE_FUNC_VISIBILITY ge::Status RegProfCtrlCallback(MsprofCtrlCallback func); +GE_FUNC_VISIBILITY ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func); +GE_FUNC_VISIBILITY ge::Status RegProfReporterCallback(MsprofReporterCallback func); +GE_FUNC_VISIBILITY ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len); #endif // INC_FRAMEWORK_COMMON_GE_PROFILING_H_ diff --git a/inc/framework/common/profiling/ge_runner_profiling.h b/inc/framework/common/profiling/ge_runner_profiling.h index d2eff767..011797a3 100644 --- a/inc/framework/common/profiling/ge_runner_profiling.h +++ b/inc/framework/common/profiling/ge_runner_profiling.h @@ -19,6 +19,6 @@ #include "profiling/ge_profiling.h" -bool IsInitialize(); +GE_FUNC_VISIBILITY bool IsInitialize(); #endif // INC_FRAMEWORK_COMMON_GE_RUNNER_PROFILING_H_ diff --git a/inc/framework/common/scope_guard.h b/inc/framework/common/scope_guard.h index 001a0e75..62ae4b6d 100644 --- a/inc/framework/common/scope_guard.h +++ b/inc/framework/common/scope_guard.h @@ -29,7 +29,7 @@ #define GE_DISMISS_GUARD(var) make_guard_##var.Dismiss() namespace ge { -class ScopeGuard { +class GE_FUNC_VISIBILITY ScopeGuard { public: // Noncopyable ScopeGuard(ScopeGuard const &) = delete; diff --git a/inc/framework/common/string_util.h b/inc/framework/common/string_util.h index de19807c..f0368363 100644 --- a/inc/framework/common/string_util.h +++ b/inc/framework/common/string_util.h @@ -17,6 +17,20 @@ #ifndef INC_FRAMEWORK_COMMON_STRING_UTIL_H_ #define INC_FRAMEWORK_COMMON_STRING_UTIL_H_ +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY _declspec(dllexport) +#else +#define GE_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_VISIBILITY +#endif +#endif + #include #include @@ -27,7 +41,7 @@ #include namespace ge { -class StringUtils { +class GE_FUNC_VISIBILITY StringUtils { public: static std::string &Ltrim(std::string &s) { #if __cplusplus >= 201103L diff --git a/inc/framework/common/util.h b/inc/framework/common/util.h index 42ab3868..525cf3ea 100644 --- a/inc/framework/common/util.h +++ b/inc/framework/common/util.h @@ -237,7 +237,7 @@ const int32_t DOMI_MAX_PATH_LEN = 256; /// @return true success /// @return false fail /// -bool ReadProtoFromBinaryFile(const char *file, Message *proto); +GE_FUNC_VISIBILITY bool ReadProtoFromBinaryFile(const char *file, Message *proto); /// /// @ingroup domi_common @@ -248,7 +248,7 @@ bool ReadProtoFromBinaryFile(const char *file, Message *proto); /// @return true success /// @return false fail /// -bool ReadProtoFromArray(const void *data, int size, Message *proto); +GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *data, int size, Message *proto); /// /// @ingroup domi_proto @@ -258,9 +258,9 @@ bool ReadProtoFromArray(const void *data, int size, Message *proto); /// @return true success /// @return false fail /// -bool ReadProtoFromText(const char *file, google::protobuf::Message *message); +GE_FUNC_VISIBILITY bool ReadProtoFromText(const char *file, google::protobuf::Message *message); -bool ReadProtoFromMem(const char *data, int size, google::protobuf::Message *message); +GE_FUNC_VISIBILITY bool ReadProtoFromMem(const char *data, int size, google::protobuf::Message *message); /// /// @ingroup: domi_common @@ -268,7 +268,7 @@ bool ReadProtoFromMem(const char *data, int size, google::protobuf::Message *mes /// @param [in] input_file: path of file /// @return long: File length. If the file length fails to be obtained, the value -1 is returned. /// -extern long GetFileLength(const std::string &input_file); +GE_FUNC_VISIBILITY extern long GetFileLength(const std::string &input_file); /// /// @ingroup domi_common @@ -279,9 +279,9 @@ extern long GetFileLength(const std::string &input_file); /// @return false fail /// @return true success /// -bool ReadBytesFromBinaryFile(const char *file_name, char **buffer, int &length); +GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *file_name, char **buffer, int &length); -bool ReadBytesFromBinaryFile(const char *file_name, std::vector &buffer); +GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *file_name, std::vector &buffer); /// /// @ingroup domi_common @@ -290,14 +290,14 @@ bool ReadBytesFromBinaryFile(const char *file_name, std::vector &buffer); /// @return 0 success /// @return -1 fail /// -extern int CreateDirectory(const std::string &directory_path); +GE_FUNC_VISIBILITY extern int CreateDirectory(const std::string &directory_path); /// /// @ingroup domi_common /// @brief Obtains the current time string. /// @return Time character string in the format : %Y%m%d%H%M%S, eg: 20171011083555 /// -std::string CurrentTimeInStr(); +GE_FUNC_VISIBILITY std::string CurrentTimeInStr(); /// /// @ingroup domi_common @@ -306,7 +306,7 @@ std::string CurrentTimeInStr(); /// @return string /// template -std::string ToString(std::vector &v) { +GE_FUNC_VISIBILITY std::string ToString(std::vector &v) { std::stringstream ss; ss << "["; for (T x : v) { @@ -326,7 +326,7 @@ std::string ToString(std::vector &v) { /// @return string /// template -std::string ToString(const google::protobuf::RepeatedField &rpd_field) { +GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedField &rpd_field) { std::stringstream ss; ss << "["; for (T x : rpd_field) { @@ -345,7 +345,7 @@ std::string ToString(const google::protobuf::RepeatedField &rpd_field) { /// @return Timestamp, in microseconds (US) /// /// -uint64_t GetCurrentTimestamp(); +GE_FUNC_VISIBILITY uint64_t GetCurrentTimestamp(); /// /// @ingroup domi_common @@ -353,7 +353,7 @@ uint64_t GetCurrentTimestamp(); /// @return Timestamp, in seconds (US) /// /// -uint32_t GetCurrentSecondTimestap(); +GE_FUNC_VISIBILITY uint32_t GetCurrentSecondTimestap(); /// /// @ingroup domi_common @@ -362,7 +362,7 @@ uint32_t GetCurrentSecondTimestap(); /// @param [in] b /// @return false: true: The result is within the normal int64 range. /// -bool CheckInt64MulOverflow(int64_t a, int64_t b); +GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(int64_t a, int64_t b); /// /// @ingroup domi_common @@ -370,7 +370,7 @@ bool CheckInt64MulOverflow(int64_t a, int64_t b); /// @param [in] path of input file /// @param [out] Absolute path of a file. If the absolute path cannot be obtained, an empty string is returned /// -std::string RealPath(const char *path); +GE_FUNC_VISIBILITY std::string RealPath(const char *path); /// /// @ingroup domi_common @@ -381,7 +381,7 @@ std::string RealPath(const char *path); /// @param [in] file_path path of input file /// @param [out] result /// -bool CheckInputPathValid(const std::string &file_path, const std::string &atc_param = ""); +GE_FUNC_VISIBILITY bool CheckInputPathValid(const std::string &file_path, const std::string &atc_param = ""); /// /// @ingroup domi_common @@ -389,7 +389,7 @@ bool CheckInputPathValid(const std::string &file_path, const std::string &atc_pa /// @param [in] file_path path of output file /// @param [out] result /// -bool CheckOutputPathValid(const std::string &file_path, const std::string &atc_param = ""); +GE_FUNC_VISIBILITY bool CheckOutputPathValid(const std::string &file_path, const std::string &atc_param = ""); /// /// @ingroup domi_common @@ -397,7 +397,7 @@ bool CheckOutputPathValid(const std::string &file_path, const std::string &atc_p /// @param [in] filePath file path /// @param [out] result /// -bool ValidateStr(const std::string &filePath, const std::string &mode); +GE_FUNC_VISIBILITY bool ValidateStr(const std::string &filePath, const std::string &mode); /// /// @ingroup domi_common @@ -405,7 +405,7 @@ bool ValidateStr(const std::string &filePath, const std::string &mode); /// @param [in] file_path file path /// @param [out] result /// -bool IsValidFile(const char *file_path); +GE_FUNC_VISIBILITY bool IsValidFile(const char *file_path); /// /// @ingroup domi_common @@ -415,7 +415,7 @@ bool IsValidFile(const char *file_path); /// @return 0 success /// @return -1 fail /// -Status CheckPath(const char *path, size_t length); +GE_FUNC_VISIBILITY Status CheckPath(const char *path, size_t length); } // namespace ge #endif // INC_FRAMEWORK_COMMON_UTIL_H_ diff --git a/inc/framework/engine/dnnengine.h b/inc/framework/engine/dnnengine.h index 1bcf5e07..8a0f3b65 100644 --- a/inc/framework/engine/dnnengine.h +++ b/inc/framework/engine/dnnengine.h @@ -45,7 +45,7 @@ struct DNNEngineAttribute { Format engine_output_format; }; -class DNNEngine { +class GE_FUNC_VISIBILITY DNNEngine { public: virtual ~DNNEngine() = default; virtual Status Initialize(const std::map &options) = 0; diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 3136e172..c546f63d 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -46,7 +46,7 @@ struct RunModelData { std::vector dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty }; -class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { +class GE_FUNC_VISIBILITY GeExecutor { public: GeExecutor(); ~GeExecutor() = default; diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h index e0904965..2d7d007b 100644 --- a/inc/framework/generator/ge_generator.h +++ b/inc/framework/generator/ge_generator.h @@ -31,7 +31,7 @@ #include "omg/omg_inner_types.h" namespace ge { -class GeGenerator { +class GE_FUNC_VISIBILITY GeGenerator { public: static GeGenerator &GetInstance() { static GeGenerator Instance; diff --git a/inc/framework/generator/generator_api.h b/inc/framework/generator/generator_api.h index 71c6832e..d44edd0c 100644 --- a/inc/framework/generator/generator_api.h +++ b/inc/framework/generator/generator_api.h @@ -17,6 +17,20 @@ #ifndef INC_FRAMEWORK_GENERATOR_GENERATOR_API_H_ #define INC_FRAMEWORK_GENERATOR_GENERATOR_API_H_ +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY _declspec(dllexport) +#else +#define GE_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_VISIBILITY +#endif +#endif + #include #ifdef __cplusplus @@ -40,7 +54,7 @@ typedef void *OpTensor_t; /// @param [in] om_file: file name for the om to save. /// @return 0 for success / others for fail /// -extern Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int in_num, +GE_FUNC_VISIBILITY extern Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int in_num, const OpTensor_t *out_tensor, int out_num, const OpAttr_t attr, const char *om_file); /// @@ -52,7 +66,7 @@ extern Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tenso /// @param [in] num: number of shape. /// @return OpTensor_t for success / nullptr for failure /// -extern OpTensor_t OpTensorCreate(int format, int datatype, const int64_t *shape, int num); +GE_FUNC_VISIBILITY extern OpTensor_t OpTensorCreate(int format, int datatype, const int64_t *shape, int num); /// /// @ingroup ge @@ -61,7 +75,7 @@ extern OpTensor_t OpTensorCreate(int format, int datatype, const int64_t *shape, /// @param [out] none /// @return 0 for success / others for failure. /// -extern Status_t OpTensorDestroy(OpTensor_t tensor); +GE_FUNC_VISIBILITY extern Status_t OpTensorDestroy(OpTensor_t tensor); /// /// @ingroup ge @@ -70,7 +84,7 @@ extern Status_t OpTensorDestroy(OpTensor_t tensor); /// @param [out] none /// @return OpAttr_t for success / nullptr for failure. /// -extern OpAttr_t OpAttrCreate(); +GE_FUNC_VISIBILITY extern OpAttr_t OpAttrCreate(); /// /// @ingroup ge @@ -79,7 +93,7 @@ extern OpAttr_t OpAttrCreate(); /// @param [out] none /// @return 0 for success / others for failure. /// -extern Status_t OpAttrDestroy(OpAttr_t attr); +GE_FUNC_VISIBILITY extern Status_t OpAttrDestroy(OpAttr_t attr); /// /// @ingroup ge @@ -89,7 +103,7 @@ extern Status_t OpAttrDestroy(OpAttr_t attr); /// @param [in] value: attributed value. /// @return 0 for success / others for failure. /// -extern Status_t SetAttrBool(OpAttr_t attr, const char *name, bool value); +GE_FUNC_VISIBILITY extern Status_t SetAttrBool(OpAttr_t attr, const char *name, bool value); /// /// @ingroup ge @@ -99,7 +113,7 @@ extern Status_t SetAttrBool(OpAttr_t attr, const char *name, bool value); /// @param [in] value: attribute value. /// @return 0 for success / others for failure. /// -extern Status_t SetAttrInt(OpAttr_t attr, const char *name, int64_t value); +GE_FUNC_VISIBILITY extern Status_t SetAttrInt(OpAttr_t attr, const char *name, int64_t value); /// /// @ingroup ge @@ -109,7 +123,7 @@ extern Status_t SetAttrInt(OpAttr_t attr, const char *name, int64_t value); /// @param [in] value: attribute value. /// @return 0 for success / others for failure. /// -extern Status_t SetAttrFloat(OpAttr_t attr, const char *name, float value); +GE_FUNC_VISIBILITY extern Status_t SetAttrFloat(OpAttr_t attr, const char *name, float value); /// /// @ingroup ge @@ -119,7 +133,7 @@ extern Status_t SetAttrFloat(OpAttr_t attr, const char *name, float value); /// @param [in] value: attribute value (can`t be nullptr, end with '\0'). /// @return 0 for success / others for failure. /// -extern Status_t SetAttrString(OpAttr_t attr, const char *name, const char *value); +GE_FUNC_VISIBILITY extern Status_t SetAttrString(OpAttr_t attr, const char *name, const char *value); /// /// @ingroup ge @@ -130,7 +144,7 @@ extern Status_t SetAttrString(OpAttr_t attr, const char *name, const char *value /// @param [in] num: number of value array. /// @return 0 for success / others for failure. /// -extern Status_t SetAttrBoolList(OpAttr_t attr, const char *name, const bool *value, int num); +GE_FUNC_VISIBILITY extern Status_t SetAttrBoolList(OpAttr_t attr, const char *name, const bool *value, int num); /// /// @ingroup ge @@ -141,7 +155,7 @@ extern Status_t SetAttrBoolList(OpAttr_t attr, const char *name, const bool *val /// @param [in] num: number of value array. /// @return 0 for success / others for failure. /// -extern Status_t SetAttrIntList(OpAttr_t attr, const char *name, const int64_t *value, int num); +GE_FUNC_VISIBILITY extern Status_t SetAttrIntList(OpAttr_t attr, const char *name, const int64_t *value, int num); /// /// @ingroup ge @@ -152,7 +166,7 @@ extern Status_t SetAttrIntList(OpAttr_t attr, const char *name, const int64_t *v /// @param [in] num: number of value array. /// @return 0 for success / others for failure. /// -extern Status_t SetAttrFloatList(OpAttr_t attr, const char *name, const float *value, int num); +GE_FUNC_VISIBILITY extern Status_t SetAttrFloatList(OpAttr_t attr, const char *name, const float *value, int num); /// /// @ingroup ge @@ -163,7 +177,7 @@ extern Status_t SetAttrFloatList(OpAttr_t attr, const char *name, const float *v /// @param [in] num: number of value array. /// @return 0 for success / others for failure. /// -extern Status_t SetAttrStringList(OpAttr_t attr, const char *name, const char **value, int num); +GE_FUNC_VISIBILITY extern Status_t SetAttrStringList(OpAttr_t attr, const char *name, const char **value, int num); #ifdef __cplusplus } diff --git a/inc/framework/memory/memory_api.h b/inc/framework/memory/memory_api.h index d8b06125..34e596a2 100644 --- a/inc/framework/memory/memory_api.h +++ b/inc/framework/memory/memory_api.h @@ -46,26 +46,26 @@ struct TensorInfo { /// \param size [in] rdma pool memory size to be allocated. /// \param mem_type [in] memory type for rdma pool. /// \return Status result of function -Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_MEMORY_HBM); +GE_FUNC_VISIBILITY Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_MEMORY_HBM); /// /// \param var_info [in] host variable addr infos. /// \param mem_type [in] memory type for rdma pool. /// \return Status result of function -Status RdmaRemoteRegister(const std::vector &var_info, rtMemType_t mem_type = RT_MEMORY_HBM); +GE_FUNC_VISIBILITY Status RdmaRemoteRegister(const std::vector &var_info, rtMemType_t mem_type = RT_MEMORY_HBM); /// /// \param tensor_info [in] description for tensor stored shared memory. /// \param dev_addr [out] malloced shared memory addr. /// \param memory_size [out] malloced shared memory size. /// \return Status result of function -Status MallocSharedMemory(const TensorInfo &tensor_info, uint64_t &dev_addr, uint64_t &memory_size); +GE_FUNC_VISIBILITY Status MallocSharedMemory(const TensorInfo &tensor_info, uint64_t &dev_addr, uint64_t &memory_size); /// /// \param var_name [in] var_name name of host variable. /// \param base_addr [out] base_addr vase addr of host variable. /// \param var_size [out] var_size memory_size of host variable. /// \return Status result of function -Status GetVarBaseAddrAndSize(const std::string &var_name, uint64_t &base_addr, uint64_t &var_size); +GE_FUNC_VISIBILITY Status GetVarBaseAddrAndSize(const std::string &var_name, uint64_t &base_addr, uint64_t &var_size); } // namespace ge #endif // INC_FRAMEWORK_MEMORY_MEMORY_API_H_ diff --git a/inc/framework/memory/memory_assigner.h b/inc/framework/memory/memory_assigner.h index 4552fa7c..f5837b3a 100644 --- a/inc/framework/memory/memory_assigner.h +++ b/inc/framework/memory/memory_assigner.h @@ -24,7 +24,7 @@ namespace ge { const int64_t MEM_ALIGN_SIZE = 512; -class MemoryAssigner { +class GE_FUNC_VISIBILITY MemoryAssigner { public: explicit MemoryAssigner(ge::ComputeGraphPtr compute_graph) : compute_graph_(std::move(compute_graph)) {} virtual ~MemoryAssigner() = default; diff --git a/inc/framework/omg/omg.h b/inc/framework/omg/omg.h index 62332b8d..eb25f919 100644 --- a/inc/framework/omg/omg.h +++ b/inc/framework/omg/omg.h @@ -43,7 +43,7 @@ namespace ge { * @brief init omg context * @return void */ -Status InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format, +GE_FUNC_VISIBILITY Status InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format, bool is_dynamic_input); /** @@ -61,7 +61,7 @@ Status InitDomiOmgContext(const string &input_shape, const string &input_format, * @param [in] atc_params multiply atc params * @return Status result code */ -Status ParseGraph(ge::Graph &graph, const std::map &atc_params, const char *model_file, +GE_FUNC_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map &atc_params, const char *model_file, const char *weights_file, domi::FrameworkType type, const char *op_conf = nullptr, const char *target = nullptr, RunMode run_mode = GEN_OM_MODEL, bool is_dynamic_input = false); @@ -73,9 +73,9 @@ Status ParseGraph(ge::Graph &graph, const std::map &atc_params, * @param [key] encrypted key * @return Status result code */ -Status ConvertOm(const char *model_file, const char *json_file, bool is_covert_to_json); +GE_FUNC_VISIBILITY Status ConvertOm(const char *model_file, const char *json_file, bool is_covert_to_json); -Status ConvertPbtxtToJson(const char *model_file, const char *json_file); +GE_FUNC_VISIBILITY Status ConvertPbtxtToJson(const char *model_file, const char *json_file); /** * @ingroup domi_omg * @brief convert the model file in protobuf format into a JSON file. @@ -85,26 +85,26 @@ Status ConvertPbtxtToJson(const char *model_file, const char *json_file); * @param [key] encrypted key * @return Status result code */ -Status ConvertFwkModelToJson(domi::FrameworkType framework, const char *model_file, const char *json_file); +GE_FUNC_VISIBILITY Status ConvertFwkModelToJson(domi::FrameworkType framework, const char *model_file, const char *json_file); -void GetGroupName(ge::proto::ModelDef &model); +GE_FUNC_VISIBILITY void GetGroupName(ge::proto::ModelDef &model); -void FindParserSo(const string &path, vector &fileList, string &caffe_parser_path); +GE_FUNC_VISIBILITY void FindParserSo(const string &path, vector &fileList, string &caffe_parser_path); -Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); +GE_FUNC_VISIBILITY Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); -Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format); +GE_FUNC_VISIBILITY Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format); -Status GetOutputLeaf(ge::NodePtr node, std::vector> &output_nodes_info); +GE_FUNC_VISIBILITY Status GetOutputLeaf(ge::NodePtr node, std::vector> &output_nodes_info); -void GetOutputNodesNameAndIndex(std::vector> &output_nodes_info, +GE_FUNC_VISIBILITY void GetOutputNodesNameAndIndex(std::vector> &output_nodes_info, std::vector &output_nodes_name); -void UpdateOmgCtxWithParserCtx(); +GE_FUNC_VISIBILITY void UpdateOmgCtxWithParserCtx(); -void UpdateParserCtxWithOmgCtx(); +GE_FUNC_VISIBILITY void UpdateParserCtxWithOmgCtx(); -void PrintModelInfo(ge::proto::ModelDef *model_def); +GE_FUNC_VISIBILITY void PrintModelInfo(ge::proto::ModelDef *model_def); } // namespace ge namespace domi { @@ -113,7 +113,7 @@ namespace domi { * @brief get omg context * @return reference of OmgContext */ -ge::OmgContext &GetContext(); +GE_FUNC_VISIBILITY ge::OmgContext &GetContext(); } // namespace domi #endif // INC_FRAMEWORK_OMG_OMG_H_ diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h index 1049b6b5..54c9ab4a 100644 --- a/inc/framework/omg/omg_inner_types.h +++ b/inc/framework/omg/omg_inner_types.h @@ -83,7 +83,7 @@ struct OmgContext { // user-designate input dims std::vector>> user_input_dims; // global input dims - std::unordered_map> input_dims; + std::map> input_dims; // resolve the mapping between operators with the same name and corresponding network. format e.g. // Detectionoutput:SsdDetectiontOutput @@ -132,7 +132,7 @@ namespace domi { * @brief get OMG context * @return OmgContext context */ -ge::OmgContext &GetContext(); +GE_FUNC_VISIBILITY ge::OmgContext &GetContext(); struct TEBinInfo { // It is obsolete. It will be automatically obtained from the binfilename field of the JSON file later. diff --git a/inc/framework/omg/parser/model_parser.h b/inc/framework/omg/parser/model_parser.h index 9eda685d..8fae5556 100644 --- a/inc/framework/omg/parser/model_parser.h +++ b/inc/framework/omg/parser/model_parser.h @@ -37,7 +37,7 @@ using Status = domi::Status; namespace domi { using GetGraphCallback = std::function( const google::protobuf::Message *root_proto, const std::string &graph)>; -class ModelParser { +class GE_FUNC_VISIBILITY ModelParser { public: ModelParser() {} diff --git a/inc/framework/omg/parser/op_parser.h b/inc/framework/omg/parser/op_parser.h index 087bad32..70bec218 100644 --- a/inc/framework/omg/parser/op_parser.h +++ b/inc/framework/omg/parser/op_parser.h @@ -34,7 +34,7 @@ namespace ge { * @brief Used to analyze operator information * */ -class OpParser { +class GE_FUNC_VISIBILITY OpParser { public: /** * @ingroup domi_omg diff --git a/inc/framework/omg/parser/parser_api.h b/inc/framework/omg/parser/parser_api.h index 382bdfde..6c223665 100644 --- a/inc/framework/omg/parser/parser_api.h +++ b/inc/framework/omg/parser/parser_api.h @@ -24,8 +24,8 @@ namespace ge { // Initialize parser -Status ParserInitialize(const std::map& options); +GE_FUNC_VISIBILITY Status ParserInitialize(const std::map& options); // Finalize parser, release all resources -Status ParserFinalize(); +GE_FUNC_VISIBILITY Status ParserFinalize(); } // namespace ge #endif // INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ diff --git a/inc/framework/omg/parser/parser_factory.h b/inc/framework/omg/parser/parser_factory.h index 4845606f..9d6590c0 100644 --- a/inc/framework/omg/parser/parser_factory.h +++ b/inc/framework/omg/parser/parser_factory.h @@ -33,7 +33,7 @@ class ModelParser; typedef std::shared_ptr (*MODEL_PARSER_CREATOR_FUN)(void); // Create modelparser for different frameworks -class ModelParserFactory { +class GE_FUNC_VISIBILITY ModelParserFactory { public: static ModelParserFactory *Instance(); @@ -61,7 +61,7 @@ class ModelParserFactory { std::map creator_map_; }; // end class ModelParserFactory -class ModelParserRegisterar { +class GE_FUNC_VISIBILITY ModelParserRegisterar { public: ModelParserRegisterar(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN fun) { ModelParserFactory::Instance()->RegisterCreator(type, fun); @@ -85,7 +85,7 @@ class ModelParserRegisterar { typedef std::shared_ptr (*WEIGHTS_PARSER_CREATOR_FUN)(void); // Create weightsparser for different frameworks -class WeightsParserFactory { +class GE_FUNC_VISIBILITY WeightsParserFactory { public: static WeightsParserFactory *Instance(); @@ -113,7 +113,7 @@ class WeightsParserFactory { std::map creator_map_; }; // end class WeightsParserFactory -class WeightsParserRegisterar { +class GE_FUNC_VISIBILITY WeightsParserRegisterar { public: WeightsParserRegisterar(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN fun) { WeightsParserFactory::Instance()->RegisterCreator(type, fun); diff --git a/inc/framework/omg/parser/parser_inner_ctx.h b/inc/framework/omg/parser/parser_inner_ctx.h index 5d91bd46..b23da53f 100644 --- a/inc/framework/omg/parser/parser_inner_ctx.h +++ b/inc/framework/omg/parser/parser_inner_ctx.h @@ -34,7 +34,7 @@ struct ParserContext { std::vector output_formats; // user-designate input dims std::vector>> user_input_dims; - std::unordered_map> input_dims; + std::map> input_dims; // resolve the mapping between operators with the same name and corresponding network. format e.g. // Detectionoutput:SsdDetectiontOutput std::map op_conf_map; @@ -68,7 +68,7 @@ struct ParserContext { std::string enable_scope_fusion_passes; }; -ParserContext &GetParserContext(); +GE_FUNC_VISIBILITY ParserContext &GetParserContext(); } // namespace ge #endif // INC_FRAMEWORK_OMG_PARSER_PARSER_INNER_CONTEXT_H_ diff --git a/inc/framework/omg/parser/weights_parser.h b/inc/framework/omg/parser/weights_parser.h index 1b5216b3..e4436044 100644 --- a/inc/framework/omg/parser/weights_parser.h +++ b/inc/framework/omg/parser/weights_parser.h @@ -34,7 +34,7 @@ namespace domi { * @brief Weight information resolver * */ -class WeightsParser { +class GE_FUNC_VISIBILITY WeightsParser { public: /** * @ingroup domi_omg diff --git a/inc/framework/omg/version.h b/inc/framework/omg/version.h index ac649d83..4facba0d 100644 --- a/inc/framework/omg/version.h +++ b/inc/framework/omg/version.h @@ -27,7 +27,7 @@ #include "framework/common/debug/ge_log.h" namespace ge { -class PlatformVersionManager { +class GE_FUNC_VISIBILITY PlatformVersionManager { public: PlatformVersionManager() = delete; ~PlatformVersionManager() = delete; diff --git a/metadef b/metadef index c86433f1..25967258 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit c86433f19f6df542adaa5d444ea9bc52d96e6371 +Subproject commit 2596725889c19c60a03440ab9e4e313070326ec0 diff --git a/parser b/parser index 98f17f4a..6516132e 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 98f17f4a2a37f283797858eabefa9dba1d06a66b +Subproject commit 6516132e2eaeea2bf51cc790d52c83709588f5d8 diff --git a/tests/depends/omg/src/omg_stub.cc b/tests/depends/omg/src/omg_stub.cc index 13ddf8bb..811db2d2 100644 --- a/tests/depends/omg/src/omg_stub.cc +++ b/tests/depends/omg/src/omg_stub.cc @@ -401,7 +401,7 @@ struct OmgContext { // user-designate input dims std::vector>> user_input_dims; // global input dims - std::unordered_map> input_dims; + std::map> input_dims; // solve rename op e.g: Detectionoutput:SsdDetectiontOutput std::map op_conf_map; diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc index 9b45e7e2..1a170167 100644 --- a/tests/depends/runtime/src/runtime_stub.cc +++ b/tests/depends/runtime/src/runtime_stub.cc @@ -245,9 +245,35 @@ rtError_t rtProfilerInit(const char *prof_dir, const char *address, const char * rtError_t rtProfilerStart(void) { return RT_ERROR_NONE; } -rtError_t rtLabelCreate(rtLabel_t *label) { return RT_ERROR_NONE; } +rtError_t rtLabelCreate(rtLabel_t *label) { + *label = new uint64_t; + return RT_ERROR_NONE; +} + +rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream) { + *label = new uint64_t; + return RT_ERROR_NONE; +} + +rtError_t rtLabelCreateV2(rtLabel_t *label, rtModel_t model) { + *label = new uint64_t; + return RT_ERROR_NONE; +} -rtError_t rtLabelDestroy(rtLabel_t label) { return RT_ERROR_NONE; } +rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_t stream) { + *label = new uint64_t; + return RT_ERROR_NONE; +} + +rtError_t rtLabelListCpy(rtLabel_t *label, uint32_t labelNumber, void *dst, uint32_t dstMax) { + return RT_ERROR_NONE; +} + +rtError_t rtLabelDestroy(rtLabel_t label) { + uint64_t *stub = static_cast(label); + delete stub; + return RT_ERROR_NONE; +} rtError_t rtLabelSet(rtLabel_t label, rtStream_t stream) { return RT_ERROR_NONE; } @@ -255,8 +281,17 @@ rtError_t rtLabelSwitch(void *ptr, rtCondition_t condition, uint32_t value, rtLa return RT_ERROR_NONE; } +rtError_t rtLabelSwitchByIndex(void *ptr, uint32_t max, void *labelInfoPtr, rtStream_t stream) { + return RT_ERROR_NONE; +} + rtError_t rtLabelGoto(rtLabel_t label, rtStream_t stream) { return RT_ERROR_NONE; } +rtError_t rtLabelGotoEx(rtLabel_t label, rtStream_t stream) { + return RT_ERROR_NONE; +} + + rtError_t rtInvalidCache(uint64_t base, uint32_t len) { return RT_ERROR_NONE; } rtError_t rtModelLoadComplete(rtModel_t model) { return RT_ERROR_NONE; } @@ -364,12 +399,6 @@ rtError_t rtSetCtxINFMode(bool mode) return RT_ERROR_NONE; } -rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream) -{ - *label = new uint32_t; - return RT_ERROR_NONE; -} - rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value) { return RT_ERROR_NONE; diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index b98c8546..697725c6 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -404,6 +404,8 @@ set(DISTINCT_GRAPH_LOAD_SRC_FILES "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/kernel_task_info.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/label_set_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc" @@ -562,6 +564,46 @@ set(SINGLE_OP_SRC_FILES "${GE_CODE_DIR}/ge/single_op/single_op_manager.cc" "${GE_CODE_DIR}/ge/single_op/task/aicpu_task_builder.cc" "${GE_CODE_DIR}/ge/single_op/task/aicpu_kernel_task_builder.cc" + "${GE_CODE_DIR}/ge/hybrid/common/tensor_value.cc" + "${GE_CODE_DIR}/ge/hybrid/common/npu_memory_allocator.cc" + "${GE_CODE_DIR}/ge/hybrid/executor/rt_callback_manager.cc" + "${GE_CODE_DIR}/ge/hybrid/executor/node_state.cc" + "${GE_CODE_DIR}/ge/hybrid/executor/node_done_manager.cc" + "${GE_CODE_DIR}/ge/hybrid/executor/hybrid_profiler.cc" + "${GE_CODE_DIR}/ge/hybrid/executor/hybrid_model_executor.cc" + "${GE_CODE_DIR}/ge/hybrid/executor/hybrid_model_async_executor.cc" + "${GE_CODE_DIR}/ge/hybrid/executor/hybrid_execution_context.cc" + "${GE_CODE_DIR}/ge/hybrid/executor/subgraph_context.cc" + "${GE_CODE_DIR}/ge/hybrid/executor/subgraph_executor.cc" + "${GE_CODE_DIR}/ge/hybrid/executor/worker/task_compile_engine.cc" + "${GE_CODE_DIR}/ge/hybrid/executor/worker/shape_inference_engine.cc" + "${GE_CODE_DIR}/ge/hybrid/executor/worker/execution_engine.cc" + "${GE_CODE_DIR}/ge/hybrid/model/hybrid_model.cc" + "${GE_CODE_DIR}/ge/hybrid/model/hybrid_model_builder.cc" + "${GE_CODE_DIR}/ge/hybrid/model/node_item.cc" + "${GE_CODE_DIR}/ge/hybrid/model/graph_item.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/aicore/aicore_node_executor.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/aicore/aicore_op_task.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/aicore/aicore_task_builder.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel_factory.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/controlop/control_op_executor.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/hccl/hccl_node_executor.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/rts/rts_node_executor.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/node_executor.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/task_context.cc" + "${GE_CODE_DIR}/ge/hybrid/hybrid_davinci_model.cc" ) # test files @@ -587,6 +629,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES "graph/load/kernel_task_info_unittest.cc" "graph/load/memcpy_addr_async_task_info_unittest.cc" "graph/load/memcpy_async_task_info_unittest.cc" + "graph/load/cpu_queue_schedule_unittest.cc" #"graph/graph_load_unittest.cc" "graph/ge_executor_unittest.cc" "graph/load/model_helper_unittest.cc" @@ -635,7 +678,7 @@ set(PASS_TEST_FILES "graph/passes/net_output_pass_unittest.cc" "graph/passes/no_use_reshape_remove_pass_unittest.cc" "graph/passes/infershape_pass_unittest.cc" - "graph/passes/multi_batch_clone_pass_unittest.cc" + "graph/passes/multi_batch_clone_pass_unittest.cc" ) set(KERNEL_TEST_FILES diff --git a/tests/ut/ge/graph/load/cpu_queue_schedule_unittest.cc b/tests/ut/ge/graph/load/cpu_queue_schedule_unittest.cc new file mode 100644 index 00000000..a36754b8 --- /dev/null +++ b/tests/ut/ge/graph/load/cpu_queue_schedule_unittest.cc @@ -0,0 +1,70 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#define private public +#define protected public +#include "graph/load/model_manager/cpu_queue_schedule.h" +#undef private +#undef protected + +using namespace std; + +namespace ge { +class UtestCpuQueueSchedule : public testing::Test { + protected: + void SetUp() {} + + void TearDown() {} +}; + +// test Init_CpuTaskZeroCopy_succ +TEST_F(UtestCpuQueueSchedule, CpuTaskZeroCopy_Init_Success) { + CpuTaskZeroCopy cpu_task_zero_copy(nullptr); + std::vector mbuf_list; + map outside_addrs; + ZeroCopyOffset addr_mapping; + addr_mapping.addr_count_ = 1; + std::vector addr_offset; + addr_offset.push_back((void*) 0x11110000); + uintptr_t addr = 0x12340000; + std::map> outside_addr; + outside_addr[(void*)addr] = addr_offset; + addr_mapping.outside_addrs_.emplace_back(outside_addr); + mbuf_list.emplace_back(addr); + uint32_t index = 0; + outside_addrs[index] = addr_mapping; + EXPECT_EQ(cpu_task_zero_copy.Init(mbuf_list, outside_addrs), SUCCESS); +} + +TEST_F(UtestCpuQueueSchedule, CpuTaskInfo_Init_args_valid) { + CpuTaskZeroCopy cpu_task_zero_copy(nullptr); + CpuTaskActiveEntry cpu_task_active_entry(nullptr); + CpuTaskModelDequeue cpu_task_model_dequeue(nullptr); + CpuTaskModelRepeat cpu_task_model_repeat(nullptr); + CpuTaskWaitEndGraph cpu_task_wait_end_graph(nullptr); + CpuTaskModelEnqueue cpu_task_model_enqueue(nullptr); + CpuTaskPrepareOutput cpu_task_prepare_output(nullptr); + EXPECT_EQ(cpu_task_zero_copy.Distribute(), FAILED); + EXPECT_EQ(cpu_task_active_entry.Distribute(), FAILED); + EXPECT_EQ(cpu_task_model_dequeue.Distribute(), FAILED); + EXPECT_EQ(cpu_task_model_repeat.Distribute(), FAILED); + EXPECT_EQ(cpu_task_wait_end_graph.Distribute(), FAILED); + EXPECT_EQ(cpu_task_model_enqueue.Distribute(), FAILED); + EXPECT_EQ(cpu_task_prepare_output.Distribute(), FAILED); +} +} // namespace ge diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 47968345..2b9bb4ed 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -32,21 +32,10 @@ class UtestDavinciModel : public testing::Test { void SetUp() {} void TearDown() {} - public: - NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { - GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); - auto op_desc = std::make_shared(name, type); - for (auto i = 0; i < in_num; ++i) { - op_desc->AddInputDesc(test_desc); - } - for (auto i = 0; i < out_num; ++i) { - op_desc->AddOutputDesc(test_desc); - } - return graph->AddNode(op_desc); - } }; -/*TEST_F(UtestDavinciModel, init_success) { +/* +TEST_F(UtestDavinciModel, init_success) { DavinciModel model(0, nullptr); ComputeGraphPtr graph = make_shared("default"); ProfilingManager::Instance().is_load_profiling_ = true; @@ -130,7 +119,8 @@ class UtestDavinciModel : public testing::Test { EXPECT_EQ(outputs.size(), 1); ProfilingManager::Instance().is_load_profiling_ = false; -}*/ +} +*/ TEST_F(UtestDavinciModel, init_data_op) { DavinciModel model(0, nullptr); @@ -181,7 +171,8 @@ TEST_F(UtestDavinciModel, init_data_op_subgraph) { uint32_t data_op_index = 0; map data_by_index; - EXPECT_EQ(model.InitDataOp(nullptr, node, data_op_index, data_by_index), SUCCESS); + set input_outside_addrs; + EXPECT_EQ(model.InitDataOp(nullptr, node, data_op_index, data_by_index, input_outside_addrs), SUCCESS); EXPECT_EQ(model.input_addrs_list_.size(), 0); EXPECT_EQ(model.output_addrs_list_.size(), 0); @@ -206,7 +197,8 @@ TEST_F(UtestDavinciModel, init_netoutput_op_subgraph) { NodePtr node = graph->AddNode(op_output); std::vector output_op_list; - EXPECT_EQ(model.InitNetOutput(nullptr, node, output_op_list), SUCCESS); + set output_outside_addrs; + EXPECT_EQ(model.InitNetOutput(nullptr, node, output_op_list, output_outside_addrs), SUCCESS); EXPECT_EQ(model.input_addrs_list_.size(), 0); EXPECT_EQ(model.output_addrs_list_.size(), 0); @@ -755,4 +747,110 @@ TEST_F(UtestDavinciModel, init_data_aipp_input_dims_normal) { EXPECT_EQ(model.output_addrs_list_.size(), 0); EXPECT_EQ(model.op_list_.size(), 1); } + +/* +// test label_set_task Init +TEST_F(UtestDavinciModel, label_task_success) { + DavinciModel model(0, nullptr); + ComputeGraphPtr graph = make_shared("default"); + + GeModelPtr ge_model = make_shared(); + ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph)); + AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 5120000); + AttrUtils::SetInt(ge_model, ATTR_MODEL_STREAM_NUM, 1); + + shared_ptr model_task_def = make_shared(); + ge_model->SetModelTaskDef(model_task_def); + + GeTensorDesc tensor(GeShape(), FORMAT_ND, DT_INT32); + TensorUtils::SetSize(tensor, 64); + + uint32_t op_index = 0; + { + OpDescPtr op_desc = CreateOpDesc("label_switch", LABELSWITCHBYINDEX); + op_desc->AddInputDesc(tensor); + op_desc->SetInputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index = 0 + EXPECT_TRUE(AttrUtils::SetListInt(op_desc, ATTR_NAME_LABEL_SWITCH_LIST, {0, 1})); + + domi::TaskDef *task_def1 = model_task_def->add_task(); + task_def1->set_stream_id(0); + task_def1->set_type(RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX); + domi::LabelSwitchByIndexDef *label_task_def = task_def1->mutable_label_switch_by_index(); + label_task_def->set_op_index(op_index++); + label_task_def->set_label_max(2); + } + + { + OpDescPtr op_desc = CreateOpDesc("label_then", LABELSET); + NodePtr node = graph->AddNode(op_desc); // op_index = 1 + EXPECT_TRUE(AttrUtils::SetInt(op_desc, ATTR_NAME_LABEL_SWITCH_INDEX, 1)); + + domi::TaskDef *task_def1 = model_task_def->add_task(); + task_def1->set_stream_id(0); + task_def1->set_type(RT_MODEL_TASK_LABEL_SET); + domi::LabelSetDef *label_task_def = task_def1->mutable_label_set(); + label_task_def->set_op_index(op_index++); + } + + { + OpDescPtr op_desc = CreateOpDesc("label_goto", LABELGOTOEX); + NodePtr node = graph->AddNode(op_desc); // op_index = 2 + EXPECT_TRUE(AttrUtils::SetInt(op_desc, ATTR_NAME_LABEL_SWITCH_INDEX, 2)); + + domi::TaskDef *task_def2 = model_task_def->add_task(); + task_def2->set_stream_id(0); + task_def2->set_type(RT_MODEL_TASK_STREAM_LABEL_GOTO); + domi::LabelGotoExDef *label_task_def = task_def2->mutable_label_goto_ex(); + label_task_def->set_op_index(op_index++); + } + + { + OpDescPtr op_desc = CreateOpDesc("label_else", LABELSET); + NodePtr node = graph->AddNode(op_desc); // op_index = 3 + EXPECT_TRUE(AttrUtils::SetInt(op_desc, ATTR_NAME_LABEL_SWITCH_INDEX, 0)); + + domi::TaskDef *task_def1 = model_task_def->add_task(); + task_def1->set_stream_id(0); + task_def1->set_type(RT_MODEL_TASK_LABEL_SET); + domi::LabelSetDef *label_task_def = task_def1->mutable_label_set(); + label_task_def->set_op_index(op_index++); + } + + { + OpDescPtr op_desc = CreateOpDesc("label_leave", LABELSET); + NodePtr node = graph->AddNode(op_desc); // op_index = 4 + EXPECT_TRUE(AttrUtils::SetInt(op_desc, ATTR_NAME_LABEL_SWITCH_INDEX, 2)); + + domi::TaskDef *task_def1 = model_task_def->add_task(); + task_def1->set_stream_id(0); + task_def1->set_type(RT_MODEL_TASK_LABEL_SET); + domi::LabelSetDef *label_task_def = task_def1->mutable_label_set(); + label_task_def->set_op_index(op_index++); + } + + EXPECT_TRUE(AttrUtils::SetInt(ge_model, ATTR_MODEL_LABEL_NUM, 3)); + EXPECT_EQ(model.Assign(ge_model), SUCCESS); + EXPECT_EQ(model.Init(), SUCCESS); + EXPECT_EQ(model.input_addrs_list_.size(), 0); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.task_list_.size(), 5); +} +*/ + +TEST_F(UtestDavinciModel, LoadWithQueue_fail_with_diff_args) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); + model.input_queue_ids_.emplace_back(0); + EXPECT_EQ(model.LoadWithQueue(), ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID); + EXPECT_EQ(model.input_data_info_.size(), 0); + ZeroCopyOffset zero_copy_offset; + model.input_data_info_[0] = zero_copy_offset; + model.output_queue_ids_.emplace_back(0); + EXPECT_EQ(model.LoadWithQueue(), ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID); + EXPECT_EQ(model.output_data_info_.size(), 0); + model.output_data_info_[0] = zero_copy_offset; + EXPECT_EQ(model.LoadWithQueue(), INTERNAL_ERROR); + EXPECT_EQ(model.active_stream_list_.size(), 0); +} } // namespace ge diff --git a/tests/ut/ge/single_op/single_op_manager_unittest.cc b/tests/ut/ge/single_op/single_op_manager_unittest.cc index a70d2984..05da8683 100644 --- a/tests/ut/ge/single_op/single_op_manager_unittest.cc +++ b/tests/ut/ge/single_op/single_op_manager_unittest.cc @@ -17,7 +17,6 @@ #include #include -#include "cce/taskdown_common.hpp" #include "runtime/rt.h" #define protected public diff --git a/third_party/fwkacllib/inc/register/op_kernel_registry.h b/third_party/fwkacllib/inc/register/op_kernel_registry.h index 5fed8960..35fcc857 100644 --- a/third_party/fwkacllib/inc/register/op_kernel_registry.h +++ b/third_party/fwkacllib/inc/register/op_kernel_registry.h @@ -27,10 +27,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpKernelRegistry { using CreateFn = HostCpuOp* (*)(); ~OpKernelRegistry(); - static OpKernelRegistry& GetInstance() { - static OpKernelRegistry instance; - return instance; - } + static OpKernelRegistry& GetInstance(); bool IsRegistered(const std::string &op_type); diff --git a/third_party/fwkacllib/inc/register/op_registry.h b/third_party/fwkacllib/inc/register/op_registry.h index 318eb3ba..f7e37390 100644 --- a/third_party/fwkacllib/inc/register/op_registry.h +++ b/third_party/fwkacllib/inc/register/op_registry.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "register/register.h" @@ -88,7 +89,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry { std::unordered_map fusion_parse_params_by_op_fn_map_; std::unordered_map op_types_to_parse_subgraph_post_func_; std::unordered_map> remove_input_configure_map_; - std::unordered_map origin_type_to_om_type_; + std::map origin_type_to_om_type_; std::unordered_map parse_op_to_graph_fn_map_; std::unordered_map op_types_to_parse_subgraph_post_func_v2_; };