From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmqtags/v1.2.0
@@ -2,6 +2,7 @@ | |||
/build | |||
/output | |||
/prebuilts | |||
/cov | |||
*.ir | |||
*.out | |||
@@ -88,10 +88,8 @@ if (ENABLE_OPEN_SRC) | |||
find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR}) | |||
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR}) | |||
if(PLATFORM STREQUAL "train") | |||
find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) | |||
find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) | |||
find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) | |||
find_module(resource libresource.so ${ASCEND_RUNTIME_DIR}) | |||
find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) | |||
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | |||
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | |||
@@ -101,12 +99,10 @@ if (ENABLE_OPEN_SRC) | |||
elseif(PLATFORM STREQUAL "inference") | |||
find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) | |||
find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) | |||
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) | |||
find_module(resource libresource.so ${ASCEND_ATC_DIR}) | |||
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) | |||
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | |||
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | |||
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | |||
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||
if(PRODUCT STREQUAL "flr3") | |||
elseif(PRODUCT STREQUAL "flr1") | |||
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | |||
@@ -116,17 +112,14 @@ if (ENABLE_OPEN_SRC) | |||
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) | |||
endif() | |||
elseif(PLATFORM STREQUAL "all") | |||
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | |||
find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) | |||
find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) | |||
find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) | |||
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) | |||
find_module(resource libresource.so ${ASCEND_ATC_DIR}) | |||
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | |||
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | |||
find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) | |||
find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) | |||
find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) | |||
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | |||
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | |||
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) | |||
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) | |||
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | |||
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | |||
else() | |||
message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") | |||
endif() | |||
@@ -166,14 +166,14 @@ build_graphengine() | |||
echo "execute command: cmake ${CMAKE_ARGS} .. failed." | |||
return 1 | |||
fi | |||
COMMON_TARGET="ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt " | |||
COMMON_TARGET="ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt " | |||
TARGET=${COMMON_TARGET} | |||
if [ "x${PLATFORM}" = "xtrain" ] | |||
then | |||
TARGET="ge_runner ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder fwk_atc.bin ${TARGET}" | |||
TARGET="ge_runner fwk_atc.bin ${TARGET}" | |||
elif [ "x${PLATFORM}" = "xinference" ] | |||
then | |||
TARGET="ge_compiler atc_ge_local_engine atc_ge_local_opskernel_builder atc_host_cpu_engine atc_host_cpu_opskernel_builder atc_atc.bin opensrc_ascendcl ${TARGET}" | |||
TARGET="ge_compiler atc_atc.bin opensrc_ascendcl ${TARGET}" | |||
elif [ "X$ENABLE_GE_UT" = "Xon" ] | |||
then | |||
TARGET="ut_libgraph ut_libge_multiparts_utest ut_libge_others_utest ut_libge_kernel_utest ut_libge_distinct_load_utest" | |||
@@ -183,7 +183,7 @@ build_graphengine() | |||
elif [ "x${PLATFORM}" = "xall" ] | |||
then | |||
# build all the target | |||
TARGET="" | |||
TARGET="ge_runner ge_compiler fwk_atc.bin atc_atc.bin opensrc_ascendcl ${TARGET}" | |||
fi | |||
make ${VERBOSE} ${TARGET} -j${THREAD_NUM} && make install | |||
@@ -198,8 +198,6 @@ g++ -v | |||
mk_dir ${OUTPUT_PATH} | |||
build_graphengine || { echo "GraphEngine build failed."; return; } | |||
echo "---------------- GraphEngine build finished ----------------" | |||
#cp -rf "${BUILD_PATH}/graphengine/"*.so "${OUTPUT_PATH}" | |||
#rm -rf "${OUTPUT_PATH}/"libproto* | |||
rm -f ${OUTPUT_PATH}/libgmock*.so | |||
rm -f ${OUTPUT_PATH}/libgtest*.so | |||
rm -f ${OUTPUT_PATH}/lib*_stub.so | |||
@@ -209,10 +207,6 @@ find ${OUTPUT_PATH} -name "*.so*" -print0 | xargs -0 chmod 500 | |||
echo "---------------- GraphEngine output generated ----------------" | |||
# if [[ "X$ENABLE_GE_ST" = "Xon" ]]; then | |||
# cp ${BUILD_PATH}/graphengine/tests/st/st_resnet50_train ${OUTPUT_PATH} | |||
# fi | |||
if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then | |||
cp ${BUILD_PATH}/tests/ut/common/graph/ut_libgraph ${OUTPUT_PATH} | |||
cp ${BUILD_PATH}/tests/ut/ge/ut_libge_multiparts_utest ${OUTPUT_PATH} | |||
@@ -220,9 +214,6 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then | |||
cp ${BUILD_PATH}/tests/ut/ge/ut_libge_others_utest ${OUTPUT_PATH} | |||
cp ${BUILD_PATH}/tests/ut/ge/ut_libge_kernel_utest ${OUTPUT_PATH} | |||
# if [[ "X${ENABLE_GE_UT_ONLY_COMPILE}" != "Xon" ]]; then | |||
# export LD_LIBRARY_PATH=${D_LINK_PATH}/x86_64/:${BUILD_PATH}../third_party/prebuild/x86_64/:${BUILD_PATH}/graphengine/:/usr/local/HiAI/driver/lib64:/usr/local/HiAI/runtime/lib64:${LD_LIBRARY_PATH} | |||
# echo ${LD_LIBRARY_PATH} | |||
${OUTPUT_PATH}/ut_libgraph && | |||
${OUTPUT_PATH}/ut_libge_multiparts_utest && | |||
${OUTPUT_PATH}/ut_libge_distinct_load_utest && | |||
@@ -232,17 +223,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then | |||
echo "!!! UT FAILED, PLEASE CHECK YOUR CHANGES !!!" | |||
exit 1; | |||
fi | |||
# fi | |||
# if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then | |||
echo "Generating coverage statistics, please wait..." | |||
cd ${BASEPATH} | |||
rm -rf ${BASEPATH}/cov | |||
mkdir ${BASEPATH}/cov | |||
lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info | |||
lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info | |||
cd ${BASEPATH}/cov | |||
genhtml coverage.info | |||
echo "Generating coverage statistics, please wait..." | |||
cd ${BASEPATH} | |||
rm -rf ${BASEPATH}/cov | |||
mkdir ${BASEPATH}/cov | |||
lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info | |||
lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info | |||
cd ${BASEPATH}/cov | |||
genhtml coverage.info | |||
fi | |||
# generate output package in tar form, including ut/st libraries/executables | |||
@@ -256,6 +244,8 @@ generate_package() | |||
ATC_PATH="atc/lib64" | |||
ATC_BIN_PATH="atc/bin" | |||
FWK_BIN_PATH="fwkacllib/bin" | |||
FWK_INCLUDE_PATH="fwkacllib/include" | |||
ATC_INCLUDE_PATH="atc/include" | |||
NNENGINE_PATH="plugin/nnengine/ge_config" | |||
OPSKERNEL_PATH="plugin/opskernel" | |||
@@ -277,6 +267,8 @@ generate_package() | |||
mk_dir "${OUTPUT_PATH}/${ACL_PATH}" | |||
mk_dir "${OUTPUT_PATH}/${ATC_BIN_PATH}" | |||
mk_dir "${OUTPUT_PATH}/${FWK_BIN_PATH}" | |||
mk_dir "${OUTPUT_PATH}/${FWK_INCLUDE_PATH}" | |||
mk_dir "${OUTPUT_PATH}/${ATC_INCLUDE_PATH}" | |||
cd "${OUTPUT_PATH}" | |||
@@ -289,10 +281,10 @@ generate_package() | |||
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name libengine.so -exec cp -f {} ${OUTPUT_PATH}/${ATC_PATH}/${NNENGINE_PATH}/../ \; | |||
MAX_DEPTH=1 | |||
if [ "x${PLATFORM}" = "xall" ] || [ "x${PLATFORM}" = "xinference" ] | |||
then | |||
MAX_DEPTH=2 | |||
fi | |||
# if [ "x${PLATFORM}" = "xall" ] || [ "x${PLATFORM}" = "xinference" ] | |||
# then | |||
# MAX_DEPTH=2 | |||
# fi | |||
for lib in "${PLUGIN_OPSKERNEL[@]}"; | |||
do | |||
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth ${MAX_DEPTH} -name "$lib" -exec cp -f {} ${OUTPUT_PATH}/${FWK_PATH}/${OPSKERNEL_PATH} \; | |||
@@ -318,7 +310,15 @@ generate_package() | |||
find ./lib/atclib -name atc.bin -exec cp {} "${OUTPUT_PATH}/${ATC_BIN_PATH}" \; | |||
find ./lib/fwkacl -name atc.bin -exec cp {} "${OUTPUT_PATH}/${FWK_BIN_PATH}" \; | |||
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name "libascendcl.so" -exec cp -f {} ${OUTPUT_PATH}/${ACL_PATH} \; | |||
cp -r ${OUTPUT_PATH}/../metadef/inc/external/* ${ATC_INCLUDE_PATH} | |||
cp -r ${OUTPUT_PATH}/../parser/inc/external/* ${ATC_INCLUDE_PATH} | |||
cp -r ${OUTPUT_PATH}/../inc/external/* ${ATC_INCLUDE_PATH} | |||
cp -r ${OUTPUT_PATH}/../metadef/inc/external/* ${FWK_INCLUDE_PATH} | |||
cp -r ${OUTPUT_PATH}/../parser/inc/external/* ${FWK_INCLUDE_PATH} | |||
cp -r ${OUTPUT_PATH}/../inc/external/* ${FWK_INCLUDE_PATH} | |||
if [ "x${PLATFORM}" = "xtrain" ] | |||
then | |||
tar -cf graphengine_lib.tar fwkacllib | |||
@@ -339,4 +339,4 @@ then | |||
find ./ -name graphengine_lib.tar -exec rm {} \; | |||
tar -cf graphengine_lib.tar lib | |||
fi | |||
echo "---------------- GraphEngine package archive generated ----------------" | |||
echo "---------------- GraphEngine package archive generated ----------------" |
@@ -639,15 +639,6 @@ set(INFER_SRC_LIST | |||
"graph/load/model_manager/task_info/model_exit_task_info.cc" | |||
"graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" | |||
"graph/load/model_manager/task_info/super_kernel/super_kernel.cc" | |||
"single_op/task/op_task.cc" | |||
"single_op/task/build_task_utils.cc" | |||
"single_op/task/tbe_task_builder.cc" | |||
"single_op/task/aicpu_task_builder.cc" | |||
"single_op/task/aicpu_kernel_task_builder.cc" | |||
"single_op/single_op.cc" | |||
"single_op/single_op_model.cc" | |||
"single_op/stream_resource.cc" | |||
"single_op/single_op_manager.cc" | |||
"hybrid/hybrid_davinci_model_stub.cc" | |||
"ir_build/ge_ir_build.cc" | |||
"ir_build/atc_ir_common.cc" | |||
@@ -703,11 +694,13 @@ target_compile_definitions(ge_runner PRIVATE | |||
FMK_SUPPORT_DUMP | |||
DAVINCI_CLOUD | |||
google=ascend_private | |||
FUNC_VISIBILITY | |||
) | |||
target_compile_options(ge_runner PRIVATE | |||
-O2 | |||
-fno-common | |||
-fvisibility=hidden | |||
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable> | |||
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format> | |||
) | |||
@@ -738,6 +731,10 @@ target_include_directories(ge_runner SYSTEM PRIVATE | |||
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | |||
) | |||
target_link_options(ge_runner PRIVATE | |||
-Wl,-Bsymbolic | |||
) | |||
target_link_libraries(ge_runner PRIVATE | |||
$<BUILD_INTERFACE:intf_pub> | |||
adump_server | |||
@@ -772,11 +769,13 @@ target_compile_definitions(ge_compiler PRIVATE | |||
FMK_HOST_INFER | |||
COMPILE_OMG_PACKAGE | |||
google=ascend_private | |||
FUNC_VISIBILITY | |||
) | |||
target_compile_options(ge_compiler PRIVATE | |||
-O2 | |||
-fno-common | |||
-fvisibility=hidden | |||
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable> | |||
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format> | |||
) | |||
@@ -807,6 +806,10 @@ target_include_directories(ge_compiler SYSTEM PRIVATE | |||
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | |||
) | |||
target_link_options(ge_compiler PRIVATE | |||
-Wl,-Bsymbolic | |||
) | |||
target_link_libraries(ge_compiler PRIVATE | |||
$<BUILD_INTERFACE:intf_pub> | |||
static_mmpa | |||
@@ -868,6 +871,7 @@ target_compile_options(opensrc_ascendcl PRIVATE | |||
-O2 | |||
-fvisibility=hidden | |||
) | |||
target_link_options(opensrc_ascendcl PRIVATE | |||
-rdynamic | |||
-Wl,--allow-multiple-definition | |||
@@ -875,6 +879,7 @@ target_link_options(opensrc_ascendcl PRIVATE | |||
-Wl,-Bsymbolic | |||
-Wl,--exclude-libs,ALL | |||
) | |||
target_link_libraries(opensrc_ascendcl PRIVATE | |||
-Wl,--whole-archive | |||
ge_executor | |||
@@ -12,7 +12,7 @@ set(PROTO_LIST | |||
"${METADEF_DIR}/proto/tensorflow/tensor.proto" | |||
"${METADEF_DIR}/proto/tensorflow/tensor_shape.proto" | |||
"${METADEF_DIR}/proto/tensorflow/types.proto" | |||
"${METADEF_DIR}/proto/tensorflow/versions.proto" | |||
"${METADEF_DIR}/proto/tensorflow/versions.proto" | |||
) | |||
protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | |||
@@ -73,6 +73,7 @@ target_compile_definitions(ge_common PRIVATE | |||
FMK_SUPPORT_DUMP | |||
OS_CENTOS | |||
google=ascend_private | |||
FUNC_VISIBILITY | |||
) | |||
target_compile_options(ge_common PRIVATE | |||
@@ -105,6 +106,10 @@ target_include_directories(ge_common PRIVATE | |||
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | |||
) | |||
target_link_options(ge_common PRIVATE | |||
-Wl,-Bsymbolic | |||
) | |||
target_link_libraries(ge_common PRIVATE | |||
$<BUILD_INTERFACE:intf_pub> | |||
static_mmpa | |||
@@ -132,6 +137,7 @@ target_compile_definitions(ge_common_static PRIVATE | |||
$<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0> | |||
$<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> | |||
LOG_CPP | |||
FUNC_VISIBILITY | |||
) | |||
target_compile_options(ge_common_static PRIVATE | |||
@@ -181,6 +187,7 @@ target_compile_definitions(ge_common PRIVATE | |||
OS_CENTOS | |||
google=ascend_private | |||
LOG_CPP | |||
FUNC_VISIBILITY | |||
) | |||
target_compile_options(ge_common PRIVATE | |||
@@ -208,6 +215,10 @@ target_include_directories(ge_common PRIVATE | |||
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | |||
) | |||
target_link_options(ge_common PRIVATE | |||
-Wl,-Bsymbolic | |||
) | |||
target_link_libraries(ge_common PRIVATE | |||
$<BUILD_INTERFACE:intf_pub> | |||
ascend_protobuf_static | |||
@@ -598,7 +598,7 @@ bool ModelCacheHelper::IsAllocatedGraphIdSameAsCache(Json &json) const { | |||
return false; | |||
} | |||
// Compare allocated graph id info between json and VarManager | |||
std::unordered_map<std::string, uint32_t> allocated_graph_id; | |||
std::map<std::string, uint32_t> allocated_graph_id; | |||
auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id); | |||
if (ret != SUCCESS) { | |||
GELOGW("Fail to parse AllocatedGraphId from Json."); | |||
@@ -667,7 +667,7 @@ bool ModelCacheHelper::IsChangedGraphIdSameAsCache(Json &json) const { | |||
return false; | |||
} | |||
// Compare variable changed graph id info between json and VarManager | |||
std::unordered_map<std::string, uint32_t> changed_graph_id; | |||
std::map<std::string, uint32_t> changed_graph_id; | |||
auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id); | |||
if (ret != SUCCESS) { | |||
GELOGW("Fail to parse ChangedGraphId from Json."); | |||
@@ -732,7 +732,7 @@ bool ModelCacheHelper::IsVarAddrMgrMapSameAsCache(Json &json) const { | |||
} | |||
// Compare variable address info between json and VarManager | |||
std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector; | |||
std::unordered_set<uint64_t> var_offset_set; | |||
std::set<uint64_t> var_offset_set; | |||
auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set); | |||
if (ret != SUCCESS) { | |||
GELOGW("Fail to parse VarAddrMgrMap from Json."); | |||
@@ -942,7 +942,7 @@ Status ModelCacheHelper::RecoverAllocatedGraphId(const Json &json) const { | |||
GELOGW("Input param json type should be null or array."); | |||
return PARAM_INVALID; | |||
} | |||
std::unordered_map<std::string, uint32_t> allocated_graph_id; | |||
std::map<std::string, uint32_t> allocated_graph_id; | |||
auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id); | |||
if (ret != SUCCESS) { | |||
GELOGW("Fail to parse AllocatedGraphId from Json."); | |||
@@ -963,7 +963,7 @@ Status ModelCacheHelper::RecoverChangedGraphId(const Json &json) const { | |||
GELOGW("Input param json type should be null or array."); | |||
return PARAM_INVALID; | |||
} | |||
std::unordered_map<std::string, uint32_t> changed_graph_id; | |||
std::map<std::string, uint32_t> changed_graph_id; | |||
auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id); | |||
if (ret != SUCCESS) { | |||
GELOGW("Fail to parse AllocatedGraphId from Json."); | |||
@@ -985,7 +985,7 @@ Status ModelCacheHelper::RecoverVarAddrAndTensorDesc(const Json &json) const { | |||
return PARAM_INVALID; | |||
} | |||
std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector; | |||
std::unordered_set<uint64_t> var_offset_set; | |||
std::set<uint64_t> var_offset_set; | |||
auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set); | |||
if (ret != SUCCESS) { | |||
GELOGW("Fail to parse VarAddrMgrMap from Json."); | |||
@@ -1508,7 +1508,7 @@ Status ModelCacheHelper::ParseMemResourceFromJson(const Json &json, map<rtMemTyp | |||
Status ModelCacheHelper::ParseVarAddrMgrMapFromJson( | |||
const Json &json, std::vector<std::pair<std::string, VarAddrMgr>> &var_addr_mgr_vector, | |||
std::unordered_set<uint64_t> &var_offset_set) { | |||
std::set<uint64_t> &var_offset_set) { | |||
if (!(json.is_array() || json.is_null())) { | |||
GELOGW("Input param json type should be null or array."); | |||
return PARAM_INVALID; | |||
@@ -1606,7 +1606,7 @@ Status ModelCacheHelper::ParseTransRoadsFromJson( | |||
} | |||
Status ModelCacheHelper::ParseChangedGraphIdFromJson(const Json &json, | |||
std::unordered_map<std::string, uint32_t> &changed_graph_id) { | |||
std::map<std::string, uint32_t> &changed_graph_id) { | |||
if (!(json.is_array() || json.is_null())) { | |||
GELOGW("Input param json type should be null or array."); | |||
return PARAM_INVALID; | |||
@@ -1624,7 +1624,7 @@ Status ModelCacheHelper::ParseChangedGraphIdFromJson(const Json &json, | |||
} | |||
Status ModelCacheHelper::ParseAllocatedGraphIdFromJson(const Json &json, | |||
std::unordered_map<std::string, uint32_t> &allocated_graph_id) { | |||
std::map<std::string, uint32_t> &allocated_graph_id) { | |||
if (!(json.is_array() || json.is_null())) { | |||
GELOGW("Input param json type should be null or array."); | |||
return PARAM_INVALID; | |||
@@ -95,15 +95,15 @@ class ModelCacheHelper { | |||
static Status ParseMemResourceFromJson(const Json &json, map<rtMemType_t, int64_t> &mem_resource); | |||
static Status ParseVarAddrMgrMapFromJson(const Json &json, | |||
std::vector<std::pair<std::string, VarAddrMgr>> &var_addr_mgr_vector, | |||
std::unordered_set<uint64_t> &var_offset_set); | |||
std::set<uint64_t> &var_offset_set); | |||
static Status ParseCurVarTensorDescMapFromJson( | |||
const Json &json, std::unordered_map<std::string, ge::GeTensorDesc> &cur_var_tensor_desc_map); | |||
static Status ParseTransRoadsFromJson(const Json &json, | |||
std::unordered_map<std::string, std::vector<TransNodeInfo>> &trans_roads); | |||
static Status ParseChangedGraphIdFromJson(const Json &json, | |||
std::unordered_map<std::string, uint32_t> &changed_graph_id); | |||
std::map<std::string, uint32_t> &changed_graph_id); | |||
static Status ParseAllocatedGraphIdFromJson(const Json &json, | |||
std::unordered_map<std::string, uint32_t> &allocated_graph_id); | |||
std::map<std::string, uint32_t> &allocated_graph_id); | |||
static Status ParseBroadcastInfoFromJson(const Json &json, | |||
std::unordered_map<std::string, VarBroadCastInfo> &var_broadcast_info); | |||
static Status GetVarNameFromVarKey(const string &var_key, const GeTensorDesc &tensor_desc, string &var_name); | |||
@@ -88,7 +88,7 @@ bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { | |||
return false; | |||
} | |||
std::unordered_set<uint32_t> record; | |||
std::set<uint32_t> record; | |||
for (size_t i = 0; i < device_nums; ++i) { | |||
uint32_t dev_id = deviceid_list[i]; | |||
if (dev_id >= static_cast<uint32_t>(dev_count)) { | |||
@@ -167,6 +167,8 @@ target_compile_options(ge_executor PRIVATE | |||
$<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common> | |||
$<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Debug>>:/MTd> | |||
$<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Release>>:/MT> | |||
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable> | |||
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format> | |||
) | |||
target_compile_definitions(ge_executor PRIVATE | |||
@@ -178,7 +180,7 @@ target_compile_definitions(ge_executor PRIVATE | |||
LOG_CPP | |||
) | |||
target_include_directories(ge_executor PRIVATE | |||
target_include_directories(ge_executor SYSTEM PRIVATE | |||
${GE_CODE_DIR}/ge | |||
${GE_CODE_DIR}/inc | |||
${GE_CODE_DIR}/inc/external | |||
@@ -212,12 +214,14 @@ target_compile_options(ge_executor_shared PRIVATE | |||
-Werror | |||
-O2 | |||
-Wno-deprecated-declarations | |||
-fvisibility=hidden | |||
) | |||
target_compile_definitions(ge_executor_shared PRIVATE | |||
PROTOBUF_INLINE_NOT_IN_HEADERS=0 | |||
DAVINCI_SUPPORT_PROFILING | |||
google=ascend_private | |||
FUNC_VISIBILITY | |||
) | |||
target_include_directories(ge_executor_shared PRIVATE | |||
@@ -238,6 +242,10 @@ target_include_directories(ge_executor_shared PRIVATE | |||
${GE_CODE_DIR}/third_party/fwkacllib/inc | |||
) | |||
target_link_options(ge_executor_shared PRIVATE | |||
-Wl,-Bsymbolic | |||
) | |||
target_link_libraries(ge_executor_shared PRIVATE | |||
$<BUILD_INTERFACE:intf_pub> | |||
msprofiler | |||
@@ -27,10 +27,12 @@ add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | |||
target_compile_options(ge_local_engine PRIVATE | |||
-Werror | |||
-fno-common | |||
-fvisibility=hidden | |||
) | |||
target_compile_definitions(ge_local_engine PRIVATE | |||
google=ascend_private | |||
FUNC_VISIBILITY | |||
) | |||
target_include_directories(ge_local_engine PRIVATE | |||
@@ -51,6 +53,10 @@ target_include_directories(ge_local_engine PRIVATE | |||
${GE_CODE_DIR}/third_party/fwkacllib/inc | |||
) | |||
target_link_options(ge_local_engine PRIVATE | |||
-Wl,-Bsymbolic | |||
) | |||
target_link_libraries(ge_local_engine PRIVATE | |||
$<BUILD_INTERFACE:intf_pub> | |||
-Wl,--no-as-needed | |||
@@ -67,11 +73,12 @@ add_library(atc_ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | |||
target_compile_options(atc_ge_local_engine PRIVATE | |||
-Werror | |||
-fno-common | |||
-fvisibility=hidden | |||
) | |||
target_compile_definitions(atc_ge_local_engine PRIVATE | |||
COMPILE_OMG_PACKAGE | |||
google=ascend_private | |||
FUNC_VISIBILITY | |||
) | |||
target_include_directories(atc_ge_local_engine PRIVATE | |||
@@ -92,6 +99,10 @@ target_include_directories(atc_ge_local_engine PRIVATE | |||
${GE_CODE_DIR}/third_party/fwkacllib/inc | |||
) | |||
target_link_options(atc_ge_local_engine PRIVATE | |||
-Wl,-Bsymbolic | |||
) | |||
target_link_libraries(atc_ge_local_engine PRIVATE | |||
$<BUILD_INTERFACE:intf_pub> | |||
-Wl,--no-as-needed | |||
@@ -113,10 +124,12 @@ add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDR | |||
target_compile_options(ge_local_opskernel_builder PRIVATE | |||
-Werror | |||
-fno-common | |||
-fvisibility=hidden | |||
) | |||
target_compile_definitions(ge_local_opskernel_builder PRIVATE | |||
google=ascend_private | |||
FUNC_VISIBILITY | |||
) | |||
target_include_directories(ge_local_opskernel_builder PRIVATE | |||
@@ -137,6 +150,10 @@ target_include_directories(ge_local_opskernel_builder PRIVATE | |||
${GE_CODE_DIR}/third_party/fwkacllib/inc | |||
) | |||
target_link_options(ge_local_opskernel_builder PRIVATE | |||
-Wl,-Bsymbolic | |||
) | |||
target_link_libraries(ge_local_opskernel_builder PRIVATE | |||
$<BUILD_INTERFACE:intf_pub> | |||
-Wl,--no-as-needed | |||
@@ -154,10 +171,12 @@ add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO | |||
target_compile_options(atc_ge_local_opskernel_builder PRIVATE | |||
-Werror | |||
-fno-common | |||
-fvisibility=hidden | |||
) | |||
target_compile_definitions(atc_ge_local_opskernel_builder PRIVATE | |||
google=ascend_private | |||
FUNC_VISIBILITY | |||
) | |||
target_include_directories(atc_ge_local_opskernel_builder PRIVATE | |||
@@ -178,6 +197,10 @@ target_include_directories(atc_ge_local_opskernel_builder PRIVATE | |||
${GE_CODE_DIR}/third_party/fwkacllib/inc | |||
) | |||
target_link_options(atc_ge_local_opskernel_builder PRIVATE | |||
-Wl,-Bsymbolic | |||
) | |||
target_link_libraries(atc_ge_local_opskernel_builder PRIVATE | |||
$<BUILD_INTERFACE:intf_pub> | |||
-Wl,--no-as-needed | |||
@@ -200,11 +223,13 @@ add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PR | |||
target_compile_options(ge_local_opskernel_builder_static PRIVATE | |||
-Werror | |||
-fno-common | |||
-fvisibility=hidden | |||
) | |||
target_compile_definitions(ge_local_opskernel_builder_static PRIVATE | |||
google=ascend_private | |||
LOG_CPP | |||
FUNC_VISIBILITY | |||
) | |||
target_include_directories(ge_local_opskernel_builder_static PRIVATE | |||
@@ -17,6 +17,20 @@ | |||
#ifndef GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_ | |||
#define GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_ | |||
#if defined(_MSC_VER) | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#else | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#endif | |||
#include <map> | |||
#include <memory> | |||
#include <string> | |||
@@ -32,7 +46,7 @@ namespace ge_local { | |||
* ge local engine. | |||
* Used for the ops not belong to any engine. eg:netoutput | |||
*/ | |||
class GeLocalEngine { | |||
class GE_FUNC_VISIBILITY GeLocalEngine { | |||
public: | |||
/** | |||
* get GeLocalEngine instance. | |||
@@ -94,25 +108,25 @@ extern "C" { | |||
* When Ge start, GE will invoke this interface | |||
* @return The status whether initialize successfully | |||
*/ | |||
ge::Status Initialize(const map<string, string> &options); | |||
GE_FUNC_VISIBILITY ge::Status Initialize(const map<string, string> &options); | |||
/** | |||
* After the initialize, GE will invoke this interface to get the Ops kernel Store | |||
* @param ops_kernel_map The ge local's ops kernel info | |||
*/ | |||
void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map); | |||
GE_FUNC_VISIBILITY void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map); | |||
/** | |||
* After the initialize, GE will invoke this interface to get the Graph Optimizer | |||
* @param graph_optimizers The ge local's Graph Optimizer objs | |||
*/ | |||
void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers); | |||
GE_FUNC_VISIBILITY void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers); | |||
/** | |||
* When the graph finished, GE will invoke this interface | |||
* @return The status whether initialize successfully | |||
*/ | |||
ge::Status Finalize(); | |||
GE_FUNC_VISIBILITY ge::Status Finalize(); | |||
} | |||
#endif // GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_ |
@@ -16,6 +16,20 @@ | |||
#ifndef GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | |||
#define GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | |||
#if defined(_MSC_VER) | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#else | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#endif | |||
#include <mutex> | |||
#include "framework/common/ge_inner_error_codes.h" | |||
#include "graph/node.h" | |||
@@ -23,7 +37,7 @@ | |||
#include "external/../register/register.h" | |||
namespace ge { | |||
class HostCpuEngine { | |||
class GE_FUNC_VISIBILITY HostCpuEngine { | |||
public: | |||
~HostCpuEngine() = default; | |||
@@ -22,7 +22,7 @@ | |||
namespace ge { | |||
namespace ge_local { | |||
class GeLocalOpsKernelBuilder : public OpsKernelBuilder { | |||
class GE_FUNC_VISIBILITY GeLocalOpsKernelBuilder : public OpsKernelBuilder { | |||
public: | |||
~GeLocalOpsKernelBuilder() override; | |||
Status Initialize(const map<std::string, std::string> &options) override; | |||
@@ -17,6 +17,20 @@ | |||
#ifndef GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_GE_LOCAL_OPS_KERNEL_INFO_H_ | |||
#define GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_GE_LOCAL_OPS_KERNEL_INFO_H_ | |||
#if defined(_MSC_VER) | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#else | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#endif | |||
#include <map> | |||
#include <string> | |||
#include <vector> | |||
@@ -25,7 +39,7 @@ | |||
namespace ge { | |||
namespace ge_local { | |||
class GeLocalOpsKernelInfoStore : public OpsKernelInfoStore { | |||
class GE_FUNC_VISIBILITY GeLocalOpsKernelInfoStore : public OpsKernelInfoStore { | |||
public: | |||
GeLocalOpsKernelInfoStore() = default; | |||
@@ -21,7 +21,7 @@ | |||
namespace ge { | |||
namespace ge_local { | |||
class GeDeletedOp : public Op { | |||
class GE_FUNC_VISIBILITY GeDeletedOp : public Op { | |||
public: | |||
GeDeletedOp(const Node &node, RunContext &run_context); | |||
@@ -21,7 +21,7 @@ | |||
namespace ge { | |||
namespace ge_local { | |||
class NoOp : public Op { | |||
class GE_FUNC_VISIBILITY NoOp : public Op { | |||
public: | |||
NoOp(const Node &node, RunContext &run_context); | |||
@@ -29,7 +29,7 @@ namespace ge_local { | |||
/** | |||
* The base class for all op. | |||
*/ | |||
class Op { | |||
class GE_FUNC_VISIBILITY Op { | |||
public: | |||
Op(const Node &node, RunContext &run_context); | |||
@@ -32,7 +32,7 @@ using OP_CREATOR_FUNC = std::function<std::shared_ptr<Op>(const Node &, RunConte | |||
/** | |||
* manage all the op, support create op. | |||
*/ | |||
class OpFactory { | |||
class GE_FUNC_VISIBILITY OpFactory { | |||
public: | |||
static OpFactory &Instance(); | |||
@@ -72,7 +72,7 @@ class OpFactory { | |||
std::vector<std::string> all_ops_; | |||
}; | |||
class OpRegistrar { | |||
class GE_FUNC_VISIBILITY OpRegistrar { | |||
public: | |||
OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) { | |||
OpFactory::Instance().RegisterCreator(type, func); | |||
@@ -27,7 +27,7 @@ target_compile_options(ge_runtime PRIVATE | |||
-fno-common | |||
) | |||
target_compile_definitions(ge_runtime PRIVATE | |||
target_compile_definitions(ge_runtime PRIVATE | |||
PROTOBUF_INLINE_NOT_IN_HEADERS=0 | |||
LOG_CPP | |||
) | |||
@@ -53,6 +53,10 @@ target_include_directories(ge_runtime PRIVATE | |||
${CMAKE_BINARY_DIR}/proto/ge | |||
) | |||
target_link_options(ge_runtime PRIVATE | |||
-Wl,-Bsymbolic | |||
) | |||
target_link_libraries(ge_runtime PRIVATE | |||
$<BUILD_INTERFACE:intf_pub> | |||
-Wl,--no-as-needed | |||
@@ -1121,7 +1121,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||
} | |||
} | |||
reusable_block->continuous_block_ = continuous; | |||
reusable_block->ref_count_++; | |||
reusable_blocks_[memory_type][stream_id].erase((++it).base()); | |||
return reusable_block; | |||
} | |||
@@ -1136,7 +1135,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||
block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | |||
block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size); | |||
block->stream_id_ = node_op_desc->GetStreamId(); | |||
block->ref_count_++; | |||
block->continuous_block_ = continuous; | |||
block->batch_label_ = batch_label; | |||
if (mem_type == kOutput) { | |||
@@ -1266,6 +1264,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||
// hccl task need align header and tail | |||
block->first_continuous_block_ = true; | |||
block->last_continuous_block_ = true; | |||
++(block->ref_count_); | |||
} else { | |||
GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str()); | |||
return INTERNAL_ERROR; | |||
@@ -1289,6 +1288,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||
return nullptr, "Get no align size failed"); | |||
std::string symbol; | |||
bool reuse_input = false; | |||
if (IsSymbolExist(node_index_io, symbol)) { | |||
block = symbol_blocks_[symbol]; | |||
GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str()); | |||
@@ -1303,6 +1303,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||
block->SetLifeTimeEnd(life_time_); | |||
block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size); | |||
block->ref_count_++; | |||
reuse_input = true; | |||
// add new size | |||
align_size = block_size; | |||
@@ -1336,7 +1337,6 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||
workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type); | |||
} | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "Block is nullptr."); | |||
int out_count_reuse_input = block->ref_count_; | |||
int out_count = 0; | |||
GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(), GELOGE(FAILED, "index is out of range."); return nullptr); | |||
auto out_data_anchor = n->GetOutDataAnchor(index); | |||
@@ -1351,28 +1351,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||
out_count++; | |||
} | |||
} | |||
bool reuse_input = false; | |||
for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) { | |||
auto owner_node = in_anchor->GetOwnerNode(); | |||
GE_IF_BOOL_EXEC(owner_node == nullptr, continue); | |||
auto op_desc = owner_node->GetOpDesc(); | |||
GE_IF_BOOL_EXEC(op_desc == nullptr, continue); | |||
for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) { | |||
bool dst_reuse_input = false; | |||
uint32_t dst_reuse_input_index = 0; | |||
auto owner_node_op_desc = op_desc->GetOutputDescPtr(i); | |||
GE_IF_BOOL_EXEC(owner_node_op_desc == nullptr, continue); | |||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInput(*owner_node_op_desc, dst_reuse_input) != SUCCESS, | |||
GELOGI("Get dst_reuse_input failed")); | |||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS, | |||
GELOGI("Get dst_reuse_input_index failed")); | |||
if (dst_reuse_input && (dst_reuse_input_index == static_cast<uint32_t>(in_anchor->GetIdx()))) { | |||
out_count_reuse_input += 1; | |||
reuse_input = true; | |||
} | |||
} | |||
} | |||
block->ref_count_ = reuse_input ? out_count_reuse_input + out_count - 1 : out_count; | |||
block->ref_count_ = (reuse_input && out_count != 0) ? (block->ref_count_ + out_count - 1) | |||
: (block->ref_count_ + out_count); | |||
return block; | |||
} | |||
@@ -1484,12 +1464,25 @@ void BlockMemAssigner::ReleaseInputNodeOutMemory(const unordered_map<string, vec | |||
GELOGD("node_type_indexs: %d, %s", node_type_indexs.back().index, | |||
node_type_indexs.back().node->GetName().c_str()); | |||
if ((node_type_indexs.back().node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) && | |||
(node_type_indexs.back().index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx()))) { | |||
bool is_block_matched = false; | |||
for (auto &node_type_index : node_type_indexs) { | |||
is_block_matched = (node_type_index.node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) && | |||
(node_type_index.index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx())); | |||
if (is_block_matched) { | |||
GELOGI("Block of peer out is matched. Peer node:%s, output index:%u, " | |||
"current node:%s, input index:%d, block ref_count:%d.", | |||
node_type_index.node->GetName().c_str(), node_type_index.index, | |||
node->GetName().c_str(), in_anchor->GetIdx(), block->ref_count_); | |||
break; | |||
} | |||
} | |||
if (is_block_matched) { | |||
ReleaseMemory(block, reusable_memory, (node->GetOpDesc()->GetStreamId() == block->stream_id_)); | |||
if (block->ref_count_ == 0 && block->same_stream_) { | |||
SetLastUsedInputMemAttr(node, in_anchor->GetIdx()); | |||
} | |||
break; | |||
} | |||
} | |||
} | |||
@@ -1530,6 +1523,21 @@ void CheckAndGetOpReuseEnv(const string &env, vector<string> &env_vec, bool &op_ | |||
return; | |||
} | |||
void BlockMemAssigner::CheckAndReleaseSuspendedBlock(const NodePtr &node, uint32_t idx, MemoryBlock *block) { | |||
if (node == nullptr || node->GetOpDesc() == nullptr || block == nullptr) { | |||
return; | |||
} | |||
int64_t stream_id = node->GetOpDesc()->GetStreamId(); | |||
auto out_data_anchor = node->GetOutDataAnchor(static_cast<int>(idx)); | |||
bool is_suspended = (out_data_anchor != nullptr) && (out_data_anchor->GetPeerInDataNodesSize() == 0); | |||
if (is_suspended) { | |||
block->ref_count_ = (block->ref_count_ != 0) ? (block->ref_count_) : (1); | |||
stream_workspace_blocks_[block->memory_type_][stream_id].emplace_back(block); | |||
GELOGI("The output is suspended, and will be released in allocation of next node. Name:%s, index:%u, " | |||
"size:%zu, ref_count:%d.", node->GetName().c_str(), idx, block->Size(), block->ref_count_); | |||
} | |||
} | |||
Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector<int64_t> &ranges) { | |||
auto op_desc = node->GetOpDesc(); | |||
int64_t stream_id = op_desc->GetStreamId(); | |||
@@ -1560,7 +1568,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||
// Allocate memory for the current node and release node memory of the same size in the workspace | |||
GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", | |||
for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); | |||
++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); }); | |||
++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); | |||
iter->second[stream_id].clear();}); | |||
if (IsContinuousOutput(node)) { | |||
return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); | |||
} | |||
@@ -1621,6 +1630,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||
continue; | |||
} | |||
symbol_blocks_[iter->second] = mem_block; | |||
// The output is suspended, and will be released in allocation of next node. | |||
CheckAndReleaseSuspendedBlock(node, i, mem_block); | |||
} | |||
} | |||
return SUCCESS; | |||
@@ -1648,9 +1659,6 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||
if (AssignOutputMemoryWithReuse(n, ranges) != SUCCESS) { | |||
return; | |||
} | |||
for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); ++iter) { | |||
iter->second[stream_id].clear(); | |||
} | |||
vector<int64_t> temp; | |||
int64_t tatal_size = 0; | |||
GetNodeWorkSpaceSize(n, temp, tatal_size); | |||
@@ -1692,6 +1700,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||
kWorkspace, n, static_cast<uint32_t>(i), workspace_reuse_flag, | |||
is_op_reuse_mem_, false, memory_type); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mem_block == nullptr, continue, "failed to apply memory block."); | |||
++(mem_block->ref_count_); | |||
CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block, memory_type); | |||
} | |||
for (auto it = reusable_blocks_.begin(); it != reusable_blocks_.end(); ++it) { | |||
@@ -454,6 +454,8 @@ class BlockMemAssigner : public MemAssigner { | |||
void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node); | |||
void CheckAndReleaseSuspendedBlock(const NodePtr &node, uint32_t idx, MemoryBlock *block); | |||
std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_; | |||
std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> stream_workspace_blocks_; | |||
@@ -464,7 +466,7 @@ class BlockMemAssigner : public MemAssigner { | |||
std::unordered_map<std::string, std::unordered_map<uint32_t, MemoryBlock *>> node_continuous_input_blocks_; | |||
std::unordered_map<std::string, uint32_t> node_continuous_input_counts_; | |||
std::map<std::string, uint32_t> node_continuous_input_counts_; | |||
// reuse memory | |||
vector<string> op_no_reuse_mem_vec_; | |||
@@ -528,7 +528,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||
GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] " | |||
"size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), | |||
node->GetType().c_str(), peer_op_desc->GetName().c_str(),peer_out_data_anchor->GetIdx(), | |||
peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(), | |||
output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type, | |||
is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding); | |||
} | |||
@@ -618,7 +618,7 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node | |||
} | |||
GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]" | |||
" size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), | |||
node->GetType().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), | |||
out_op_desc->GetName().c_str(), node->GetType().c_str(), out_data_anchor->GetIdx(), | |||
output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL, | |||
is_nopadding ? nopadding_size : tensor_desc_size, is_nopadding); | |||
} | |||
@@ -90,7 +90,7 @@ Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t even | |||
// Create rt label | |||
for (uint32_t i = 0; i < label_num; ++i) { | |||
rtLabel_t label = nullptr; | |||
rt_ret = rtLabelCreate(&label); | |||
rt_ret = rtLabelCreateV2(&label, rt_model_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "rtLabelCreate failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i); | |||
return RT_FAILED; | |||
@@ -1226,7 +1226,7 @@ Status StreamAllocator::InsertSyncEventNodes() { | |||
} | |||
} | |||
Status status = ReorderEventNodes(); | |||
Status status = whole_graph_->InsertGraphEvents(); | |||
if (status != SUCCESS) { | |||
GELOGE(status, "Graph ReorderEventNodes failed"); | |||
return status; | |||
@@ -1235,22 +1235,6 @@ Status StreamAllocator::InsertSyncEventNodes() { | |||
return SUCCESS; | |||
} | |||
///
/// @brief Call InsertEventNodes on the whole graph and then on each of its
///        subgraphs, stopping at the first call that does not return SUCCESS.
/// @return SUCCESS if every call succeeded, otherwise the first failing status.
///
Status StreamAllocator::ReorderEventNodes() const {
  const Status root_ret = whole_graph_->InsertEventNodes();
  if (root_ret != SUCCESS) {
    GELOGE(root_ret, "Whole graph InsertEventNodes failed");
    return root_ret;
  }

  for (const auto &sub : whole_graph_->GetAllSubgraphs()) {
    const Status sub_ret = sub->InsertEventNodes();
    if (sub_ret == SUCCESS) {
      continue;
    }
    // Report which subgraph failed and propagate its status unchanged.
    GELOGE(sub_ret, "Subgraph %s InsertEventNodes failed", sub->GetName().c_str());
    return sub_ret;
  }

  return SUCCESS;
}
void StreamAllocator::DumpEvents() { | |||
map<int64_t, vector<NodePtr>> after_refresh_stream_nodes; | |||
for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||
@@ -74,7 +74,6 @@ class StreamAllocator { | |||
Status RefreshContinuousEvents(); | |||
Status InsertSyncEventNodes(); | |||
Status ReorderEventNodes() const; | |||
void DumpEvents(); | |||
@@ -211,7 +211,7 @@ Status TaskGenerator::SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion | |||
// and it have no attr or group attr different | |||
// which means bad case, return error | |||
bool call_check = true; | |||
std::unordered_set<int64_t> input_group_ids; | |||
std::set<int64_t> input_group_ids; | |||
for (const auto &input_node : node->GetInNodes()) { | |||
auto iter = nodes_with_group_attr.find(input_node); | |||
if (iter == nodes_with_group_attr.end()) { | |||
@@ -533,13 +533,6 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { | |||
return GE_GRAPH_GRAPH_NODE_NULL; | |||
} | |||
int64_t node_index = 0; | |||
for (auto &node : all_nodes) { | |||
OpDescPtr op_desc = node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(op_desc); | |||
op_desc->SetId(node_index++); | |||
} | |||
map<int64_t, vector<OpDescPtr>> all_stream_ops; | |||
for (auto &node : all_nodes) { | |||
OpDescPtr op_desc = node->GetOpDesc(); | |||
@@ -784,7 +777,7 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin | |||
} | |||
if (graph->GetNeedIteration()) { | |||
if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") { | |||
if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD) { | |||
profiling_point.end_index.insert(current_idx); | |||
GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive", | |||
op_desc->GetName().c_str(), current_idx); | |||
@@ -44,7 +44,7 @@ class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY TransOpUtil { | |||
static TransOpUtil &Instance(); | |||
typedef std::unordered_map<std::string, int> transop_index_op; | |||
typedef std::map<std::string, int> transop_index_op; | |||
transop_index_op transop_index_map_; | |||
}; | |||
} // namespace ge | |||
@@ -99,7 +99,7 @@ Status CpuTaskModelDequeue::Distribute() { | |||
/// @param [in] outside_addrs: model input/output memory addr | |||
/// @return: 0 for success / others for failed | |||
/// | |||
Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs) { | |||
Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs) { | |||
if ((args_ != nullptr) || (args_size_ > 0)) { | |||
GELOGE(FAILED, "Task already initialized, size: %u", args_size_); | |||
return FAILED; | |||
@@ -110,32 +110,22 @@ Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const v | |||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | |||
AddrMapInfo addr_map_info; | |||
for (auto &addrs : outside_addrs) { | |||
auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); | |||
GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs"); | |||
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0]; | |||
for (const auto &virtual_args_addr : virtual_args_addrs) { | |||
addr_map_info.addr_num += virtual_args_addr.second.size(); | |||
} | |||
} | |||
GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num); | |||
// init src_addrs/dst_addrs | |||
size_t index = 0; | |||
vector<uint64_t> src_addrs; | |||
vector<uint64_t> dst_addrs; | |||
for (auto &addrs : outside_addrs) { | |||
auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); | |||
for (const auto &addrs : outside_addrs) { | |||
const auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); | |||
GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs"); | |||
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0]; | |||
for (const auto &virtual_args_addr : virtual_args_addrs) { | |||
addr_map_info.addr_num += virtual_args_addr.second.size(); | |||
for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) { | |||
src_addrs.push_back(mbuf_list.at(index)); | |||
src_addrs.emplace_back(mbuf_list.at(addrs.first)); | |||
dst_addrs.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i)))); | |||
} | |||
} | |||
index++; | |||
} | |||
GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num); | |||
// malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs | |||
GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM)); | |||
@@ -93,7 +93,7 @@ class CpuTaskZeroCopy : public CpuTaskInfo { | |||
~CpuTaskZeroCopy() override; | |||
Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; } | |||
Status Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs); | |||
Status Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs); | |||
Status Distribute() override; | |||
private: | |||
@@ -842,6 +842,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||
}; | |||
vector<OpDescPtr> output_op_list; | |||
set<const void *> input_outside_addrs; | |||
set<const void *> output_outside_addrs; | |||
map<uint32_t, OpDescPtr> data_by_index; | |||
map<string, OpDescPtr> variable_by_name; | |||
auto nodes = compute_graph->GetAllNodes(); | |||
@@ -858,7 +860,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||
GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc); | |||
if (IsDataOp(op_desc->GetType())) { | |||
if (InitDataOp(compute_graph, node, data_op_index, data_by_index) != SUCCESS) { | |||
if (InitDataOp(compute_graph, node, data_op_index, data_by_index, input_outside_addrs) != SUCCESS) { | |||
GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str()); | |||
return PARAM_INVALID; | |||
} | |||
@@ -867,7 +869,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||
} | |||
if (op_desc->GetType() == NETOUTPUT) { | |||
if (InitNetOutput(compute_graph, node, output_op_list) != SUCCESS) { | |||
if (InitNetOutput(compute_graph, node, output_op_list, output_outside_addrs) != SUCCESS) { | |||
GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); | |||
return PARAM_INVALID; | |||
} | |||
@@ -961,7 +963,7 @@ void DavinciModel::SetLabelForDynamic(const NodePtr &node) { | |||
/// @return Status | |||
/// | |||
Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, | |||
map<uint32_t, OpDescPtr> &data_by_index) { | |||
map<uint32_t, OpDescPtr> &data_by_index, set<const void *> &input_outside_addrs) { | |||
// op_desc Checked by Init: Data, valid. | |||
auto op_desc = node->GetOpDesc(); | |||
if (node->GetOwnerComputeGraph() != graph) { | |||
@@ -1000,16 +1002,12 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod | |||
GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str()); | |||
return PARAM_INVALID; | |||
} | |||
new_input_data_info_[data_index] = zero_copy_offset; | |||
for (size_t index = 0; index < virtual_addr_list.size(); ++index) { | |||
void *addr = virtual_addr_list.at(index); | |||
if (new_input_outside_addrs_.find(addr) != new_input_outside_addrs_.end()) { | |||
continue; | |||
} | |||
zero_copy_offset.SetInputOutsideAddrs(output_offset_list, addr, index, fusion_flag, real_virtual_addrs_); | |||
new_input_outside_addrs_[addr] = zero_copy_offset; | |||
if (input_outside_addrs.count(virtual_addr) == 0) { | |||
int64_t output_offset = output_offset_list.at(kDataIndex); | |||
zero_copy_offset.SetInputOutsideAddrs(output_offset, virtual_addr, fusion_flag, real_virtual_addrs_); | |||
input_outside_addrs.insert(virtual_addr); | |||
} | |||
input_data_info_[data_index] = zero_copy_offset; | |||
return SUCCESS; | |||
} | |||
@@ -1085,7 +1083,7 @@ bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { | |||
/// @param [in/out] vector<OpDescPtr>: All NetOutput node in model. | |||
/// @return Status | |||
Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, | |||
vector<OpDescPtr> &output_op_list) { | |||
vector<OpDescPtr> &output_op_list, set<const void *> &output_outside_addrs) { | |||
// node->GetOpDesc Checked by Init: NetOutput, valid. | |||
auto op_desc = node->GetOpDesc(); | |||
// excludes the function op sub graph, e.g. case,if | |||
@@ -1117,7 +1115,7 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr & | |||
return PARAM_INVALID; | |||
} | |||
size_t num = new_output_data_info_.size(); | |||
size_t num = output_data_info_.size(); | |||
bool fusion_flag = false; | |||
size_t input_count = input_size_list.size(); | |||
@@ -1131,22 +1129,22 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr & | |||
Status ret = zero_copy_offset.InitOutputDataInfo(input_size_list, virtual_addr_list, op_desc, idx, fusion_flag); | |||
GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", | |||
op_desc->GetName().c_str()); return PARAM_INVALID;); | |||
new_output_data_info_[num + idx] = zero_copy_offset; | |||
void *addr = virtual_addr_list.at(idx); | |||
int64_t input_offset = input_offset_list.at(idx); | |||
vector<void *> tensor_addrs; | |||
zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs); | |||
auto rslt = new_output_outside_addrs_.insert(std::pair<void *, ZeroCopyOffset>(addr, zero_copy_offset)); | |||
if (!rslt.second) { | |||
if (output_outside_addrs.count(addr) == 0) { | |||
vector<void *> tensor_addrs; | |||
zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs); | |||
output_outside_addrs.insert(addr); | |||
for (size_t i = 0; i < tensor_addrs.size(); ++i) { | |||
void *real_addr = tensor_addrs.at(i); | |||
DisableZeroCopy(real_addr); | |||
real_virtual_addrs_.insert(real_addr); | |||
} | |||
} else { | |||
GELOGI("same output_tensor_addr %p to different input_tensor of %s", addr, op_desc->GetName().c_str()); | |||
DisableZeroCopy(addr); | |||
} | |||
for (size_t i = 0; i < tensor_addrs.size(); ++i) { | |||
void *real_addr = tensor_addrs.at(i); | |||
DisableZeroCopy(real_addr); | |||
real_virtual_addrs_.insert(real_addr); | |||
} | |||
output_data_info_[num + idx] = zero_copy_offset; | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -1402,7 +1400,7 @@ Status DavinciModel::InitLabelSet(const OpDescPtr &op_desc) { | |||
} | |||
rtLabel_t rt_label = nullptr; | |||
rtError_t rt_error = rtLabelCreateEx(&rt_label, stream); | |||
rtError_t rt_error = rtLabelCreateExV2(&rt_label, rt_model_handle_, stream); | |||
if (rt_error != RT_ERROR_NONE || rt_label == nullptr) { | |||
GELOGE(INTERNAL_ERROR, "InitLabelSet: %s create label failed, error=0x%x.", op_desc->GetName().c_str(), rt_error); | |||
return INTERNAL_ERROR; | |||
@@ -1463,27 +1461,27 @@ Status DavinciModel::LoadWithQueue() { | |||
return SUCCESS; | |||
} | |||
if (input_queue_ids_.size() != new_input_data_info_.size()) { | |||
if (input_queue_ids_.size() != input_data_info_.size()) { | |||
GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Input queue ids not match model: input_queue=%zu input_data=%zu", | |||
input_queue_ids_.size(), new_input_data_info_.size()); | |||
input_queue_ids_.size(), input_data_info_.size()); | |||
return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; | |||
} | |||
if (output_queue_ids_.size() != new_output_data_info_.size()) { | |||
if (output_queue_ids_.size() != output_data_info_.size()) { | |||
GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, | |||
"Output queue ids not match model: output_queue=%zu output_data=%zu", | |||
output_queue_ids_.size(), new_output_data_info_.size()); | |||
output_queue_ids_.size(), output_data_info_.size()); | |||
return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; | |||
} | |||
GE_CHK_STATUS_RET(AddHeadStream(), "Add head stream failed."); | |||
// Binding input_queue and Data Op. | |||
GE_CHK_STATUS_RET(BindInputQueue(), "Launch bind input queue failed."); | |||
GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, new_input_outside_addrs_), "Launch zero copy failed."); | |||
GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, input_data_info_), "Launch zero copy failed."); | |||
// Binding output_queue and NetOutput Op. | |||
GE_CHK_STATUS_RET(BindOutputQueue(), "Launch bind output queue failed."); | |||
GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, new_output_outside_addrs_), "Launch zero copy failed."); | |||
GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, output_data_info_), "Launch zero copy failed."); | |||
GE_CHK_STATUS_RET(CpuActiveStream(), "Launch active entry stream failed."); | |||
GE_CHK_STATUS_RET(CpuWaitEndGraph(), "Launch wait end graph failed."); | |||
@@ -1499,9 +1497,9 @@ Status DavinciModel::LoadWithQueue() { | |||
Status DavinciModel::BindInputQueue() { | |||
// Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size() | |||
for (size_t i = 0; i < input_queue_ids_.size(); ++i) { | |||
auto it = new_input_data_info_.find(i); | |||
if (it == new_input_data_info_.end()) { | |||
GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", new_input_data_info_.size(), i); | |||
auto it = input_data_info_.find(i); | |||
if (it == input_data_info_.end()) { | |||
GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", input_data_info_.size(), i); | |||
return FAILED; | |||
} | |||
@@ -1555,7 +1553,7 @@ Status DavinciModel::CpuModelDequeue(uint32_t queue_id) { | |||
} | |||
Status DavinciModel::CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list, | |||
std::map<const void *, ZeroCopyOffset> &outside_addrs) { | |||
const map<uint32_t, ZeroCopyOffset> &outside_addrs) { | |||
GELOGI("Set CpuKernel model zero_copy task enter."); | |||
std::shared_ptr<CpuTaskZeroCopy> zero_copy = MakeShared<CpuTaskZeroCopy>(rt_entry_stream_); | |||
if (zero_copy == nullptr) { | |||
@@ -1579,9 +1577,9 @@ Status DavinciModel::CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list, | |||
Status DavinciModel::BindOutputQueue() { | |||
// Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size() | |||
for (size_t i = 0; i < output_queue_ids_.size(); ++i) { | |||
auto it = new_output_data_info_.find(i); | |||
if (it == new_output_data_info_.end()) { | |||
GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i); | |||
auto it = output_data_info_.find(i); | |||
if (it == output_data_info_.end()) { | |||
GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i); | |||
return FAILED; | |||
} | |||
@@ -1685,9 +1683,9 @@ Status DavinciModel::CpuWaitEndGraph() { | |||
Status DavinciModel::BindEnqueue() { | |||
for (size_t i = 0; i < output_queue_ids_.size(); ++i) { | |||
auto it = new_output_data_info_.find(i); | |||
if (it == new_output_data_info_.end()) { | |||
GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i); | |||
auto it = output_data_info_.find(i); | |||
if (it == output_data_info_.end()) { | |||
GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i); | |||
return FAILED; | |||
} | |||
@@ -2103,10 +2101,10 @@ Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_descs | |||
Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) { | |||
rtMemcpyKind_t kind = device_data ? RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE; | |||
const std::vector<DataBuffer> &blobs = input_data.blobs; | |||
for (const auto &data : new_input_data_info_) { | |||
for (const auto &data : input_data_info_) { | |||
if (data.first >= blobs.size()) { | |||
GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld, op_name(%s)", blobs.size(), | |||
new_input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first, | |||
input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first, | |||
data.second.GetOpName().c_str()); | |||
return FAILED; | |||
} | |||
@@ -2427,18 +2425,18 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r | |||
output_data.index = data_id; | |||
output_data.model_id = model_id_; | |||
if (output_data.blobs.size() != new_output_data_info_.size()) { | |||
if (output_data.blobs.size() != output_data_info_.size()) { | |||
GELOGE(FAILED, "Output data buffer num=%zu not equal model data num=%zu", output_data.blobs.size(), | |||
new_output_data_info_.size()); | |||
output_data_info_.size()); | |||
return FAILED; | |||
} | |||
std::vector<DataBuffer> &blobs = output_data.blobs; | |||
size_t idx = 0; | |||
for (const auto &output : new_output_data_info_) { | |||
for (const auto &output : output_data_info_) { | |||
if (output.first >= blobs.size()) { | |||
GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", blobs.size(), | |||
new_input_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first); | |||
input_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first); | |||
return FAILED; | |||
} | |||
@@ -3166,8 +3164,11 @@ void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) { | |||
/// @return None. | |||
/// | |||
void DavinciModel::SetCopyOnlyOutput() { | |||
for (const auto &output_outside_addrs : new_output_outside_addrs_) { | |||
for (const auto &output_outside_addrs : output_data_info_) { | |||
ZeroCopyOffset output_outside = output_outside_addrs.second; | |||
if (!output_outside.IsRelativeOffsetValid()) { | |||
return; | |||
} | |||
for (uint32_t out_count = 0; out_count < output_outside.GetAddrCount(); ++out_count) { | |||
auto &addrs_mapping_list = output_outside.GetOutsideAddrs(); | |||
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[out_count]; | |||
@@ -3219,12 +3220,12 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<v | |||
for (size_t i = 0; i < nums; ++i) { | |||
std::lock_guard<std::mutex> lock(outside_addrs_mutex_); | |||
for (auto &input_outside_addrs : new_input_outside_addrs_) { | |||
for (auto &input_outside_addrs : input_data_info_) { | |||
ZeroCopyOffset &input_outside = input_outside_addrs.second; | |||
input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); | |||
} | |||
for (auto &output_outside_addrs : new_output_outside_addrs_) { | |||
for (auto &output_outside_addrs : output_data_info_) { | |||
ZeroCopyOffset &output_outside = output_outside_addrs.second; | |||
output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); | |||
} | |||
@@ -3293,12 +3294,12 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 | |||
/// @return SUCCESS handle successfully / PARAM_INVALID for failed | |||
/// | |||
Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &output_data, bool is_dynamic) { | |||
if (UpdateIoTaskArgs(new_input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { | |||
if (UpdateIoTaskArgs(input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { | |||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update input data to model failed."); | |||
return ACL_ERROR_GE_PARAM_INVALID; | |||
} | |||
if (UpdateIoTaskArgs(new_output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) != | |||
if (UpdateIoTaskArgs(output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) != | |||
SUCCESS) { | |||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update output data to model failed."); | |||
return ACL_ERROR_GE_PARAM_INVALID; | |||
@@ -675,7 +675,7 @@ class DavinciModel { | |||
/// @return Status | |||
/// | |||
Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, | |||
map<uint32_t, OpDescPtr> &data_by_index); | |||
map<uint32_t, OpDescPtr> &data_by_index, set<const void *> &input_outside_addrs); | |||
/// | |||
/// @ingroup ge | |||
@@ -694,7 +694,8 @@ class DavinciModel { | |||
/// @param [in/out] vector<OpDescPtr>: All NetOutput node in model. | |||
/// @return Status | |||
/// | |||
Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list); | |||
Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list, | |||
set<const void *> &output_outside_addrs); | |||
/// | |||
/// @ingroup ge | |||
@@ -764,7 +765,7 @@ class DavinciModel { | |||
/// | |||
Status BindInputQueue(); | |||
Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, map<const void *, ZeroCopyOffset> &outside_addrs); | |||
Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs); | |||
/// | |||
/// @ingroup ge | |||
@@ -897,10 +898,8 @@ class DavinciModel { | |||
void *global_step_addr_{nullptr}; | |||
uint64_t global_step_size_{0}; | |||
map<uint32_t, ZeroCopyOffset> new_input_data_info_; | |||
map<uint32_t, ZeroCopyOffset> new_output_data_info_; | |||
map<const void *, ZeroCopyOffset> new_input_outside_addrs_; | |||
map<const void *, ZeroCopyOffset> new_output_outside_addrs_; | |||
map<uint32_t, ZeroCopyOffset> input_data_info_; | |||
map<uint32_t, ZeroCopyOffset> output_data_info_; | |||
set<const void *> real_virtual_addrs_; | |||
@@ -100,8 +100,8 @@ class TsMemMall { | |||
private: | |||
std::mutex mem_mutex_; | |||
std::unordered_map<int64_t, void *> mem_store_size_; | |||
std::unordered_map<void *, int64_t> mem_store_addr_; | |||
std::map<int64_t, void *> mem_store_size_; | |||
std::map<void *, int64_t> mem_store_addr_; | |||
rtMemType_t mem_type_; | |||
}; | |||
} // namespace ge | |||
@@ -127,8 +127,8 @@ void ZeroCopyOffset::IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const | |||
} | |||
} | |||
void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index, | |||
bool fusion_flag, std::set<const void *> &real_virtual_addrs) { | |||
void ZeroCopyOffset::SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag, | |||
set<const void *> &real_virtual_addrs) { | |||
uint32_t out_count = 0; | |||
if (!fusion_flag) { | |||
out_count++; | |||
@@ -138,7 +138,6 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_l | |||
real_virtual_addrs.insert(addr); | |||
} else { | |||
GELOGI("[ZCPY] set l2-fusion for virtual_addr %p.", addr); | |||
int64_t output_offset = output_offset_list.at(index); | |||
for (size_t i = 0; i < zero_copy_basic_offset_.size(); ++i) { | |||
if (zero_copy_basic_offset_.at(i) == output_offset) { | |||
out_count++; | |||
@@ -153,6 +152,7 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_l | |||
} | |||
} | |||
addr_count_ = out_count; | |||
valid_relative_offset_ = true; | |||
} | |||
void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr, | |||
@@ -181,9 +181,13 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo | |||
} | |||
} | |||
addr_count_ = out_count; | |||
valid_relative_offset_ = true; | |||
} | |||
void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { | |||
if (!valid_relative_offset_) { | |||
return; | |||
} | |||
const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr); | |||
for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) { | |||
auto args_addrs = outside_addrs_[out_count].find(outside_addr); | |||
@@ -43,8 +43,7 @@ class ZeroCopyOffset { | |||
~ZeroCopyOffset(); | |||
Status InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, bool &fusion_flag); | |||
void SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index, | |||
bool fusion_flag, std::set<const void *> &real_virtual_addrs); | |||
void SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag, set<const void *> &real_virtual_addrs); | |||
void IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const int64_t &tensor_addr, bool &fusion_flag); | |||
Status InitOutputDataInfo(const vector<int64_t> &input_size_list, const vector<void *> &virtual_addr_list, | |||
@@ -65,9 +64,10 @@ class ZeroCopyOffset { | |||
// data_size of Data/Netoutput | |||
int64_t GetDataSize() const { return data_size_; } | |||
// value of *outside_addrs_ from davinci_model | |||
const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() { return outside_addrs_; } | |||
const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() const { return outside_addrs_; } | |||
// name of op | |||
std::string GetOpName() const { return op_name_; } | |||
const bool IsRelativeOffsetValid() const { return valid_relative_offset_; } | |||
private: | |||
void *basic_addr_ = nullptr; | |||
@@ -81,6 +81,7 @@ class ZeroCopyOffset { | |||
std::vector<int64_t> zero_copy_basic_offset_; | |||
std::vector<int64_t> zero_copy_relative_offset_; | |||
bool valid_relative_offset_ = false; | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_ |
@@ -131,7 +131,7 @@ bool IsTailingOptimization() { | |||
} | |||
ge::Status CheckFpCeilingMode() { | |||
static const std::unordered_set<std::string> kValidFpCeilingMode = {"0", "1", "2"}; | |||
static const std::set<std::string> kValidFpCeilingMode = {"0", "1", "2"}; | |||
string mode; | |||
auto ret = ge::GetContext().GetOption("ge.fpCeilingMode", mode); | |||
if (ret == ge::GRAPH_SUCCESS) { | |||
@@ -170,8 +170,8 @@ class VarResource { | |||
std::unordered_map<std::string, VarAddrMgr> var_addr_mgr_map_; | |||
std::unordered_map<std::string, ge::GeTensorDesc> cur_var_tensor_desc_map_; | |||
std::unordered_map<std::string, std::vector<TransNodeInfo>> var_to_trans_road_; | |||
std::unordered_map<std::string, uint32_t> var_names_to_changed_graph_id_; | |||
std::unordered_map<std::string, uint32_t> var_names_to_allocated_graph_id_; | |||
std::map<std::string, uint32_t> var_names_to_changed_graph_id_; | |||
std::map<std::string, uint32_t> var_names_to_allocated_graph_id_; | |||
std::map<uint32_t, std::unordered_map<std::string, VarBroadCastInfo>> var_broad_cast_info_; | |||
}; | |||
@@ -843,7 +843,7 @@ bool ge::GraphPartitioner::HasSecondPath(size_t src, size_t dst, size_t upper_bo | |||
/// Avoid recursion since stack space might be limited. | |||
/// We instead keep a stack of nodes to visit. | |||
std::vector<size_t> temp_stack; | |||
std::unordered_set<size_t> visited; | |||
std::set<size_t> visited; | |||
temp_stack.push_back(src); | |||
while (!temp_stack.empty()) { | |||
size_t cluster = temp_stack.back(); | |||
@@ -36,7 +36,7 @@ using PartitionMap = std::unordered_map<ComputeGraphPtr, std::string>; | |||
using NodetoNodeMap = std::unordered_map<NodePtr, NodePtr>; | |||
using EnginetoGraphMap = std::unordered_map<std::string, ComputeGraphPtr>; | |||
using EdgeMap = std::set<std::pair<AnchorPtr, AnchorPtr>>; | |||
using ClusterSet = std::unordered_set<size_t>; | |||
using ClusterSet = std::set<size_t>; | |||
class Cluster { | |||
public: | |||
size_t index_; // corresponding to rank of node | |||
@@ -50,12 +50,12 @@ Status RunOpKernelWithCheck(NodePtr &node, | |||
return FoldingPass::RunOpKernel(node, inputs, outputs); | |||
} | |||
const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> | |||
const std::map<std::string, std::pair<std::uint64_t, uint64_t>> | |||
&ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { | |||
return statistic_of_ge_constant_folding_; | |||
} | |||
const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> | |||
const std::map<std::string, std::pair<std::uint64_t, uint64_t>> | |||
&ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { | |||
return statistic_of_op_constant_folding_; | |||
} | |||
@@ -26,11 +26,11 @@ namespace ge { | |||
class ConstantFoldingPass : public FoldingPass { | |||
public: | |||
Status Run(ge::NodePtr &node) override; | |||
const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> &GetGeConstantFoldingPerfStatistic() const; | |||
const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> &GetOpConstantFoldingPerfStatistic() const; | |||
const std::map<std::string, std::pair<std::uint64_t, uint64_t>> &GetGeConstantFoldingPerfStatistic() const; | |||
const std::map<std::string, std::pair<std::uint64_t, uint64_t>> &GetOpConstantFoldingPerfStatistic() const; | |||
private: | |||
std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_op_constant_folding_; | |||
std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_ge_constant_folding_; | |||
std::map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_op_constant_folding_; | |||
std::map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_ge_constant_folding_; | |||
}; | |||
} // namespace ge | |||
@@ -372,6 +372,11 @@ NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, | |||
} | |||
GELOGI("Create Assign op:%s.", op_desc->GetName().c_str()); | |||
if (!AttrUtils::SetBool(op_desc, ATTR_NEED_COMPILE, true)) { | |||
GELOGE(INTERNAL_ERROR, "Set ATTR_NEED_COMPILE Attr for node:%s fail.", op_desc->GetName().c_str()); | |||
return nullptr; | |||
} | |||
graphStatus ret = op_desc->AddInputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); | |||
if (ret != GRAPH_SUCCESS) { | |||
GELOGE(INTERNAL_ERROR, "Create Assign op: add ref input desc fail."); | |||
@@ -52,7 +52,7 @@ class HcclContinuousMemcpyPass : public GraphPass { | |||
bool IsDataNode(const std::string& node_type); | |||
std::unordered_map<std::string, uint32_t> node_num_map_; | |||
std::map<std::string, uint32_t> node_num_map_; | |||
}; | |||
} // namespace ge | |||
@@ -50,7 +50,7 @@ class HcclMemcpyPass : public GraphPass { | |||
bool IsDataNode(const std::string& node_type); | |||
std::unordered_map<std::string, uint32_t> node_num_map_; | |||
std::map<std::string, uint32_t> node_num_map_; | |||
}; | |||
} // namespace ge | |||
@@ -92,8 +92,7 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { | |||
} | |||
// parser data dynamic info from atc parameter --input_shape | |||
if (multibatch::ParserDataToDynmaicInfo(batch_shapes_, GetLocalOmgContext().user_input_dims, | |||
data_to_dynamic_info_) != SUCCESS) { | |||
if (CheckAndParseDynamicData() != SUCCESS) { | |||
GELOGE(PARAM_INVALID, "Parse each data's own dynamic info failed"); | |||
return PARAM_INVALID; | |||
} | |||
@@ -177,6 +176,58 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { | |||
return SUCCESS; | |||
} | |||
///
/// @ingroup ge
/// @brief Check the Data nodes that carry an unknown shape and parse the per-data dynamic info.
/// @return SUCCESS / PARAM_INVALID / INTERNAL_ERROR (a failure of ParserDataToDynamicInfo is
///         presumably propagated by GE_CHK_STATUS_RET - confirm against the macro definition)
///
Status MultiBatchClonePass::CheckAndParseDynamicData() {
  // Local copy of the user configured (name, shape) list; unknown-shape Data nodes that the
  // user did not list are appended to this copy further down.
  auto name_shape_pairs = GetLocalOmgContext().user_input_dims;
  std::vector<std::string> configured_names;
  for (auto &name_shape : name_shape_pairs) {
    configured_names.push_back(name_shape.first);
  }

  size_t dynamic_data_num = 0;
  if (!getnext_sink_dynamic_dims_) {
    for (const auto &node : all_data_nodes_) {
      auto tensor_desc = NodeUtils::GetOutputDesc(*node, kDataOutIndex);
      auto shape = tensor_desc.GetShape();
      const char *format_name = "Others";
      if (tensor_desc.GetFormat() == Format::FORMAT_NCHW) {
        format_name = "NCHW";
      } else if (tensor_desc.GetFormat() == Format::FORMAT_NHWC) {
        format_name = "NHWC";
      }
      auto data_name = node->GetName();
      const auto &dims = shape.GetDims();

      // A Data node whose dims are all non-negative is not dynamic: nothing to check.
      if (std::none_of(dims.begin(), dims.end(), [](int64_t dim) { return dim < 0; })) {
        continue;
      }
      ++dynamic_data_num;

      // Data already listed by the user needs no extra validation here.
      if (std::find(configured_names.begin(), configured_names.end(), data_name) != configured_names.end()) {
        continue;
      }

      if (!GetLocalOmgContext().dynamic_batch_size.empty()) {
        auto batch_ok = multibatch::CheckDynamicBatchShape(dims, data_name);
        GE_IF_BOOL_EXEC(!batch_ok, GELOGE(PARAM_INVALID, "Failed to check dynamic batch shape of %s.",
                                          data_name.c_str()); return PARAM_INVALID);
      } else if (!GetLocalOmgContext().dynamic_image_size.empty()) {
        auto image_ok = multibatch::CheckDynamicImageSizeShape(dims, data_name, format_name);
        GE_IF_BOOL_EXEC(!image_ok, GELOGE(PARAM_INVALID, "Failed to check dynamic image size shape of %s.",
                                          data_name.c_str()); return PARAM_INVALID);
      } else if (!GetLocalOmgContext().dynamic_dims.empty()) {
        // With dynamic dims every dynamic Data node has to be listed by the user.
        ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "reason"},
                                                        {"--input_shape", "all dynamic data must be set in --input_shape"});
        GELOGE(INTERNAL_ERROR, "data: %s shape:%s must be set int --input_shape",
               node->GetName().c_str(), shape.ToString().c_str());
        return INTERNAL_ERROR;
      }
      name_shape_pairs.emplace_back(data_name, dims);
    }
  }

  auto parse_status = multibatch::ParserDataToDynamicInfo(batch_shapes_, name_shape_pairs, data_to_dynamic_info_);
  GE_CHK_STATUS_RET(parse_status, "Failed to parse data to dynamic info.");

  // Outside the getnext-sink case at least one Data node must have an unknown shape.
  const bool has_dynamic_input = getnext_sink_dynamic_dims_ || (dynamic_data_num != 0);
  if (!has_dynamic_input) {
    ErrorManager::GetInstance().ATCReportErrMessage("E10040");
    GELOGE(PARAM_INVALID,
           "Need unknow shape data when user set --dynamic_batch_size, --dynamic_image_size or --dynamic_dims");
    return PARAM_INVALID;
  }
  return SUCCESS;
}
Status MultiBatchClonePass::InitParamsOfGetNext(const NodePtr &node) { | |||
data_count_from_getnext_ = 0; | |||
getnext_sink_dynamic_dims_ = false; | |||
@@ -175,6 +175,8 @@ class MultiBatchClonePass : public GraphPass { | |||
/// @return 0: SUCCESS / others: FAILED | |||
/// | |||
Status UpdateOutputTensor(uint32_t parent_index, uint32_t unused_num); | |||
Status CheckAndParseDynamicData(); | |||
std::string session_graph_id_; | |||
std::vector<std::vector<int64_t>> batch_shapes_; | |||
@@ -235,7 +235,7 @@ class SwitchToStreamSwitchPass : public GraphPass { | |||
std::vector<NodePtr> stream_switch_nodes_; | |||
std::unordered_map<OutDataAnchorPtr, std::map<int64_t, std::vector<std::list<NodePtr>>>> cond_node_map_; | |||
std::unordered_map<NodePtr, std::set<std::string>> switch_node_map_; | |||
std::unordered_map<std::string, uint32_t> node_num_map_; | |||
std::map<std::string, uint32_t> node_num_map_; | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_PASSES_SWITCH_TO_STREAM_SWITCH_PASS_H_ |
@@ -738,7 +738,7 @@ Status MultiBatchGraphCopyer::CheckAndParseDynamicData(){ | |||
} | |||
} | |||
} | |||
auto ret = ParserDataToDynmaicInfo(shapes_, data_name_and_shape, data_to_dynamic_info_); | |||
auto ret = ParserDataToDynamicInfo(shapes_, data_name_and_shape, data_to_dynamic_info_); | |||
GE_CHK_STATUS_RET(ret, "Failed to parse data to dynamic info."); | |||
if (!getnext_sink_dynamic_dims_ && unknown_shape_count == 0) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("E10040"); | |||
@@ -377,7 +377,7 @@ bool InitDynamicParams(vector<vector<int64_t>> &shapes) { | |||
/// @param [out] map<string, vector<vector<int64_t>>> &data_to_dynamic_info: key:data_name. value:dynamic dims. | |||
/// @return SUCCESS / PARAM_INVALID | |||
/// | |||
Status ParserDataToDynmaicInfo(const vector<vector<int64_t>> &shapes, | |||
Status ParserDataToDynamicInfo(const vector<vector<int64_t>> &shapes, | |||
vector<pair<string, vector<int64_t>>> &data_name_and_shape, | |||
map<string, vector<vector<int64_t>> > &data_to_dynamic_info) { | |||
size_t cur_data_index = 0; | |||
@@ -74,7 +74,7 @@ Status CalcShape(const std::vector<int64_t> &batch_shape, GeShape &data_shape); | |||
/// @param [out] map<string, vector<vector<int64_t>>> &data_to_dynamic_info: key:data_name. value:dynamic dims. | |||
/// @return SUCCESS / PARAM_INVALID | |||
/// | |||
Status ParserDataToDynmaicInfo(const vector<vector<int64_t>> &shapes, | |||
Status ParserDataToDynamicInfo(const vector<vector<int64_t>> &shapes, | |||
vector<pair<string, vector<int64_t>>> &data_name_and_shape, | |||
map<string, vector<vector<int64_t>>> &data_to_dynamic_info); | |||
@@ -93,7 +93,7 @@ Status StampDynamicType(const OpDescPtr &op_desc); | |||
/// @param [in] const string &data_name: cur data name. | |||
/// @return true: check passed / false: check failed | |||
/// | |||
bool CheckDynamicBatchShape(const vector<int64_t> &shape, const string &data_name); | |||
GE_FUNC_VISIBILITY bool CheckDynamicBatchShape(const vector<int64_t> &shape, const string &data_name); | |||
/// | |||
/// @ingroup ge | |||
@@ -104,7 +104,7 @@ bool CheckDynamicBatchShape(const vector<int64_t> &shape, const string &data_nam | |||
/// @param [in] const std::string &input_format: format of input. | |||
/// @return true: check passed / false: check failed | |||
/// | |||
bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name, | |||
GE_FUNC_VISIBILITY bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name, | |||
const std::string &input_format); | |||
} // namespace multibatch | |||
@@ -21,10 +21,12 @@ add_library(host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | |||
target_compile_options(host_cpu_engine PRIVATE | |||
-Werror | |||
-fno-common | |||
-fvisibility=hidden | |||
) | |||
target_compile_definitions(host_cpu_engine PRIVATE | |||
google=ascend_private | |||
FUNC_VISIBILITY | |||
) | |||
target_include_directories(host_cpu_engine PRIVATE | |||
@@ -44,6 +46,10 @@ target_include_directories(host_cpu_engine PRIVATE | |||
${GE_CODE_DIR}/third_party/fwkacllib/inc | |||
) | |||
target_link_options(host_cpu_engine PRIVATE | |||
-Wl,-Bsymbolic | |||
) | |||
target_link_libraries(host_cpu_engine PRIVATE | |||
$<BUILD_INTERFACE:intf_pub> | |||
-Wl,--no-as-needed | |||
@@ -60,11 +66,12 @@ add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | |||
target_compile_options(atc_host_cpu_engine PRIVATE | |||
-Werror | |||
-fno-common | |||
-fvisibility=hidden | |||
) | |||
target_compile_definitions(atc_host_cpu_engine PRIVATE | |||
COMPILE_OMG_PACKAGE | |||
google=ascend_private | |||
FUNC_VISIBILITY | |||
) | |||
target_include_directories(atc_host_cpu_engine PRIVATE | |||
@@ -84,6 +91,10 @@ target_include_directories(atc_host_cpu_engine PRIVATE | |||
${GE_CODE_DIR}/third_party/fwkacllib/inc | |||
) | |||
target_link_options(atc_host_cpu_engine PRIVATE | |||
-Wl,-Bsymbolic | |||
) | |||
target_link_libraries(atc_host_cpu_engine PRIVATE | |||
$<BUILD_INTERFACE:intf_pub> | |||
-Wl,--no-as-needed | |||
@@ -105,10 +116,12 @@ add_library(host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST}) | |||
target_compile_options(host_cpu_opskernel_builder PRIVATE | |||
-Werror | |||
-fno-common | |||
-fvisibility=hidden | |||
) | |||
target_compile_definitions(host_cpu_opskernel_builder PRIVATE | |||
google=ascend_private | |||
FUNC_VISIBILITY | |||
) | |||
target_include_directories(host_cpu_opskernel_builder PRIVATE | |||
@@ -128,6 +141,10 @@ target_include_directories(host_cpu_opskernel_builder PRIVATE | |||
${GE_CODE_DIR}/third_party/fwkacllib/inc | |||
) | |||
target_link_options(host_cpu_opskernel_builder PRIVATE | |||
-Wl,-Bsymbolic | |||
) | |||
target_link_libraries(host_cpu_opskernel_builder PRIVATE | |||
$<BUILD_INTERFACE:intf_pub> | |||
-Wl,--no-as-needed | |||
@@ -145,10 +162,12 @@ add_library(atc_host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST}) | |||
target_compile_options(atc_host_cpu_opskernel_builder PRIVATE | |||
-Werror | |||
-fno-common | |||
-fvisibility=hidden | |||
) | |||
target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE | |||
google=ascend_private | |||
FUNC_VISIBILITY | |||
) | |||
target_include_directories(atc_host_cpu_opskernel_builder PRIVATE | |||
@@ -168,6 +187,10 @@ target_include_directories(atc_host_cpu_opskernel_builder PRIVATE | |||
${GE_CODE_DIR}/third_party/fwkacllib/inc | |||
) | |||
target_link_options(atc_host_cpu_opskernel_builder PRIVATE | |||
-Wl,-Bsymbolic | |||
) | |||
target_link_libraries(atc_host_cpu_opskernel_builder PRIVATE | |||
$<BUILD_INTERFACE:intf_pub> | |||
-Wl,--no-as-needed | |||
@@ -190,11 +213,13 @@ add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST}) | |||
target_compile_options(host_cpu_opskernel_builder_static PRIVATE | |||
-Werror | |||
-fno-common | |||
-fvisibility=hidden | |||
) | |||
target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE | |||
google=ascend_private | |||
LOG_CPP | |||
FUNC_VISIBILITY | |||
) | |||
target_include_directories(host_cpu_opskernel_builder_static PRIVATE | |||
@@ -17,6 +17,20 @@ | |||
#ifndef GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | |||
#define GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | |||
// GE_FUNC_VISIBILITY marks a class or function as exported when FUNC_VISIBILITY is defined: | |||
// it expands to _declspec(dllexport) under MSVC and to the visibility("default") attribute | |||
// otherwise. Without FUNC_VISIBILITY it expands to nothing. | |||
#if defined(_MSC_VER) | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#else | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#endif | |||
#include <map> | |||
#include <memory> | |||
#include <string> | |||
@@ -32,7 +46,7 @@ namespace host_cpu { | |||
* host cpu engine. | |||
* Used for the ops which executes on host. | |||
*/ | |||
class HostCpuEngine { | |||
class GE_FUNC_VISIBILITY HostCpuEngine { | |||
public: | |||
/** | |||
* get HostCpuEngine instance. | |||
@@ -87,25 +101,25 @@ extern "C" { | |||
* When Ge start, GE will invoke this interface | |||
* @return The status whether initialize successfully | |||
*/ | |||
ge::Status Initialize(const map<string, string> &options); | |||
GE_FUNC_VISIBILITY ge::Status Initialize(const map<string, string> &options); | |||
/** | |||
* After the initialize, GE will invoke this interface to get the Ops kernel Store | |||
* @param ops_kernel_map The host cpu's ops kernel info | |||
*/ | |||
void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map); | |||
GE_FUNC_VISIBILITY void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map); | |||
/** | |||
* After the initialize, GE will invoke this interface to get the Graph Optimizer | |||
* @param graph_optimizers The host cpu's Graph Optimizer objs | |||
*/ | |||
void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers); | |||
GE_FUNC_VISIBILITY void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers); | |||
/** | |||
* When the graph finished, GE will invoke this interface | |||
* @return The status whether initialize successfully | |||
*/ | |||
ge::Status Finalize(); | |||
GE_FUNC_VISIBILITY ge::Status Finalize(); | |||
} | |||
#endif // GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ |
@@ -17,11 +17,25 @@ | |||
#ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ | |||
#define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ | |||
// GE_FUNC_VISIBILITY marks a class or function as exported when FUNC_VISIBILITY is defined: | |||
// it expands to _declspec(dllexport) under MSVC and to the visibility("default") attribute | |||
// otherwise. Without FUNC_VISIBILITY it expands to nothing. | |||
#if defined(_MSC_VER) | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#else | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#endif | |||
#include "common/opskernel/ops_kernel_builder.h" | |||
namespace ge { | |||
namespace host_cpu { | |||
class HostCpuOpsKernelBuilder : public OpsKernelBuilder { | |||
class GE_FUNC_VISIBILITY HostCpuOpsKernelBuilder : public OpsKernelBuilder { | |||
public: | |||
Status Initialize(const map<std::string, std::string> &options) override; | |||
@@ -17,6 +17,20 @@ | |||
#ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_ | |||
#define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_ | |||
// GE_FUNC_VISIBILITY marks a class or function as exported when FUNC_VISIBILITY is defined: | |||
// it expands to _declspec(dllexport) under MSVC and to the visibility("default") attribute | |||
// otherwise. Without FUNC_VISIBILITY it expands to nothing. | |||
#if defined(_MSC_VER) | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#else | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#endif | |||
#include <map> | |||
#include <string> | |||
#include <vector> | |||
@@ -25,7 +39,7 @@ | |||
namespace ge { | |||
namespace host_cpu { | |||
class HostCpuOpsKernelInfoStore : public OpsKernelInfoStore { | |||
class GE_FUNC_VISIBILITY HostCpuOpsKernelInfoStore : public OpsKernelInfoStore { | |||
public: | |||
HostCpuOpsKernelInfoStore() {} | |||
~HostCpuOpsKernelInfoStore() override = default; | |||
@@ -21,7 +21,7 @@ | |||
namespace ge { | |||
namespace host_cpu { | |||
class HostOp : public Op { | |||
class GE_FUNC_VISIBILITY HostOp : public Op { | |||
public: | |||
HostOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} | |||
~HostOp() override = default; | |||
@@ -29,7 +29,7 @@ namespace host_cpu { | |||
/** | |||
* The base class for all op. | |||
*/ | |||
class Op { | |||
class GE_FUNC_VISIBILITY Op { | |||
public: | |||
Op(const Node &node, RunContext &run_context) : run_context_(run_context), node_(node) {} | |||
virtual ~Op() = default; | |||
@@ -32,7 +32,7 @@ using OP_CREATOR_FUNC = std::function<std::shared_ptr<Op>(const Node &, RunConte | |||
/** | |||
* manage all the op, support create op. | |||
*/ | |||
class OpFactory { | |||
class GE_FUNC_VISIBILITY OpFactory { | |||
public: | |||
static OpFactory &Instance(); | |||
@@ -70,7 +70,7 @@ class OpFactory { | |||
std::vector<std::string> all_ops_; | |||
}; | |||
class OpRegistrar { | |||
class GE_FUNC_VISIBILITY OpRegistrar { | |||
public: | |||
OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) { | |||
OpFactory::Instance().RegisterCreator(type, func); | |||
@@ -71,7 +71,7 @@ TensorValue::TensorValue(void *buffer, size_t size) : ref_buffer_(buffer), ref_s | |||
TensorValue::~TensorValue() { Destroy(); } | |||
void TensorValue::Destroy() { | |||
if (buffer_ != nullptr || ref_buffer_ != nullptr) { | |||
if (buffer_ != nullptr) { | |||
GELOGD("Unref tensor: %s", DebugString().c_str()); | |||
buffer_.reset(); | |||
} | |||
@@ -71,12 +71,14 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, | |||
GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_)); | |||
RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End"); | |||
HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc), "Failed to execute partitioned call."); | |||
HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc, args.outputs), | |||
"Failed to execute partitioned call."); | |||
RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); | |||
HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); | |||
RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); | |||
args.outputs.clear(); | |||
HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); | |||
RECORD_MODEL_EXECUTION_EVENT(&context_, "[GetOutput] End"); | |||
return SUCCESS; | |||
@@ -131,10 +131,14 @@ Status SubgraphExecutor::InitInputsForKnownShape(const std::vector<TensorValue> | |||
} | |||
Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs, | |||
const std::vector<ConstGeTensorDescPtr> &input_desc) { | |||
const std::vector<ConstGeTensorDescPtr> &input_desc, | |||
const std::vector<TensorValue> &outputs) { | |||
GELOGD("[%s] is dynamic = %s", graph_item_->GetName().c_str(), graph_item_->IsDynamic() ? "true" : "false"); | |||
GE_CHK_STATUS_RET(Init(inputs, input_desc), "[%s] Failed to init executor.", graph_item_->GetName().c_str()); | |||
if (!outputs.empty()) { | |||
GE_CHK_STATUS_RET(EnableOutputZeroCopy(outputs), | |||
"Failed to enable output zero copy by user provided outputs."); | |||
} | |||
if (!graph_item_->IsDynamic()) { | |||
return ExecuteAsyncForKnownShape(inputs); | |||
} | |||
@@ -144,6 +148,11 @@ Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs, | |||
return SUCCESS; | |||
} | |||
// Overload for callers that do not provide output tensors: forwards to the three-argument
// overload with an empty output list.
Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
                                      const std::vector<ConstGeTensorDescPtr> &input_desc) {
  const std::vector<TensorValue> no_outputs;
  return ExecuteAsync(inputs, input_desc, no_outputs);
}
Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vector<TensorValue> &inputs) { | |||
GELOGD("[%s] subgraph is not dynamic.", graph_item_->GetName().c_str()); | |||
if (graph_item_->GetAllNodes().size() != 1) { | |||
@@ -440,5 +449,37 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) { | |||
return SUCCESS; | |||
} | |||
/**
 * Register each user provided output tensor as the output of the node connected to the
 * corresponding output edge of the graph item.
 * @param outputs user provided output tensors, expected to be one per output edge
 * @return SUCCESS on success, PARAM_INVALID if the number of tensors differs from the number
 *         of output edges; a SetOutput failure is reported through GE_CHK_STATUS_RET
 */
Status SubgraphExecutor::EnableOutputZeroCopy(const vector<TensorValue> &outputs) {
  GELOGD("To enable zero copy, output number = %zu", outputs.size());
  const auto &output_edges = graph_item_->GetOutputEdges();
  // The tensors are matched to the output edges by position, so the counts must agree.
  if (outputs.size() != output_edges.size()) {
    GELOGE(PARAM_INVALID, "Output number mismatches, expect = %zu, but given = %zu",
           output_edges.size(),
           outputs.size());
    return PARAM_INVALID;
  }

  // Op -> NetOutput, set the output tensor of Op that output to the NetOutput node
  for (size_t i = 0; i < outputs.size(); ++i) {
    const auto &output_tensor = outputs[i];
    const auto &output_node = output_edges[i].first;
    const int output_idx = output_edges[i].second;
    GELOGD("[%s] Set output tensor[%zu] to [%s]'s output[%d], tensor = %s",
           graph_item_->GetName().c_str(),
           i,
           output_node->NodeName().c_str(),
           output_idx,
           output_tensor.DebugString().c_str());

    // The message names the output tensor: this function only ever sets outputs.
    GE_CHK_STATUS_RET(subgraph_context_->SetOutput(*output_node, output_idx, output_tensor),
                      "[%s] Failed to set output tensor[%zu]",
                      graph_item_->GetName().c_str(),
                      i);
  }

  GELOGD("Done enabling zero copy for outputs successfully.");
  return SUCCESS;
}
} // namespace hybrid | |||
} // namespace ge |
@@ -43,7 +43,19 @@ class SubgraphExecutor { | |||
* @param input_desc input tensor descriptions | |||
* @return SUCCESS on success, error code otherwise | |||
*/ | |||
Status ExecuteAsync(const std::vector<TensorValue> &inputs, const std::vector<ConstGeTensorDescPtr> &input_desc); | |||
Status ExecuteAsync(const std::vector<TensorValue> &inputs, | |||
const std::vector<ConstGeTensorDescPtr> &input_desc); | |||
/** | |||
* Execute subgraph async, output tensor address(not data) and output tensor descriptions are | |||
* valid after this method returned | |||
* @param inputs input tensors | |||
* @param input_desc input tensor descriptions | |||
* @param outputs user provided output tensors; when not empty they are used to enable output zero copy | |||
* @return SUCCESS on success, error code otherwise | |||
*/ | |||
Status ExecuteAsync(const std::vector<TensorValue> &inputs, | |||
const std::vector<ConstGeTensorDescPtr> &input_desc, | |||
const std::vector<TensorValue> &outputs); | |||
/** | |||
* Execute subgraph async, output tensor address(not data) and output tensor descriptions are | |||
@@ -76,6 +88,7 @@ class SubgraphExecutor { | |||
private: | |||
Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state); | |||
Status EnableOutputZeroCopy(const std::vector<TensorValue> &outputs); | |||
static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state); | |||
Status Init(const std::vector<TensorValue> &inputs, | |||
const std::vector<ConstGeTensorDescPtr> &input_desc); | |||
@@ -40,9 +40,14 @@ HybridModel::~HybridModel() { | |||
GELOGD("[%s] HybridModel destroyed.", model_name_.c_str()); | |||
} | |||
// Initializes the hybrid model by running a HybridModelBuilder over *this. | |||
// The is_single_op argument is stored in is_single_op_ and selects BuildForSingleOp() | |||
// instead of the regular Build(). Returns SUCCESS once the chosen build has succeeded. | |||
// NOTE(review): this listing looks like a rendered diff, so the pre-change signature and the | |||
// unconditional Build() call appear next to their replacements -- confirm against the real file. | |||
Status HybridModel::Init() { | |||
Status HybridModel::Init(bool is_single_op) { | |||
GELOGD("Start to init hybrid model."); | |||
GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "Failed to build hybrid model."); | |||
// Remember the mode before building; IsSingleOp() reports this value. | |||
is_single_op_ = is_single_op; | |||
if (is_single_op) { | |||
GE_CHK_STATUS_RET(HybridModelBuilder(*this).BuildForSingleOp(), "Failed to build hybrid model."); | |||
} else { | |||
GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "Failed to build hybrid model."); | |||
} | |||
GELOGD("HybridModel initialized successfully."); | |||
return SUCCESS; | |||
} | |||
@@ -37,7 +37,7 @@ class HybridModel { | |||
~HybridModel(); | |||
Status Init(); | |||
Status Init(bool is_single_op = false); | |||
const NodeItem *GetNodeItem(const NodePtr &node) const; | |||
@@ -69,6 +69,10 @@ class HybridModel { | |||
return model_id_; | |||
} | |||
bool IsSingleOp() const { | |||
return is_single_op_; | |||
} | |||
TensorValue* GetVariable(const string &name) const; | |||
NodePtr GetVariableNode(const string &name) const; | |||
@@ -131,11 +135,13 @@ class HybridModel { | |||
std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_; | |||
bool is_new_model_desc_ = false; // support aipp | |||
bool is_single_op_ = false; | |||
// runtime fields | |||
uint32_t device_id_ = 0; | |||
uint32_t model_id_ = 0; | |||
uint8_t *var_mem_base_ = nullptr; | |||
std::unique_ptr<TensorBuffer> weight_buffer_; | |||
RuntimeParam root_runtime_param_; | |||
}; | |||
} // namespace hybrid | |||
@@ -147,6 +147,21 @@ Status HybridModelBuilder::Build() { | |||
return SUCCESS; | |||
} | |||
/**
 * Build the hybrid model for the single-op path.
 * Validates the root model, indexes the task defs of the root graph's GeModel,
 * then loads the graph, initializes the weights and loads the tasks.
 * @return SUCCESS on success, INTERNAL_ERROR when the root graph has no GeModel,
 *         otherwise the error code of the failing step
 */
Status HybridModelBuilder::BuildForSingleOp() {
  GE_CHK_STATUS_RET(ValidateParams(), "Failed to validate GeRootModel");
  const auto &root_graph = ge_root_model_->GetRootGraph();
  hybrid_model_.model_name_ = root_graph->GetName();
  GELOGI("[%s] Start to build hybrid model.", GetGraphName());

  // Use find() on a reference instead of operator[] on a copy: operator[] would silently
  // default-insert a null GeModelPtr on a miss, which is then dereferenced while indexing tasks.
  const auto &subgraph_models = ge_root_model_->GetSubgraphInstanceNameToModel();
  const auto iter = subgraph_models.find(root_graph->GetName());
  if (iter == subgraph_models.end() || iter->second == nullptr) {
    GELOGE(INTERNAL_ERROR, "[%s] Failed to find GeModel of root graph", GetGraphName());
    return INTERNAL_ERROR;
  }
  const GeModelPtr &ge_model = iter->second;

  GE_CHK_STATUS_RET(IndexTaskDefs(root_graph, ge_model),
                    "[%s] Failed to index task defs", GetGraphName());
  GE_CHK_STATUS_RET(LoadGraph(), "[%s] Failed to load graph", GetGraphName());
  GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName());
  GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName());
  GELOGI("[%s] Done building hybrid model for single op successfully.", GetGraphName());
  return SUCCESS;
}
Status HybridModelBuilder::ValidateParams() { | |||
GE_CHECK_NOTNULL(ge_root_model_); | |||
GE_CHECK_NOTNULL(ge_root_model_->GetRootGraph()); | |||
@@ -951,46 +966,71 @@ Status HybridModelBuilder::InitVariableTensors() { | |||
} | |||
// Loads the weights of the root graph's model and registers a TensorValue in | |||
// hybrid_model_.constant_tensors_ for every CONSTANT node. | |||
// Returns SUCCESS early when the root graph has no model or its weight buffer is empty, and | |||
// INTERNAL_ERROR when a Constant node carries no weight value. | |||
// NOTE(review): this listing looks like a rendered diff; statements of the previous per-node | |||
// allocate-and-copy implementation appear interleaved with the new single-buffer one -- confirm | |||
// which statements are live in the real file before relying on the comments below. | |||
Status HybridModelBuilder::InitWeights() { | |||
// For constant in root graph | |||
const auto &root_graph = ge_root_model_->GetRootGraph(); | |||
const auto &subgraph_models = ge_root_model_->GetSubgraphInstanceNameToModel(); | |||
auto iter = subgraph_models.find(root_graph->GetName()); | |||
if (iter == subgraph_models.end()) { | |||
GELOGD("Root graph model not found"); | |||
return SUCCESS; | |||
} | |||
auto &root_model = iter->second; | |||
const auto &weight_buffer = root_model->GetWeight(); | |||
if (weight_buffer.GetSize() == 0) { | |||
GELOGD("weight is empty"); | |||
return SUCCESS; | |||
} | |||
auto allocator = NpuMemoryAllocator::GetAllocator(); | |||
GE_CHECK_NOTNULL(allocator); | |||
for (auto &it : hybrid_model_.node_items_) { | |||
auto &node_item = it.second; | |||
if (node_item->node_type != CONSTANT) { | |||
// One buffer is created for the whole weight blob and filled with a single | |||
// RT_MEMCPY_HOST_TO_DEVICE copy; weight_base is its start address. | |||
hybrid_model_.weight_buffer_ = TensorBuffer::Create(allocator, weight_buffer.size()); | |||
GE_CHECK_NOTNULL(hybrid_model_.weight_buffer_); | |||
auto weight_base = reinterpret_cast<uint8_t *>(hybrid_model_.weight_buffer_->GetData()); | |||
GE_CHK_RT_RET(rtMemcpy(weight_base, | |||
hybrid_model_.weight_buffer_->GetSize(), | |||
weight_buffer.GetData(), | |||
weight_buffer.GetSize(), | |||
RT_MEMCPY_HOST_TO_DEVICE)); | |||
GELOGI("Init weight mem successfully, weight base %p, weight size = %zu", | |||
weight_base, | |||
hybrid_model_.weight_buffer_->GetSize()); | |||
// Only CONSTANT nodes that are direct children of the root graph are processed. | |||
for (auto &node : root_graph->GetDirectNode()) { | |||
if (node->GetType() != CONSTANT) { | |||
continue; | |||
} | |||
const auto &constant_node = node_item->node; | |||
auto op_desc = constant_node->GetOpDesc(); | |||
auto op_desc = node->GetOpDesc(); | |||
auto v_weights = ModelUtils::GetWeights(op_desc); | |||
if (v_weights.empty()) { | |||
GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", constant_node->GetName().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", node->GetName().c_str()); | |||
return INTERNAL_ERROR; | |||
} | |||
auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get()); | |||
auto output_desc = op_desc->MutableOutputDesc(0); | |||
GE_CHECK_NOTNULL(output_desc); | |||
auto tensor_size = ge_tensor->GetData().GetSize(); | |||
GELOGD("[%s] Start to init Constant node [%s], size = %ld", | |||
GE_CHECK_NOTNULL(ge_tensor); | |||
const GeTensorDesc &tensor_desc = ge_tensor->GetTensorDesc(); | |||
// Size is read from the node's output desc 0, the offset from the weight tensor's own desc. | |||
int64_t tensor_size = 0; | |||
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*op_desc->MutableOutputDesc(0), tensor_size), | |||
"[%s] Failed to get tensor size", | |||
node->GetName().c_str()); | |||
int64_t data_offset = 0; | |||
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetDataOffset(tensor_desc, data_offset), | |||
"[%s] Failed to get data offset", | |||
node->GetName().c_str()); | |||
GELOGD("[%s] Start to init Constant node [%s], size = %ld, offset = %ld", | |||
GetGraphName(), | |||
constant_node->GetName().c_str(), | |||
tensor_size); | |||
node->GetName().c_str(), | |||
tensor_size, | |||
data_offset); | |||
// The second Create() call builds the tensor buffer from an address inside the shared weight | |||
// buffer (weight_base + data_offset) -- presumably a non-owning view; verify in TensorBuffer. | |||
auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size); | |||
auto tensor_buffer = TensorBuffer::Create(weight_base + data_offset, tensor_size); | |||
GE_CHECK_NOTNULL(tensor_buffer); | |||
std::unique_ptr<TensorValue> constant_tensor(new (std::nothrow)TensorValue(std::move(tensor_buffer))); | |||
GE_CHECK_NOTNULL(constant_tensor); | |||
constant_tensor->SetName("Constant_" + op_desc->GetName()); | |||
if (tensor_size > 0) { | |||
GE_CHK_RT_RET(rtMemcpy(constant_tensor->MutableData(), | |||
constant_tensor->GetSize(), | |||
ge_tensor->GetData().data(), | |||
ge_tensor->GetData().size(), | |||
RT_MEMCPY_HOST_TO_DEVICE)); | |||
} | |||
hybrid_model_.constant_tensors_.emplace(constant_node, std::move(constant_tensor)); | |||
GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), constant_node->GetName().c_str(), tensor_size); | |||
hybrid_model_.constant_tensors_.emplace(node, std::move(constant_tensor)); | |||
GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), node->GetName().c_str(), tensor_size); | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -1038,6 +1078,53 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr | |||
return SUCCESS; | |||
} | |||
/**
 * Index the task defs of one GeModel by the node they belong to.
 * Every KERNEL, KERNEL_EX and HCCL task is matched to a direct node of sub_graph through its
 * op_index (compared with the node's OpDesc id) and appended to hybrid_model_.task_defs_.
 * Other task types are skipped. For KERNEL tasks the TBE kernel binary is also loaded into
 * the node's OpDesc.
 * @param sub_graph graph whose direct nodes own the tasks
 * @param ge_model model providing the task defs and the TBE kernel store
 * @return SUCCESS on success, INTERNAL_ERROR when a task refers to an unknown op_index,
 *         or the error produced by GE_CHECK_NOTNULL for a null graph, model, node or task def
 */
Status HybridModelBuilder::IndexTaskDefs(const ComputeGraphPtr &sub_graph, const GeModelPtr &ge_model) {
  GE_CHECK_NOTNULL(sub_graph);
  GE_CHECK_NOTNULL(ge_model);

  // index task defs
  GELOGD("To index tasks for subgraph: %s", sub_graph->GetName().c_str());
  std::unordered_map<int64_t, NodePtr> node_map;
  for (const auto &node : sub_graph->GetDirectNode()) {
    GE_CHECK_NOTNULL(node);
    GE_CHECK_NOTNULL(node->GetOpDesc());
    auto node_id = node->GetOpDesc()->GetId();
    GELOGD("op_index = %ld, node_name = %s", node_id, node->GetName().c_str());
    node_map.emplace(node_id, node);
  }

  const auto &model_task_def = ge_model->GetModelTaskDefPtr();
  GE_CHECK_NOTNULL(model_task_def);
  // Bind by reference: copying the repeated field would duplicate every task def.
  const auto &tasks = model_task_def->task();
  for (int i = 0; i < tasks.size(); ++i) {
    const domi::TaskDef &task_def = tasks[i];
    GELOGI("Task id = %d, task type = %d", i, task_def.type());
    const auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
    // Always assigned below before use; unsupported task types leave the iteration early.
    uint32_t op_index = 0;
    if (task_type == RT_MODEL_TASK_KERNEL) {
      op_index = task_def.kernel().context().op_index();
    } else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
      op_index = task_def.kernel_ex().op_index();
    } else if (task_type == RT_MODEL_TASK_HCCL) {
      op_index = task_def.kernel_hccl().op_index();
    } else {
      GELOGD("Skip task type: %d", static_cast<int>(task_type));
      continue;
    }

    const auto iter = node_map.find(op_index);
    if (iter == node_map.end()) {
      GELOGE(INTERNAL_ERROR, "Failed to get node by index = %u", op_index);
      return INTERNAL_ERROR;
    }

    const auto &node = iter->second;
    if (task_type == RT_MODEL_TASK_KERNEL) {
      ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc());
    }

    GELOGD("Task loaded for node: %s, task type = %d, op_index = %u",
           node->GetName().c_str(), static_cast<int>(task_type), op_index);
    hybrid_model_.task_defs_[node].emplace_back(task_def);
  }

  return SUCCESS;
}
Status HybridModelBuilder::IndexTaskDefs() { | |||
const auto &root_graph = ge_root_model_->GetRootGraph(); | |||
if (SetOutputNameAttr(*root_graph) != SUCCESS) { | |||
@@ -35,6 +35,7 @@ class HybridModelBuilder { | |||
explicit HybridModelBuilder(HybridModel &hybrid_model); | |||
~HybridModelBuilder() = default; | |||
Status Build(); | |||
Status BuildForSingleOp(); | |||
private: | |||
static Status UpdateAnchorStatus(const NodePtr &node); | |||
@@ -64,6 +65,7 @@ class HybridModelBuilder { | |||
Status ParseDependentInputNodes(NodeItem &node_item, const std::vector<string> &dependencies); | |||
Status ParseDependentForFusedSubgraph(NodeItem &node_item); | |||
Status IndexTaskDefs(); | |||
Status IndexTaskDefs(const ComputeGraphPtr &sub_graph, const GeModelPtr &ge_model); | |||
Status IndexSpecialNodes(); | |||
Status InitRuntimeParams(); | |||
Status InitModelMem(); | |||
@@ -49,6 +49,7 @@ Status AiCoreNodeExecutor::Initialize() { | |||
Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const { | |||
GE_CHECK_NOTNULL(node); | |||
GELOGI("AiCoreNodeExecutor(%s) LoadTask Start.", node->GetName().c_str()); | |||
bool is_single_op = model.IsSingleOp(); | |||
auto *task_defs = model.GetTaskDefs(node); | |||
if (task_defs == nullptr || task_defs->empty()) { | |||
@@ -66,7 +67,8 @@ Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &nod | |||
AiCoreTaskBuilder builder(node->GetOpDesc(), *task_defs); | |||
std::unique_ptr<NodeTask> node_task; | |||
GE_CHK_STATUS_RET(builder.BuildTask(node_task, true), "[%s] Failed to build op tasks.", node->GetName().c_str()); | |||
GE_CHK_STATUS_RET(builder.BuildTask(node_task, true, is_single_op), | |||
"[%s] Failed to build op tasks.", node->GetName().c_str()); | |||
task = std::move(node_task); | |||
GELOGI("AiCoreNodeExecutor(%s) LoadTask End.", node->GetName().c_str()); | |||
return SUCCESS; | |||
@@ -65,7 +65,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||
} | |||
TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | |||
rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
if (rt_ret != RT_ERROR_NONE || is_single_op_) { | |||
void *bin_handle = nullptr; | |||
if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { | |||
GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); | |||
@@ -50,6 +50,8 @@ class AiCoreOpTask { | |||
uint32_t GetBlockDim() const {return block_dim_;} | |||
/// Stores the single-op flag in is_single_op_.
void SetSingleOp(bool is_single_op) { is_single_op_ = is_single_op; }
protected: | |||
Status UpdateTilingInfo(TaskContext &context); | |||
virtual std::string GetKeyForOpParamSize() const; | |||
@@ -72,6 +74,7 @@ class AiCoreOpTask { | |||
uint32_t args_size_ = 0; | |||
uint32_t block_dim_ = 1; | |||
bool clear_atomic_ = true; | |||
bool is_single_op_ = false; | |||
std::vector<int> output_indices_to_skip_; | |||
}; | |||
@@ -37,7 +37,9 @@ AiCoreTaskBuilder::AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector | |||
: op_desc_(op_desc), task_defs_(task_defs) { | |||
} | |||
Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic) { | |||
Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, | |||
bool ignore_failure_on_atomic, | |||
bool is_single_op) { | |||
GE_CHECK_NOTNULL(op_desc_); | |||
if (task_defs_.size() > kNumTaskWithAtomicAddrCleanTask) { | |||
GELOGE(INTERNAL_ERROR, | |||
@@ -68,6 +70,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool i | |||
auto atomic_task = | |||
std::unique_ptr<AtomicAddrCleanOpTask>(new(std::nothrow)AtomicAddrCleanOpTask()); | |||
GE_CHECK_NOTNULL(atomic_task); | |||
atomic_task->SetSingleOp(is_single_op); | |||
GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()), | |||
"[%s] Failed to init task for AtomicAddrClean", | |||
op_desc_->GetName().c_str()); | |||
@@ -77,6 +80,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool i | |||
// build aicore task | |||
auto aicore_task = std::unique_ptr<AiCoreOpTask>(new(std::nothrow)AiCoreOpTask()); | |||
GE_CHECK_NOTNULL(aicore_task); | |||
aicore_task->SetSingleOp(is_single_op); | |||
GE_CHK_STATUS_RET(aicore_task->Init(*op_desc_, task_defs_.back()), | |||
"[%s] Failed to init task for AtomicAddrClean", | |||
op_desc_->GetName().c_str()); | |||
@@ -47,7 +47,7 @@ class AiCoreTaskBuilder { | |||
AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector<domi::TaskDef> &task_defs); | |||
~AiCoreTaskBuilder() = default; | |||
Status BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic); | |||
Status BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic, bool is_single_op = false); | |||
private: | |||
bool ExpectAtomicAddrCleanTask(); | |||
@@ -27,7 +27,7 @@ namespace ge { | |||
namespace hybrid { | |||
REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::GE_LOCAL, GeLocalNodeExecutor); | |||
const std::unordered_map<std::string, std::vector<uint32_t>> | |||
const std::map<std::string, std::vector<uint32_t>> | |||
RefInputTask::out_ref_input_index_ = {{DATA, {}}, | |||
{AIPPDATA, {}}, | |||
{RESHAPE, {}}, | |||
@@ -36,7 +36,7 @@ const std::unordered_map<std::string, std::vector<uint32_t>> | |||
{BROADCASTGRADIENTARGS, {}} | |||
}; | |||
const std::unordered_set<std::string> DependInputShapeTask::depend_input_shape_ops_ = {SHAPE, SHAPEN, RANK, SIZE}; | |||
const std::set<std::string> DependInputShapeTask::depend_input_shape_ops_ = {SHAPE, SHAPEN, RANK, SIZE}; | |||
Status RefInputTask::UpdateArgs(TaskContext &) { | |||
// no need update args | |||
@@ -46,7 +46,7 @@ class RefInputTask : public NodeTask { | |||
// key is op type, value is output ref input index, | |||
// e.g. {1,0} means out[0] ref input[1], out[1] ref input[0], if vector is empty, it means ref input one by one | |||
static const std::unordered_map<std::string, std::vector<uint32_t>> out_ref_input_index_; | |||
static const std::map<std::string, std::vector<uint32_t>> out_ref_input_index_; | |||
}; | |||
class DependInputShapeTask : public NodeTask { | |||
@@ -65,7 +65,7 @@ class DependInputShapeTask : public NodeTask { | |||
const NodePtr node_; | |||
// ops depend input shape | |||
static const std::unordered_set<std::string> depend_input_shape_ops_; | |||
static const std::set<std::string> depend_input_shape_ops_; | |||
}; | |||
class ConstantNodeTask : public NodeTask { | |||
@@ -31,7 +31,7 @@ using std::map; | |||
using std::vector; | |||
namespace ge { | |||
class GELib { | |||
class GE_FUNC_VISIBILITY GELib { | |||
public: | |||
GELib() = default; | |||
~GELib() = default; | |||
@@ -77,7 +77,7 @@ Status CheckInputFormat(const string &input_format) { | |||
return ge::SUCCESS; | |||
} | |||
bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map, | |||
bool CheckDynamicBatchSizeInputShapeValid(map<string, vector<int64_t>> shape_map, | |||
std::string &dynamic_batch_size) { | |||
int32_t size = 0; | |||
for (auto iter = shape_map.begin(); iter != shape_map.end(); ++iter) { | |||
@@ -119,7 +119,7 @@ bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> | |||
return true; | |||
} | |||
bool CheckDynamicImagesizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map, | |||
bool CheckDynamicImagesizeInputShapeValid(map<string, vector<int64_t>> shape_map, | |||
const std::string input_format, std::string &dynamic_image_size) { | |||
if (!input_format.empty() && !ge::TypeUtils::IsFormatValid(input_format.c_str())) { | |||
GELOGE(ge::PARAM_INVALID, "user input format [%s] is not found!", input_format.c_str()); | |||
@@ -177,7 +177,7 @@ bool CheckDynamicImagesizeInputShapeValid(unordered_map<string, vector<int64_t>> | |||
return true; | |||
} | |||
bool CheckDynamicDimsInputShapeValid(const unordered_map<string, vector<int64_t>> &shape_map, | |||
bool CheckDynamicDimsInputShapeValid(const map<string, vector<int64_t>> &shape_map, | |||
string input_format, string &dynamic_dims) { | |||
if (input_format != "ND") { | |||
ErrorManager::GetInstance().ATCReportErrMessage( | |||
@@ -272,7 +272,7 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i | |||
return ge::SUCCESS; | |||
} | |||
unordered_map<string, vector<int64_t>> shape_map; | |||
map<string, vector<int64_t>> shape_map; | |||
vector<pair<string, vector<int64_t>>> user_shape_map; | |||
is_dynamic_input = true; | |||
if (input_shape.empty()) { | |||
@@ -310,7 +310,7 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i | |||
return ge::SUCCESS; | |||
} | |||
bool ParseInputShape(const string &input_shape, unordered_map<string, vector<int64_t>> &shape_map, | |||
bool ParseInputShape(const string &input_shape, map<string, vector<int64_t>> &shape_map, | |||
vector<pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input) { | |||
vector<string> shape_vec = StringUtils::Split(input_shape, ';'); | |||
const int DEFAULT_SHAPE_PAIR_SIZE = 2; | |||
@@ -46,13 +46,13 @@ static std::map<std::string, domiTensorFormat_t> input_format_str_to_geformat = | |||
static const std::string kEnableCompressWeightTrue = "1"; | |||
static const std::string kEnableCompressWeightFalse = "0"; | |||
bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map, | |||
bool CheckDynamicBatchSizeInputShapeValid(map<string, vector<int64_t>> shape_map, | |||
std::string &dynamic_batch_size); | |||
bool CheckDynamicImagesizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map, | |||
bool CheckDynamicImagesizeInputShapeValid(map<string, vector<int64_t>> shape_map, | |||
const std::string input_format, std::string &dynamic_image_size); | |||
bool CheckDynamicDimsInputShapeValid(const std::unordered_map<std::string, std::vector<int64_t>> &shape_map, | |||
bool CheckDynamicDimsInputShapeValid(const std::map<std::string, std::vector<int64_t>> &shape_map, | |||
std::string input_format, std::string &dynamic_dims); | |||
bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims); | |||
@@ -61,7 +61,7 @@ Status CheckDynamicInputParamValid(std::string &dynamic_batch_size, std::string | |||
std::string &dynamic_dims, const std::string input_shape, | |||
const std::string input_format, bool &is_dynamic_input); | |||
bool ParseInputShape(const std::string &input_shape, std::unordered_map<string, std::vector<int64_t>> &shape_map, | |||
bool ParseInputShape(const std::string &input_shape, std::map<string, std::vector<int64_t>> &shape_map, | |||
std::vector<std::pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input = false); | |||
Status CheckOutputTypeParamValid(const std::string output_type); | |||
@@ -268,7 +268,7 @@ graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { | |||
if (options_.find(kInputShape) == options_.end()) { | |||
return GRAPH_SUCCESS; | |||
} | |||
unordered_map<string, vector<int64_t>> shape_map; | |||
map<string, vector<int64_t>> shape_map; | |||
vector<pair<string, vector<int64_t>>> user_shape_map; | |||
GE_CHK_BOOL_EXEC(ParseInputShape(options_[kInputShape], shape_map, user_shape_map, true), | |||
return GRAPH_PARAM_INVALID, "parse input shape failed!"); | |||
@@ -23,6 +23,7 @@ target_compile_options(atc_atc.bin PRIVATE | |||
-O2 | |||
-Wno-deprecated-declarations | |||
-fno-common | |||
-fvisibility=hidden | |||
) | |||
target_compile_definitions(atc_atc.bin PRIVATE | |||
@@ -30,6 +31,7 @@ target_compile_definitions(atc_atc.bin PRIVATE | |||
COMPILE_OMG_PACKAGE | |||
google=ascend_private | |||
LOG_CPP | |||
FUNC_VISIBILITY | |||
) | |||
target_include_directories(atc_atc.bin PRIVATE | |||
@@ -58,6 +60,10 @@ target_include_directories(atc_atc.bin PRIVATE | |||
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | |||
) | |||
# Forwards -Bsymbolic to the linker (via -Wl,) when linking atc_atc.bin. | |||
# NOTE(review): -Bsymbolic is documented for shared-library links; confirm it has the intended | |||
# effect on this target together with the -fvisibility=hidden compile option. | |||
target_link_options(atc_atc.bin PRIVATE | |||
-Wl,-Bsymbolic | |||
) | |||
target_link_libraries(atc_atc.bin PRIVATE | |||
$<BUILD_INTERFACE:intf_pub> | |||
ascend_protobuf | |||
@@ -90,6 +96,7 @@ target_compile_options(fwk_atc.bin PRIVATE | |||
-O2 | |||
-Wno-deprecated-declarations | |||
-fno-common | |||
-fvisibility=hidden | |||
) | |||
target_compile_definitions(fwk_atc.bin PRIVATE | |||
@@ -97,6 +104,7 @@ target_compile_definitions(fwk_atc.bin PRIVATE | |||
COMPILE_OMG_PACKAGE | |||
google=ascend_private | |||
LOG_CPP | |||
FUNC_VISIBILITY | |||
) | |||
target_include_directories(fwk_atc.bin PRIVATE | |||
@@ -125,6 +133,10 @@ target_include_directories(fwk_atc.bin PRIVATE | |||
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | |||
) | |||
# Forwards -Bsymbolic to the linker (via -Wl,) when linking fwk_atc.bin. | |||
# NOTE(review): -Bsymbolic is documented for shared-library links; confirm it has the intended | |||
# effect on this target together with the -fvisibility=hidden compile option. | |||
target_link_options(fwk_atc.bin PRIVATE | |||
-Wl,-Bsymbolic | |||
) | |||
target_link_libraries(fwk_atc.bin PRIVATE | |||
$<BUILD_INTERFACE:intf_pub> | |||
ascend_protobuf | |||
@@ -23,7 +23,7 @@ | |||
namespace ge { | |||
using OpsKernelBuilderPtr = std::shared_ptr<OpsKernelBuilder>; | |||
class OpsKernelBuilderManager { | |||
class GE_FUNC_VISIBILITY OpsKernelBuilderManager { | |||
public: | |||
~OpsKernelBuilderManager(); | |||
@@ -41,7 +41,7 @@ using std::vector; | |||
namespace ge { | |||
using OpsKernelInfoStorePtr = std::shared_ptr<OpsKernelInfoStore>; | |||
class OpsKernelManager { | |||
class GE_FUNC_VISIBILITY OpsKernelManager { | |||
public: | |||
friend class GELib; | |||
@@ -9,11 +9,13 @@ add_library(engine SHARED ${SRC_LIST}) | |||
target_compile_options(engine PRIVATE | |||
-Werror | |||
-fno-common | |||
-fvisibility=hidden | |||
) | |||
target_compile_definitions(engine PRIVATE | |||
REUSE_MEMORY=1 | |||
PROTOBUF_INLINE_NOT_IN_HEADERS=0 | |||
FUNC_VISIBILITY | |||
) | |||
target_include_directories(engine PRIVATE | |||
@@ -32,6 +34,10 @@ target_include_directories(engine PRIVATE | |||
${GE_CODE_DIR}/third_party/fwkacllib/inc | |||
) | |||
# Forwards -Bsymbolic to the linker (via -Wl,) when linking the engine shared library. | |||
# NOTE(review): presumably this binds the library's references to its own global symbol | |||
# definitions at link time -- confirm against the linker documentation. | |||
target_link_options(engine PRIVATE | |||
-Wl,-Bsymbolic | |||
) | |||
target_link_libraries(engine PRIVATE | |||
$<BUILD_INTERFACE:intf_pub> | |||
-Wl,--no-as-needed | |||
@@ -25,7 +25,7 @@ | |||
#include "plugin/engine/engine_manage.h" | |||
namespace ge { | |||
class AICoreDNNEngine : public DNNEngine { | |||
class GE_FUNC_VISIBILITY AICoreDNNEngine : public DNNEngine { | |||
public: | |||
AICoreDNNEngine() = default; | |||
explicit AICoreDNNEngine(const std::string &engine_name); | |||
@@ -40,7 +40,7 @@ class AICoreDNNEngine : public DNNEngine { | |||
DNNEngineAttribute engine_attribute_; | |||
}; | |||
class VectorCoreDNNEngine : public DNNEngine { | |||
class GE_FUNC_VISIBILITY VectorCoreDNNEngine : public DNNEngine { | |||
public: | |||
VectorCoreDNNEngine() = default; | |||
explicit VectorCoreDNNEngine(const std::string &engine_name); | |||
@@ -56,7 +56,7 @@ class VectorCoreDNNEngine : public DNNEngine { | |||
}; | |||
class AICpuDNNEngine : public DNNEngine { | |||
class GE_FUNC_VISIBILITY AICpuDNNEngine : public DNNEngine { | |||
public: | |||
AICpuDNNEngine() = default; | |||
explicit AICpuDNNEngine(const std::string &engine_name); | |||
@@ -71,7 +71,7 @@ class AICpuDNNEngine : public DNNEngine { | |||
DNNEngineAttribute engine_attribute_; | |||
}; | |||
class AICpuTFDNNEngine : public DNNEngine { | |||
class GE_FUNC_VISIBILITY AICpuTFDNNEngine : public DNNEngine { | |||
public: | |||
AICpuTFDNNEngine() = default; | |||
explicit AICpuTFDNNEngine(const std::string &engine_name); | |||
@@ -86,7 +86,7 @@ class AICpuTFDNNEngine : public DNNEngine { | |||
DNNEngineAttribute engine_attribute_; | |||
}; | |||
class GeLocalDNNEngine : public DNNEngine { | |||
class GE_FUNC_VISIBILITY GeLocalDNNEngine : public DNNEngine { | |||
public: | |||
GeLocalDNNEngine() = default; | |||
explicit GeLocalDNNEngine(const std::string &engine_name); | |||
@@ -101,7 +101,7 @@ class GeLocalDNNEngine : public DNNEngine { | |||
DNNEngineAttribute engine_attribute_; | |||
}; | |||
class HostCpuDNNEngine : public DNNEngine { | |||
class GE_FUNC_VISIBILITY HostCpuDNNEngine : public DNNEngine { | |||
public: | |||
HostCpuDNNEngine() = default; | |||
explicit HostCpuDNNEngine(const std::string &engine_name); | |||
@@ -116,7 +116,7 @@ private: | |||
DNNEngineAttribute engine_attribute_; | |||
}; | |||
class RtsDNNEngine : public DNNEngine { | |||
class GE_FUNC_VISIBILITY RtsDNNEngine : public DNNEngine { | |||
public: | |||
RtsDNNEngine() = default; | |||
explicit RtsDNNEngine(const std::string &engine_name); | |||
@@ -131,7 +131,7 @@ class RtsDNNEngine : public DNNEngine { | |||
DNNEngineAttribute engine_attribute_; | |||
}; | |||
class HcclDNNEngine : public DNNEngine { | |||
class GE_FUNC_VISIBILITY HcclDNNEngine : public DNNEngine { | |||
public: | |||
HcclDNNEngine() = default; | |||
explicit HcclDNNEngine(const std::string &engine_name); | |||
@@ -17,6 +17,20 @@ | |||
#ifndef GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_ | |||
#define GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_ | |||
// GE_FUNC_VISIBILITY marks the classes and functions that stay visible outside the library.
// It only expands to an export annotation when the build defines FUNC_VISIBILITY:
//   - MSVC:            __declspec(dllexport)
//   - other compilers: __attribute__((visibility("default")))
// Without FUNC_VISIBILITY it expands to nothing.
#if !defined(FUNC_VISIBILITY)
#define GE_FUNC_VISIBILITY
#elif defined(_MSC_VER)
#define GE_FUNC_VISIBILITY __declspec(dllexport)
#else
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#endif
#include <map> | |||
#include <memory> | |||
#include <string> | |||
@@ -26,7 +40,7 @@ | |||
namespace ge { | |||
using DNNEnginePtr = std::shared_ptr<DNNEngine>; | |||
class EngineManager { | |||
class GE_FUNC_VISIBILITY EngineManager { | |||
public: | |||
static Status RegisterEngine(const std::string &engine_name, DNNEnginePtr engine_ptr); | |||
static DNNEnginePtr GetEngine(const std::string &engine_name); | |||
@@ -34,7 +48,7 @@ class EngineManager { | |||
}; | |||
extern "C" { | |||
void GetDNNEngineObjs(std::map<std::string, DNNEnginePtr> &engines); | |||
GE_FUNC_VISIBILITY void GetDNNEngineObjs(std::map<std::string, DNNEnginePtr> &engines); | |||
} | |||
} // namespace ge | |||
#endif // GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_ |
@@ -77,6 +77,23 @@ Status InnerSession::Initialize() { | |||
UpdateThreadContext(std::map<std::string, std::string>{}); | |||
// session device id set here | |||
std::string str_session_device_id; | |||
if (GetContext().GetOption("ge.session_device_id", str_session_device_id) == SUCCESS) { | |||
GELOGI("Option session device id has set, value is %s.", str_session_device_id.c_str()); | |||
uint32_t session_device_id = 0; | |||
try { | |||
session_device_id = static_cast<uint32_t>(std::stoi(str_session_device_id.c_str())); | |||
// session device id has priority | |||
GetContext().SetCtxDeviceId(session_device_id); | |||
} catch (std::invalid_argument &) { | |||
GELOGW("session device id %s transform to int failed.", str_session_device_id.c_str()); | |||
} catch (std::out_of_range &) { | |||
GELOGW("session device id %s transform to int failed.", str_session_device_id.c_str()); | |||
} | |||
} | |||
GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId())); | |||
DumpProperties dump_properties; | |||
@@ -606,7 +606,7 @@ Status InitDomiOmgContext(const string &input_shape, const string &input_format, | |||
} | |||
// Analyze the input shape paramete | |||
unordered_map<string, vector<int64_t>> &shape_map = domi::GetContext().input_dims; | |||
map<string, vector<int64_t>> &shape_map = domi::GetContext().input_dims; | |||
if (!ge::ParseInputShape(input_shape, domi::GetContext().input_dims, domi::GetContext().user_input_dims, | |||
is_dynamic_input) || | |||
@@ -689,7 +689,7 @@ Status ParseOutNodes(const string &out_nodes) { | |||
/// | |||
static Status CheckOpNameMap(const ComputeGraphPtr &graph, const std::string &op_conf) { | |||
GE_CHECK_NOTNULL(graph); | |||
unordered_map<string, string> graphNodeTypes; | |||
map<string, string> graphNodeTypes; | |||
for (const NodePtr &node : graph->GetAllNodes()) { | |||
auto op_desc = node->GetOpDesc(); | |||
if (op_desc == nullptr) { | |||
@@ -256,9 +256,27 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||
const vector<DataBuffer> &input_buffers, | |||
vector<GeTensorDesc> &output_desc, | |||
vector<DataBuffer> &output_buffers) { | |||
GE_CHECK_NOTNULL(op_task_); | |||
GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); | |||
if (hybrid_model_executor_ != nullptr) { | |||
GELOGD("Execute multi-task dynamic single op by hybrid model executor"); | |||
hybrid::HybridModelExecutor::ExecuteArgs args; | |||
for (auto &input : input_buffers) { | |||
args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length)); | |||
} | |||
for (auto &output : output_buffers) { | |||
args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length)); | |||
} | |||
for (auto &tensor_desc : input_desc) { | |||
auto desc = MakeShared<GeTensorDesc>(tensor_desc); | |||
GE_CHECK_NOTNULL(desc); | |||
args.input_desc.emplace_back(desc); | |||
} | |||
return hybrid_model_executor_->Execute(args); | |||
} | |||
std::lock_guard<std::mutex> lk(*stream_mutex_); | |||
GE_CHECK_NOTNULL(op_task_); | |||
GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | |||
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | |||
@@ -28,6 +28,7 @@ | |||
#include "runtime/stream.h" | |||
#include "task/op_task.h" | |||
#include "cce/aicpu_engine_struct.h" | |||
#include "hybrid/executor/hybrid_model_executor.h" | |||
namespace ge { | |||
class StreamResource; | |||
@@ -46,7 +47,7 @@ class SingleOp { | |||
Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | |||
friend class SingleOpModel; | |||
StreamResource *stream_resource_; | |||
StreamResource *stream_resource_ = nullptr; | |||
std::mutex *stream_mutex_; | |||
rtStream_t stream_ = nullptr; | |||
std::vector<void *> input_addr_list_; | |||
@@ -77,6 +78,8 @@ class DynamicSingleOp { | |||
std::vector<DataBuffer> &outputs) const; | |||
std::unique_ptr<OpTask> op_task_; | |||
std::unique_ptr<hybrid::HybridModel> hybrid_model_; | |||
std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_; | |||
uintptr_t resource_id_ = 0; | |||
std::mutex *stream_mutex_; | |||
rtStream_t stream_ = nullptr; | |||
@@ -31,6 +31,8 @@ | |||
#include "task/aicpu_task_builder.h" | |||
#include "task/aicpu_kernel_task_builder.h" | |||
#include "task/tbe_task_builder.h" | |||
#include "hybrid/executor/hybrid_model_executor.h" | |||
#include "hybrid/node_executor/node_executor.h" | |||
static std::atomic<std::uint64_t> aicpu_kernel_id(0); | |||
@@ -42,6 +44,20 @@ namespace ge { | |||
namespace { | |||
const size_t kDataOutputNum = 1; | |||
} // namespace | |||
///
/// @brief Scan the compute graph of a model for a node that declares infer-depends.
/// @param [in] ge_model  model whose graph is inspected
/// @param [out] flag     set to true as soon as a node with a non-empty infer-depend list is found;
///                       left untouched otherwise, so the caller is expected to initialise it
/// @return SUCCESS when the scan completes; a failure status from GE_CHECK_NOTNULL when the model,
///         its compute graph or a node's OpDesc is null
///
static Status IfInferDepend(GeModelPtr &ge_model, bool &flag) {
  GE_CHECK_NOTNULL(ge_model);
  auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());
  // Guard against a null compute graph before iterating over its nodes.
  GE_CHECK_NOTNULL(comp_graph);
  for (const auto &node : comp_graph->GetAllNodes()) {
    auto op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    const auto &depends = op_desc->GetOpInferDepends();
    if (!depends.empty()) {
      // One dependent node is enough; no need to visit the rest of the graph.
      flag = true;
      return SUCCESS;
    }
  }
  return SUCCESS;
}
///
/// @brief Record the model name together with the pointer and size of the model buffer.
/// @param [in] model_name  copied into model_name_
/// @param [in] model_data  pointer stored in ori_model_data_ (the buffer itself is not copied here)
/// @param [in] model_size  stored in ori_model_size_
///
SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size)
    : model_name_(model_name),
      ori_model_data_(model_data),
      ori_model_size_(model_size) {
}
@@ -478,6 +494,30 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & | |||
single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); | |||
GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); | |||
model_params_.memory_size = UINT_MAX; | |||
auto ge_model = model_helper_.GetGeModel(); | |||
GE_CHECK_NOTNULL(ge_model); | |||
bool infer_depend_flag = false; | |||
GE_CHK_STATUS_RET_NOLOG(IfInferDepend(ge_model, infer_depend_flag)); | |||
if (ge_model->GetModelTaskDefPtr()->task_size() > 1 || infer_depend_flag) { | |||
GELOGD("Build single op HybridModel."); | |||
GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized()); | |||
auto root_model = model_helper_.GetGeRootModel(); | |||
GE_CHECK_NOTNULL(root_model); | |||
root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph())); | |||
root_model->SetSubgraphInstanceNameToModel(root_model->GetRootGraph()->GetName(), ge_model); | |||
single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model)); | |||
GE_CHECK_NOTNULL(single_op.hybrid_model_); | |||
GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "Failed to init hybrid model"); | |||
int32_t device_id = 0; | |||
GE_CHK_RT_RET(rtGetDevice(&device_id)); | |||
single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(), | |||
device_id, | |||
resource.GetStream())); | |||
GE_CHECK_NOTNULL(single_op.hybrid_model_executor_); | |||
GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "Failed to init hybrid model"); | |||
return SUCCESS; | |||
} | |||
return BuildTaskListForDynamicOp(single_op); | |||
} | |||
} // namespace ge |
@@ -61,6 +61,10 @@ DynamicSingleOp *StreamResource::GetDynamicOperator(const void *key) { | |||
return it->second.get(); | |||
} | |||
///
/// @brief Expose the stream handle currently held by this resource.
/// @return the value of stream_
///
rtStream_t StreamResource::GetStream() const {
  return this->stream_;
}
///
/// @brief Replace the stream handle held by this resource.
/// @param [in] stream  handle stored into stream_
///
void StreamResource::SetStream(rtStream_t stream) {
  this->stream_ = stream;
}
@@ -37,6 +37,7 @@ class StreamResource { | |||
StreamResource(StreamResource &&) = delete; | |||
StreamResource &operator=(const StreamResource &) = delete; | |||
StreamResource &operator=(StreamResource &&) = delete; | |||
rtStream_t GetStream() const; | |||
void SetStream(rtStream_t stream); | |||
SingleOp *GetOperator(const void *key); | |||
@@ -16,7 +16,7 @@ logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(leve | |||
""" | |||
this attr is used for symbol table visible | |||
""" | |||
GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY' | |||
GE_ATTR = 'GE_FUNC_VISIBILITY' | |||
""" | |||
generate stub func body by return type | |||
@@ -34,15 +34,15 @@ typedef uint32_t (*pCallBackFunc)(uint32_t graph_id, const std::map<AscendString | |||
} | |||
// Initialize GE | |||
ATTRIBUTED_DEPRECATED(Status GEInitialize(const std::map<AscendString, AscendString> &)) | |||
Status GEInitialize(const std::map<std::string, std::string> &options); | |||
ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY Status GEInitialize(const std::map<AscendString, AscendString> &)) | |||
GE_FUNC_VISIBILITY Status GEInitialize(const std::map<std::string, std::string> &options); | |||
Status GEInitialize(const std::map<AscendString, AscendString> &options); | |||
GE_FUNC_VISIBILITY Status GEInitialize(const std::map<AscendString, AscendString> &options); | |||
// Finalize GE, release all resources | |||
Status GEFinalize(); | |||
GE_FUNC_VISIBILITY Status GEFinalize(); | |||
class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Session { | |||
class GE_FUNC_VISIBILITY Session { | |||
public: | |||
ATTRIBUTED_DEPRECATED(Session(const std::map<AscendString, AscendString> &)) | |||
explicit Session(const std::map<std::string, std::string> &options); | |||
@@ -28,7 +28,7 @@ namespace ge { | |||
#define ATTRIBUTED_DEPRECATED(replacement) __declspec(deprecated("Please use " #replacement " instead.")) | |||
#endif | |||
class StatusFactory { | |||
class GE_FUNC_VISIBILITY StatusFactory { | |||
public: | |||
static StatusFactory *Instance() { | |||
static StatusFactory instance; | |||
@@ -70,7 +70,7 @@ class StatusFactory { | |||
std::map<uint32_t, std::string> err_desc_; | |||
}; | |||
class ErrorNoRegisterar { | |||
class GE_FUNC_VISIBILITY ErrorNoRegisterar { | |||
public: | |||
ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } | |||
ErrorNoRegisterar(uint32_t err, const char *desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } | |||
@@ -17,6 +17,20 @@ | |||
#ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | |||
#define INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | |||
// GE_FUNC_VISIBILITY is the export annotation used on public declarations.
// When FUNC_VISIBILITY is defined it expands to _declspec(dllexport) under MSVC and to
// __attribute__((visibility("default"))) on other compilers; otherwise it expands to nothing.
#if defined(_MSC_VER) | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#else | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#endif | |||
#include <stddef.h> | |||
#ifdef __cplusplus | |||
@@ -17,6 +17,20 @@ | |||
#ifndef INC_EXTERNAL_GE_IR_BUILD_H_ | |||
#define INC_EXTERNAL_GE_IR_BUILD_H_ | |||
// GE_FUNC_VISIBILITY is the export annotation used on public declarations.
// When FUNC_VISIBILITY is defined it expands to _declspec(dllexport) under MSVC and to
// __attribute__((visibility("default"))) on other compilers; otherwise it expands to nothing.
#if defined(_MSC_VER) | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#else | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#endif | |||
#include <string> | |||
#include <map> | |||
#include <memory> | |||
@@ -44,17 +58,17 @@ struct ModelBufferData { | |||
* @retval GRAPH_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ATTRIBUTED_DEPRECATED(graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &)) | |||
graphStatus aclgrphBuildInitialize(std::map<std::string, std::string> global_options); | |||
ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &)) | |||
GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map<std::string, std::string> global_options); | |||
graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &global_options); | |||
GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &global_options); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief build model. Notice that the model is stored in a buffer | |||
* | |||
*/ | |||
void aclgrphBuildFinalize(); | |||
GE_FUNC_VISIBILITY void aclgrphBuildFinalize(); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -66,12 +80,12 @@ void aclgrphBuildFinalize(); | |||
* @retval GRAPH_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ATTRIBUTED_DEPRECATED(graphStatus aclgrphBuildModel(const ge::Graph &, const std::map<AscendString, AscendString> &, | |||
ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &, const std::map<AscendString, AscendString> &, | |||
ModelBufferData &)) | |||
graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<std::string, std::string> &build_options, | |||
GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<std::string, std::string> &build_options, | |||
ModelBufferData &model); | |||
graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<AscendString, AscendString> &build_options, | |||
GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<AscendString, AscendString> &build_options, | |||
ModelBufferData &model); | |||
/** | |||
@@ -83,10 +97,10 @@ graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<AscendStrin | |||
* @retval GRAPH_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ATTRIBUTED_DEPRECATED(graphStatus aclgrphSaveModel(const char *, const ModelBufferData &)) | |||
graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &model); | |||
ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *, const ModelBufferData &)) | |||
GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &model); | |||
graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &model); | |||
GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &model); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -98,7 +112,7 @@ graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &mod | |||
* @retval GRAPH_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *patch_version); | |||
GE_FUNC_VISIBILITY graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *patch_version); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -110,7 +124,7 @@ graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *pat | |||
* @retval GRAPH_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len); | |||
GE_FUNC_VISIBILITY graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -123,7 +137,7 @@ graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const siz | |||
* @retval GRAPH_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
graphStatus aclgrphGenerateForOp(const AscendString &op_type, const std::vector<TensorDesc> &inputs, | |||
GE_FUNC_VISIBILITY graphStatus aclgrphGenerateForOp(const AscendString &op_type, const std::vector<TensorDesc> &inputs, | |||
const std::vector<TensorDesc> &outputs, Graph &graph); | |||
}; // namespace ge | |||
@@ -37,7 +37,7 @@ extern "C" { | |||
// trace status of log | |||
enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP }; | |||
class GeLog { | |||
class GE_FUNC_VISIBILITY GeLog { | |||
public: | |||
static uint64_t GetTid() { | |||
#ifdef __GNUC__ | |||
@@ -278,7 +278,7 @@ | |||
} while (0) | |||
template <typename T> | |||
std::string FmtToStr(const T &t) { | |||
GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) { | |||
std::string fmt; | |||
std::stringstream st; | |||
st << "[" << t << "]"; | |||
@@ -17,6 +17,20 @@ | |||
#ifndef INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_ | |||
#define INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_ | |||
// GE_FUNC_VISIBILITY is the export annotation used on public declarations.
// When FUNC_VISIBILITY is defined it expands to _declspec(dllexport) under MSVC and to
// __attribute__((visibility("default"))) on other compilers; otherwise it expands to nothing.
#if defined(_MSC_VER) | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#else | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#endif | |||
#include <map> | |||
#include <string> | |||
@@ -38,7 +52,7 @@ const int MODID_OME = 2; // OME module ID | |||
const int MODID_CALIBRATION = 3; // Calibration module ID | |||
namespace domi { | |||
class StatusFactory { | |||
class GE_FUNC_VISIBILITY StatusFactory { | |||
public: | |||
static StatusFactory *Instance(); | |||
@@ -54,7 +68,7 @@ class StatusFactory { | |||
std::map<uint32_t, std::string> err_desc_; | |||
}; | |||
class ErrorNoRegisterar { | |||
class GE_FUNC_VISIBILITY ErrorNoRegisterar { | |||
public: | |||
ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } | |||
~ErrorNoRegisterar() {} | |||
@@ -23,7 +23,7 @@ | |||
#include "graph/tensor.h" | |||
namespace ge { | |||
class GeFormatUtil { | |||
class GE_FUNC_VISIBILITY GeFormatUtil { | |||
public: | |||
/// | |||
/// @name TransShape | |||
@@ -215,7 +215,7 @@ struct ModelInfo { | |||
}; | |||
// Asynchronous callback interface, implemented by the caller | |||
class ModelListener { | |||
class GE_FUNC_VISIBILITY ModelListener { | |||
public: | |||
virtual ~ModelListener() {} | |||
/// | |||
@@ -17,11 +17,25 @@ | |||
#ifndef INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ | |||
#define INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ | |||
// GE_FUNC_VISIBILITY is the export annotation used on public declarations.
// When FUNC_VISIBILITY is defined it expands to _declspec(dllexport) under MSVC and to
// __attribute__((visibility("default"))) on other compilers; otherwise it expands to nothing.
#if defined(_MSC_VER) | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#else | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#endif | |||
#include <gflags/gflags.h> | |||
#include <string> | |||
namespace ge { | |||
class GflagsUtils { | |||
class GE_FUNC_VISIBILITY GflagsUtils { | |||
public: | |||
static bool IsSetCommandTrue(const char *name) { | |||
std::string out; | |||
@@ -28,7 +28,7 @@ | |||
#include "model/ge_root_model.h" | |||
namespace ge { | |||
class ModelHelper { | |||
class GE_FUNC_VISIBILITY ModelHelper { | |||
public: | |||
ModelHelper() = default; | |||
~ModelHelper(); | |||