@@ -50,9 +50,8 @@ CommentPragmas: '^ IWYU pragma:'
 CompactNamespaces: false
 ConstructorInitializerAllOnOneLineOrOnePerLine: true
 ConstructorInitializerIndentWidth: 4
-ContinuationIndentWidth: 2
+ContinuationIndentWidth: 4
 Cpp11BracedListStyle: true
-DerivePointerAlignment: true
 DisableFormat: false
 ExperimentalAutoDetectBinPacking: false
 FixNamespaceComments: true
@@ -94,7 +93,7 @@ PenaltyBreakString: 1000
 PenaltyBreakTemplateDeclaration: 10
 PenaltyExcessCharacter: 1000000
 PenaltyReturnTypeOnItsOwnLine: 200
-PointerAlignment: Left
+PointerAlignment: Right
 RawStringFormats:
   - Language: Cpp
     Delimiters:
@@ -95,6 +95,7 @@ else ()
     #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH})
 else()
     find_module(slog libalog.so ${ASCEND_ATC_DIR})
+    find_module(opt_feature libopt_feature.so ${ASCEND_ATC_DIR})
     find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR})
     if(PLATFORM STREQUAL "train")
         find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
@@ -144,7 +144,6 @@ build_graphengine()
     CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_GE_UT=ON"
   fi
   if [[ "X$ENABLE_GE_ST" = "Xon" ]]; then
     CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_GE_ST=ON"
   fi
@@ -176,7 +175,7 @@ build_graphengine()
     TARGET="ge_compiler atc_atc.bin ge_executor_shared ${TARGET}"
   elif [ "X$ENABLE_GE_ST" = "Xon" ]
   then
-    TARGET="ge_graph_dsl_test graph_engine_test"
+    TARGET="ge_graph_dsl_test ge_running_env_test graph_engine_test"
   elif [ "X$ENABLE_GE_UT" = "Xon" ]
   then
     TARGET="ut_libgraph ut_libge_multiparts_utest ut_libge_others_utest ut_libge_kernel_utest ut_libge_distinct_load_utest"
@@ -244,13 +243,13 @@ if [[ "X$ENABLE_GE_ST" = "Xon" ]]; then
     mkdir -p ${OUTPUT_PATH}/plugin/opskernel
     cp ${BUILD_PATH}/tests/framework/libnnengine.so ${OUTPUT_PATH}/plugin/nnengine
     cp ${BUILD_PATH}/engine_conf.json ${OUTPUT_PATH}/plugin/nnengine/ge_config
-    cp ${BUILD_PATH}/tests/framework/libhost_cpu_engine.so ${OUTPUT_PATH}/plugin/opskernel
     cp ${BUILD_PATH}/tests/framework/libge_local_engine.so ${OUTPUT_PATH}/plugin/opskernel
-    cp ${BUILD_PATH}/tests/framework/stub_engine/libfe.so ${OUTPUT_PATH}/plugin/opskernel
     #prepare st execution bin
     cp ${BUILD_PATH}/tests/st/testcase/graph_engine_test ${OUTPUT_PATH}
+    cp ${BUILD_PATH}/tests/framework/ge_running_env/tests/ge_running_env_test ${OUTPUT_PATH}
     cp ${BUILD_PATH}/tests/framework/ge_graph_dsl/tests/ge_graph_dsl_test ${OUTPUT_PATH}
     #execute st testcase
+    RUN_TEST_CASE=${OUTPUT_PATH}/ge_running_env_test && ${RUN_TEST_CASE}
     RUN_TEST_CASE=${OUTPUT_PATH}/graph_engine_test && ${RUN_TEST_CASE}
     RUN_TEST_CASE=${OUTPUT_PATH}/ge_graph_dsl_test && ${RUN_TEST_CASE}
     if [[ "$?" -ne 0 ]]; then
@@ -298,7 +298,9 @@ set(TRAIN_SRC_LIST
     "graph/passes/hccl_continuous_memcpy_pass.cc"
     "graph/passes/identity_pass.cc"
     "graph/passes/ref_identity_delete_op_pass.cc"
+    "graph/passes/infer_base_pass.cc"
     "graph/passes/infershape_pass.cc"
+    "graph/passes/infer_value_range_pass.cc"
     "graph/passes/iterator_op_pass.cc"
     "graph/passes/link_gen_mask_nodes_pass.cc"
     "graph/passes/merge_pass.cc"
@@ -434,6 +436,7 @@ set(TRAIN_SRC_LIST
     "graph/build/memory/max_block_mem_assigner.cc"
     "graph/build/memory/var_mem_assign_util.cc"
     "graph/build/memory/buffer_pool_mem_assigner.cc"
+    "ge_opt_info/ge_opt_info.cc"
 )
 set(INFER_SRC_LIST
@@ -547,7 +550,9 @@ set(INFER_SRC_LIST
     "graph/passes/shape_operate_op_remove_pass.cc"
     "graph/passes/assert_pass.cc"
     "graph/passes/dropout_pass.cc"
+    "graph/passes/infer_base_pass.cc"
     "graph/passes/infershape_pass.cc"
+    "graph/passes/infer_value_range_pass.cc"
     "graph/passes/unused_const_pass.cc"
     "graph/passes/permute_pass.cc"
     "graph/passes/ctrl_edge_transfer_pass.cc"
@@ -711,6 +716,7 @@ set(INFER_SRC_LIST
     "graph/build/memory/max_block_mem_assigner.cc"
     "graph/build/memory/var_mem_assign_util.cc"
     "graph/build/memory/buffer_pool_mem_assigner.cc"
+    "ge_opt_info/ge_opt_info.cc"
 )
 if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
@@ -765,11 +771,13 @@ target_include_directories(ge_runner SYSTEM PRIVATE
     ${GE_CODE_DIR}/../inc
     ${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external
     ${GE_CODE_DIR}/../abl/adump/external
+    ${GE_CODE_DIR}/../abl/licctrl
     #### blue zone
     ${ASCEND_DIR}/driver/include
     ${ASCEND_DIR}/fwkacllib/include
     ${GE_CODE_DIR}/third_party/fwkacllib/inc
     ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
+    ${GE_CODE_DIR}/third_party/fwkacllib/inc/opt_info
 )
 target_link_options(ge_runner PRIVATE
@@ -792,6 +800,7 @@ target_link_libraries(ge_runner PRIVATE
     runtime
     error_manager
     ascend_hal_stub
+    opt_feature
     -Wl,--as-needed
     json
     -lrt
@@ -839,11 +848,13 @@ target_include_directories(ge_compiler SYSTEM PRIVATE
     ${GE_CODE_DIR}/../inc
     ${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external
     ${GE_CODE_DIR}/../abl/adump/external
+    ${GE_CODE_DIR}/../abl/licctrl
     #### blue zone ####
     ${ASCEND_DIR}/driver/include
     ${ASCEND_DIR}/fwkacllib/include
     ${GE_CODE_DIR}/third_party/fwkacllib/inc
     ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
+    ${GE_CODE_DIR}/third_party/fwkacllib/inc/opt_info
 )
 target_link_options(ge_compiler PRIVATE
@@ -863,6 +874,7 @@ target_link_libraries(ge_compiler PRIVATE
     error_manager
     slog
     runtime_compile
+    opt_feature
     -Wl,--as-needed
     json
     -lrt
@@ -95,6 +95,7 @@ target_link_libraries(ge_common PRIVATE
     $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:slog_headers>>
     $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:msprof_headers>>
     $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:mmpa_headers>>
+    $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:runtime_headers>>
     static_mmpa
     -Wl,--no-as-needed
     graph
@@ -155,6 +156,7 @@ target_link_libraries(ge_common_static PRIVATE
     $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:slog_headers>>
     $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:msprof_headers>>
     $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:mmpa_headers>>
+    $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:runtime_headers>>
     ascend_protobuf_static
     json
     c_sec
@@ -18,6 +18,7 @@
 #include <cstdio>
 #include <string>
+#include <regex>
 #include "common/ge/ge_util.h"
 #include "framework/common/util.h"
@@ -37,6 +38,159 @@ const uint32_t kAtomicOverflow = (0x1 << 1);
 const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow);
 }  // namespace
 namespace ge {
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::Split(const std::string &s,
+                                                                            std::vector<std::string> &result,
+                                                                            const char *delchar) {
+  if (s.empty()) {
+    return;
+  }
+  result.clear();
+  char *buffer = new (std::nothrow) char[s.size() + 1];
+  if (buffer == nullptr) {
+    GELOGE(FAILED, "[Split][string] failed while malloc memory, string value is:%s", s.c_str());
+    REPORT_CALL_ERROR("E19999", "Memory malloc may fail when split string, get fatal exception, "
+                      "string value is:%s", s.c_str());
+    return;
+  }
+  buffer[s.size()] = '\0';
+  errno_t e = strcpy_s(buffer, s.size() + 1, s.c_str());
+  if (e != EOK) {
+    delete[] buffer;
+    return;
+  }
+  char *context = nullptr;
+  char *p = strtok_s(buffer, delchar, &context);
+  while (p != nullptr) {
+    result.emplace_back(p);
+    p = strtok_s(nullptr, delchar, &context);
+  }
+  delete[] buffer;
+}
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpProperties::CheckDumpStep(const std::string &dump_step) {
+  std::string modified_dump_step = dump_step + "|";
+  std::smatch result;
+  std::vector<string> match_vecs;
+  std::regex pattern(R"((\d{1,}-\d{1,}\||\d{1,}\|)+)");
+  if (regex_match(modified_dump_step, result, pattern)) {
+    Split(result.str(), match_vecs, "|");
+    if (match_vecs.empty()) {
+      REPORT_CALL_ERROR("E19999", "Split may get fatal exception, dump_step:%s.", dump_step.c_str());
+      GELOGE(FAILED, "[Check][Param] failed. Split may get fatal exception, ge.exec.dumpStep:%s.", dump_step.c_str());
+      return FAILED;
+    }
+    // 100 is the max sets of dump steps.
+    if (match_vecs.size() > 100) {
+      REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
+                         std::vector<std::string>({
+                             "ge.exec.dumpStep",
+                             dump_step.c_str(),
+                             " is not supported, only support dump <= 100 sets of data"}));
+      GELOGE(PARAM_INVALID, "[Check][Param] get dump_step value:%s, "
+             "dump_step only support dump <= 100 sets of data.", dump_step.c_str());
+      return PARAM_INVALID;
+    }
+    for (const auto &match_vec : match_vecs) {
+      std::vector<string> vec_after_split;
+      Split(match_vec, vec_after_split, "-");
+      if (vec_after_split.empty()) {
+        REPORT_CALL_ERROR("E19999", "Split may get fatal exception.");
+        GELOGE(FAILED, "[Check][Param] failed, split may get fatal exception.");
+        return FAILED;
+      }
+      if (vec_after_split.size() > 1) {
+        if (std::atoi(vec_after_split[0].c_str()) >= std::atoi(vec_after_split[1].c_str())) {
+          REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
+                             std::vector<std::string>({
+                                 "ge.exec.dumpStep",
+                                 dump_step.c_str(),
+                                 " is not supported. "
+                                 "In range steps, the first step is >= second step, correct example:'0|5|10-20'"}));
+          GELOGE(PARAM_INVALID, "[Check][Param] get dump_step value:%s, "
+                 "in range steps, the first step is >= second step, correct example:'0|5|10-20'", dump_step.c_str());
+          return PARAM_INVALID;
+        }
+      }
+    }
+  } else {
+    REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
+                       std::vector<std::string>({
+                           "ge.exec.dumpStep",
+                           dump_step.c_str(),
+                           " is not supported, correct example:'0|5|10|50-100'."}));
+    GELOGE(PARAM_INVALID, "[Check][Param] get dump_step value:%s, "
+           "dump_step string style is error, correct example:'0|5|10|50-100'.", dump_step.c_str());
+    return PARAM_INVALID;
+  }
+  return SUCCESS;
+}
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpProperties::CheckDumpMode(const std::string &dump_mode) {
+  const std::set<string> dump_mode_list = {"input", "output", "all"};
+  std::set<string>::iterator iter;
+  if ((iter = dump_mode_list.find(dump_mode)) == dump_mode_list.end()) {
+    REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
+                       std::vector<std::string>({
+                           "ge.exec.dumpMode",
+                           dump_mode.c_str(),
+                           " is not supported, should be one of the following:[input, output, all]"}));
+    GELOGE(PARAM_INVALID, "[Check][Param] the dump_mode:%s, is not supported, "
+           "should be one of the following:[input, output, all].", dump_mode.c_str());
+    return PARAM_INVALID;
+  }
+  return SUCCESS;
+}
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpProperties::CheckDumpPath(const std::string &input) {
+  if (mmIsDir(input.c_str()) != EN_OK) {
+    REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
+                       std::vector<std::string>({
+                           "ge.exec.dumpPath",
+                           input.c_str(),
+                           " is not a directory."}));
+    GELOGE(PARAM_INVALID, "[Check][Param] the path:%s, is not a directory.", input.c_str());
+    return PARAM_INVALID;
+  }
+  char trusted_path[MMPA_MAX_PATH] = { "\0" };
+  if (mmRealPath(input.c_str(), trusted_path, MMPA_MAX_PATH) != EN_OK) {
+    REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
+                       std::vector<std::string>({
+                           "ge.exec.dumpPath",
+                           input.c_str(),
+                           " dumpPath invalid."}));
+    GELOGE(PARAM_INVALID, "[Check][Param] the dumpPath:%s, is invalid.", input.c_str());
+    return PARAM_INVALID;
+  }
+  if (mmAccess2(trusted_path, M_R_OK | M_W_OK) != EN_OK) {
+    REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
+                       std::vector<std::string>({
+                           "ge.exec.dumpPath",
+                           input.c_str(),
+                           " doesn't have read, write permissions."}));
+    GELOGE(PARAM_INVALID, "[Check][Param] the path:%s, doesn't have read, write permissions.", input.c_str());
+    return PARAM_INVALID;
+  }
+  return SUCCESS;
+}
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpProperties::CheckEnableDump(const std::string &input) {
+  std::set<string> enable_dump_option_list = {"1", "0"};
+  auto it = enable_dump_option_list.find(input);
+  if (it == enable_dump_option_list.end()) {
+    REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
+                       std::vector<std::string>({
+                           "ge.exec.enableDump",
+                           input.c_str(),
+                           " only support 1 or 0."}));
+    GELOGE(PARAM_INVALID, "[Check][Param] Not support ge.exec.enableDump or ge.exec.enableDumpDebug format:%s, "
+           "only support 1 or 0.", input.c_str());
+    return PARAM_INVALID;
+  }
+  return SUCCESS;
+}
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties::DumpProperties(const DumpProperties &other) {
   CopyFrom(other);
 }
@@ -47,7 +201,26 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &DumpProperties:
   return *this;
 }
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOptions() {
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpProperties::SetDumpOptions() {
+  if (enable_dump_ == kEnableFlag) {
+    std::string dump_step;
+    if (GetContext().GetOption(OPTION_EXEC_DUMP_STEP, dump_step) == GRAPH_SUCCESS) {
+      GE_CHK_STATUS_RET(CheckDumpStep(dump_step), "[Check][dump_step] failed.");
+      GELOGI("Get dump step %s successfully", dump_step.c_str());
+      SetDumpStep(dump_step);
+    }
+    string dump_mode = "output";
+    if (GetContext().GetOption(OPTION_EXEC_DUMP_MODE, dump_mode) == GRAPH_SUCCESS) {
+      GELOGI("Get dump mode %s successfully", dump_mode.c_str());
+      GE_CHK_STATUS_RET(CheckDumpMode(dump_mode), "[Check][dump_mode] failed.");
+      SetDumpMode(dump_mode);
+    }
+    AddPropertyValue(DUMP_ALL_MODEL, {});
+  }
+  return SUCCESS;
+}
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpProperties::InitByOptions() {
   enable_dump_.clear();
   enable_dump_debug_.clear();
   dump_path_.clear();
@@ -57,17 +230,32 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti
   is_infer_op_debug_ = false;
   op_debug_mode_ = 0;
-  std::string enable_dump;
+  std::string enable_dump = std::to_string(false);
   (void)GetContext().GetOption(OPTION_EXEC_ENABLE_DUMP, enable_dump);
   enable_dump_ = enable_dump;
+  if (!enable_dump_.empty()) {
+    GE_CHK_STATUS_RET(CheckEnableDump(enable_dump_), "[Check][enable_dump] failed.");
+  }
-  std::string enable_dump_debug;
+  std::string enable_dump_debug = std::to_string(false);
   (void)GetContext().GetOption(OPTION_EXEC_ENABLE_DUMP_DEBUG, enable_dump_debug);
   enable_dump_debug_ = enable_dump_debug;
+  if (!enable_dump_debug_.empty()) {
+    GE_CHK_STATUS_RET(CheckEnableDump(enable_dump_debug_), "[Check][enable_dump_debug] failed.");
+  }
+  if ((enable_dump_ == kEnableFlag) && (enable_dump_debug_ == kEnableFlag)) {
+    REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
+                       std::vector<std::string>({
+                           "ge.exec.enableDump and ge.exec.enableDumpDebug",
+                           enable_dump_ + ", " + enable_dump_debug,
+                           "ge.exec.enableDump and ge.exec.enableDumpDebug cannot be set to 1 at the same time."}));
+    GELOGE(FAILED, "ge.exec.enableDump and ge.exec.enableDumpDebug cannot be both set to 1 at the same time.");
+    return FAILED;
+  }
   if ((enable_dump_ == kEnableFlag) || (enable_dump_debug_ == kEnableFlag)) {
     std::string dump_path;
     if (GetContext().GetOption(OPTION_EXEC_DUMP_PATH, dump_path) == GRAPH_SUCCESS) {
+      GE_CHK_STATUS_RET(CheckDumpPath(dump_path), "Check dump path failed.");
       if (!dump_path.empty() && dump_path[dump_path.size() - 1] != '/') {
         dump_path = dump_path + "/";
       }
@@ -75,25 +263,21 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti
       GELOGI("Get dump path %s successfully", dump_path.c_str());
       SetDumpPath(dump_path);
     } else {
-      GELOGW("Dump path is not set");
+      REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
+                         std::vector<std::string>({
+                             "ge.exec.dumpPath",
+                             dump_path,
+                             "ge.exec.dumpPath is not set."}));
+      GELOGE(FAILED, "[Check][dump_path] failed. Dump path is not set.");
+      return FAILED;
     }
   }
-  if (enable_dump_ == kEnableFlag) {
-    std::string dump_step;
-    if (GetContext().GetOption(OPTION_EXEC_DUMP_STEP, dump_step) == GRAPH_SUCCESS) {
-      GELOGI("Get dump step %s successfully", dump_step.c_str());
-      SetDumpStep(dump_step);
-    }
-    string dump_mode;
-    if (GetContext().GetOption(OPTION_EXEC_DUMP_MODE, dump_mode) == GRAPH_SUCCESS) {
-      GELOGI("Get dump mode %s successfully", dump_mode.c_str());
-      SetDumpMode(dump_mode);
-    }
-    AddPropertyValue(DUMP_ALL_MODEL, {});
-  }
+  GE_CHK_STATUS_RET(SetDumpOptions(), "SetDumpOptions failed.");
+  GE_CHK_STATUS_RET(SetDumpDebugOptions(), "SetDumpDebugOptions failed.");
-  SetDumpDebugOptions();
+  return SUCCESS;
 }
 // The following is the new dump scenario of the fusion operator
@@ -253,14 +437,20 @@ void DumpProperties::CopyFrom(const DumpProperties &other) {
   }
 }
-void DumpProperties::SetDumpDebugOptions() {
+Status DumpProperties::SetDumpDebugOptions() {
   if (enable_dump_debug_ == kEnableFlag) {
     std::string dump_debug_mode;
     if (GetContext().GetOption(OPTION_EXEC_DUMP_DEBUG_MODE, dump_debug_mode) == GRAPH_SUCCESS) {
       GELOGD("Get dump debug mode %s successfully", dump_debug_mode.c_str());
     } else {
-      GELOGW("Dump debug mode is not set.");
-      return;
+      REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
+                         std::vector<std::string>({
+                             "ge.exec.dumpDebugMode",
+                             dump_debug_mode,
+                             "ge.exec.dumpDebugMode is not set."}));
+      GELOGE(PARAM_INVALID, "[Check][dump_debug_mode] failed. Dump debug mode is not set.");
+      return PARAM_INVALID;
     }
     if (dump_debug_mode == OP_DEBUG_AICORE) {
@@ -276,10 +466,17 @@ void DumpProperties::SetDumpDebugOptions() {
       is_train_op_debug_ = true;
       op_debug_mode_ = kAllOverflow;
     } else {
-      GELOGW("ge.exec.dumpDebugMode is invalid.");
+      REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
+                         std::vector<std::string>({
+                             "ge.exec.dumpDebugMode",
+                             dump_debug_mode,
+                             "ge.exec.dumpDebugMode is invalid."}));
+      GELOGE(PARAM_INVALID, "[Set][DumpDebugOptions] failed, ge.exec.dumpDebugMode is invalid.");
+      return PARAM_INVALID;
     }
   } else {
     GELOGI("ge.exec.enableDumpDebug is false or is not set.");
   }
+  return SUCCESS;
 }
 }  // namespace ge
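Aside (not part of the patch): a minimal standalone sketch of how the dump-step pattern used by the new `CheckDumpStep` behaves on a few `ge.exec.dumpStep` values. The regex literal is copied from the hunk above; the surrounding program is illustrative only and makes no GE API calls.

```cpp
#include <iostream>
#include <regex>
#include <string>

int main() {
  // Same pattern as the added CheckDumpStep: one or more "N|" or "N-M|" items.
  const std::regex pattern(R"((\d{1,}-\d{1,}\||\d{1,}\|)+)");
  for (const std::string step : {"0|5|10-20", "0|5|10|50-100", "3-1", "step5", ""}) {
    // CheckDumpStep appends a trailing '|' before matching, so mirror that here.
    const std::string padded = step + "|";
    const bool syntax_ok = std::regex_match(padded, pattern);
    std::cout << "\"" << step << "\" -> " << (syntax_ok ? "syntax ok" : "rejected") << "\n";
  }
  // Note: "3-1" passes the regex but would still be rejected by CheckDumpStep's
  // later range check (first step >= second step).
  return 0;
}
```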
@@ -23,6 +23,7 @@
 #include <vector>
 namespace ge {
+using Status = uint32_t;
 class DumpProperties {
  public:
   DumpProperties() = default;
@@ -33,7 +34,7 @@ class DumpProperties {
   DumpProperties &operator=(const DumpProperties &dump);
-  void InitByOptions();
+  Status InitByOptions();
   void AddPropertyValue(const std::string &model, const std::set<std::string> &layers);
@@ -95,7 +96,20 @@ class DumpProperties {
  private:
   void CopyFrom(const DumpProperties &other);
-  void SetDumpDebugOptions();
+  Status SetDumpDebugOptions();
+  Status SetDumpOptions();
+  void Split(const std::string &s, std::vector<std::string> &result, const char *delchar);
+  Status CheckDumpStep(const std::string &dump_step);
+  Status CheckDumpMode(const std::string &dump_mode);
+  Status CheckDumpPath(const std::string &input);
+  Status CheckEnableDump(const std::string &input);
   std::string enable_dump_;
   std::string enable_dump_debug_;
@@ -161,6 +161,7 @@ Status ExceptionDumper::DumpExceptionInfo(const std::vector<rtExceptionInfo> &ex
   uint64_t proto_size = dump_data.ByteSizeLong();
   std::unique_ptr<char[]> proto_msg(new (std::nothrow) char[proto_size]);
+  GE_CHECK_NOTNULL(proto_msg);
   bool ret = dump_data.SerializeToArray(proto_msg.get(), proto_size);
   if (!ret || proto_size == 0) {
     REPORT_INNER_ERROR("E19999", "Serialize proto to string fail");
@@ -49,6 +49,25 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY std::string ShapeToString(const s
   return JoinToString(shape);
 }
+GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY
+std::string RangeToString(const std::vector<std::pair<int64_t, int64_t>> &ranges) {
+  bool first = true;
+  std::stringstream ss;
+  ss << "[";
+  for (const auto &range : ranges) {
+    if (first) {
+      first = false;
+    } else {
+      ss << ",";
+    }
+    ss << "{";
+    ss << range.first << "," << range.second;
+    ss << "}";
+  }
+  ss << "]";
+  return ss.str();
+}
 int64_t GetItemNumByShape(const std::vector<int64_t> &shape) {
   int64_t num = 1;
   for (auto dim : shape) {
@@ -54,6 +54,8 @@ std::string ShapeToString(const GeShape &shape);
 std::string ShapeToString(const std::vector<int64_t> &shape);
+std::string RangeToString(const std::vector<std::pair<int64_t, int64_t>> &ranges);
 int64_t GetItemNumByShape(const std::vector<int64_t> &shape);
 bool CheckShapeValid(const std::vector<int64_t> &shape, const int64_t expect_dims);
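Aside (not part of the patch): a small standalone sketch of the string layout produced by the new `RangeToString` helper. The local `Format()` below merely mirrors the added implementation so it can be run in isolation; it is not the GE API itself.

```cpp
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

// Mirrors the added RangeToString: builds "[{first,second},{first,second},...]".
static std::string Format(const std::vector<std::pair<int64_t, int64_t>> &ranges) {
  std::stringstream ss;
  ss << "[";
  bool first = true;
  for (const auto &range : ranges) {
    if (!first) {
      ss << ",";
    }
    first = false;
    ss << "{" << range.first << "," << range.second << "}";
  }
  ss << "]";
  return ss.str();
}

int main() {
  // Prints: [{1,16},{-1,-1},{224,224}]
  std::cout << Format({{1, 16}, {-1, -1}, {224, 224}}) << std::endl;
  return 0;
}
```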
@@ -186,6 +186,8 @@ target_include_directories(ge_executor SYSTEM PRIVATE
     ${CMAKE_BINARY_DIR}/proto/graphengine_protos
     #### yellow zone ####
     $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:${GE_DEPEND_DIR}/inc>
+    $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<TARGET_PROPERTY:runtime_headers,INTERFACE_INCLUDE_DIRECTORIES>>
+    $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<TARGET_PROPERTY:cce_headers,INTERFACE_INCLUDE_DIRECTORIES>>
     #### blue zone ####
     $<$<BOOL:${ENABLE_OPEN_SRC}>:${GE_CODE_DIR}/third_party/fwkacllib/inc>
     $<$<BOOL:${ENABLE_OPEN_SRC}>:${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain>
@@ -251,6 +253,8 @@ target_link_libraries(ge_executor_shared PRIVATE
     $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:slog_headers>>
     $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:msprof_headers>>
     $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:mmpa_headers>>
+    $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:runtime_headers>>
+    $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:cce_headers>>
     -Wl,--no-as-needed
     ge_common
     runtime
@@ -125,34 +125,41 @@ void SetDynamicInputDataFlag(const ge::RunModelData &input_data, const std::vect
 bool IsDynamicBatchSizeMatchModel(uint64_t batch_size, const vector<std::vector<int64_t>> &batch_info) {
   if (batch_info.empty()) {
-    GELOGE(ge::FAILED, "Dynamic batch info is empty.");
+    REPORT_INNER_ERROR("E19999", "param Dynamic batch info is empty, check invalid.");
+    GELOGE(ge::FAILED, "[Check][Param] Dynamic batch info is empty.");
     return false;
   }
   for (auto batch : batch_info) {
     if (batch.size() != kDynamicBatchSizeVecSize) {
-      GELOGE(ge::FAILED, "Dynamic batch param num is %zu, current batch size is %zu.", kDynamicBatchSizeVecSize,
-             batch.size());
+      REPORT_INNER_ERROR("E19999", "Dynamic batch param num is %zu, current batch size is %zu.",
+                         kDynamicBatchSizeVecSize, batch.size());
+      GELOGE(ge::FAILED, "[Check][Param] Dynamic batch param num is %zu, current batch size is %zu.",
+             kDynamicBatchSizeVecSize, batch.size());
       return false;
     }
     if (batch[0] == static_cast<int64_t>(batch_size)) {
       return true;
     }
   }
-  GELOGE(ge::FAILED, "Dynamic batch %lu can not match the gear of model.", batch_size);
+  REPORT_INNER_ERROR("E19999", "Dynamic batch %lu can not match the gear of model.", batch_size);
+  GELOGE(ge::FAILED, "[Check][Param] Dynamic batch %lu can not match the gear of model.", batch_size);
   return false;
 }
 bool IsDynamicImageSizeMatchModel(uint64_t image_height, uint64_t image_width,
                                   const vector<std::vector<int64_t>> &batch_info) {
   if (batch_info.empty()) {
-    GELOGE(ge::FAILED, "Dynamic batch info is empty.");
+    REPORT_INNER_ERROR("E19999", "Param Dynamic batch info is empty, check invalid");
+    GELOGE(ge::FAILED, "[Check][Param] Dynamic batch info is empty.");
     return false;
   }
   for (auto resolution : batch_info) {
     if (resolution.size() != kDynamicImageSizeVecSize) {
-      GELOGE(ge::FAILED, "Dynamic resolution param num is %zu, current resolution size is %zu.",
+      REPORT_INNER_ERROR("E19999", "Dynamic resolution param num is %zu, current resolution size is %zu.",
+                         kDynamicImageSizeVecSize, resolution.size());
+      GELOGE(ge::FAILED, "[Check][Param] Dynamic resolution param num is %zu, current resolution size is %zu.",
              kDynamicImageSizeVecSize, resolution.size());
       return false;
     }
@@ -160,22 +167,28 @@ bool IsDynamicImageSizeMatchModel(uint64_t image_height, uint64_t image_width,
       return true;
     }
   }
-  GELOGE(ge::FAILED, "Dynamic resolution (%lu,%lu) can not match the gear of model.", image_height, image_width);
+  REPORT_INNER_ERROR("E19999", "Dynamic resolution (%lu,%lu) can not match the gear of model.",
+                     image_height, image_width);
+  GELOGE(ge::FAILED, "[Check][Param] Dynamic resolution (%lu,%lu) can not match the gear of model.",
+         image_height, image_width);
   return false;
 }
 bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims,
                                  const vector<vector<int64_t>> &batch_info) {
   if (batch_info.empty()) {
-    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Dynamic batch info is empty.");
+    REPORT_INNER_ERROR("E19999", "param batch_info is empty, check invalid");
+    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Param] Dynamic batch info is empty.");
     return false;
   }
   bool find_match = false;
   for (auto resolution : batch_info) {
     if (cur_dynamic_dims.size() != resolution.size()) {
-      GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Cur dynamic dims param num is %zu, current resolution size is %zu.",
+      REPORT_INNER_ERROR("E19999", "Cur dynamic dims param num is %zu, current resolution size is %zu.",
+                         cur_dynamic_dims.size(), resolution.size());
+      GELOGE(ACL_ERROR_GE_PARAM_INVALID,
+             "[Check][Param] Cur dynamic dims param num is %zu, current resolution size is %zu.",
             cur_dynamic_dims.size(), resolution.size());
       return false;
     }
@@ -192,7 +205,7 @@ bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims,
     }
   }
   if (!find_match) {
-    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "choose dynamic dims can not match the gear of model.");
+    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Param] choose dynamic dims can not match the gear of model.");
   }
   return find_match;
 }
@@ -241,7 +254,7 @@ Status GeExecutor::Initialize() {
   Status init_hostcpu_engine_status = HostCpuEngine::GetInstance().Initialize();
   if (init_hostcpu_engine_status != SUCCESS) {
-    GELOGE(init_hostcpu_engine_status, "Failed to initialize HostCpuEngine");
+    GELOGE(init_hostcpu_engine_status, "[initialize][HostCpuEngine] failed");
     return init_hostcpu_engine_status;
   }
@@ -251,12 +264,12 @@ Status GeExecutor::Initialize() {
   mem_type.push_back(RT_MEMORY_P2P_DDR);
   auto ret = MemManager::Instance().Initialize(mem_type);
   if (ret != SUCCESS) {
-    GELOGE(ret, "Memory Manager init failed.");
+    GELOGE(ret, "[Initialize][MemManager] failed.");
     return ret;
   }
   GE_CHK_STATUS_RET(OpsKernelBuilderManager::Instance().Initialize({}, false),
-                    "Failed to initialize OpsKernelBuilders.");
+                    "[Initialize][OpsKernelBuilderManager] failed.");
   // Start profiling
   Options profiling_options;
@@ -292,13 +305,18 @@ Status GeExecutor::Finalize() {
 Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
                                        uint64_t batch_size) {
   if (dynamic_input_addr == nullptr) {
-    GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, "Dynamic input addr is nullptr!");
+    REPORT_INNER_ERROR("E19999", "param dynamic_input_addr is nullptr, check invalid, model id:%u", model_id);
+    GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID,
+           "[Check][Param] Dynamic input addr is nullptr, model id:%u", model_id);
     return ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID;
   }
   uint64_t size = sizeof(uint32_t);
   if (length < size) {
-    GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Dynamic input size [%lu] is less than [%lu]!", length, size);
+    REPORT_INNER_ERROR("E19999", "Dynamic input size [%lu] is less than [%lu], check invalid, model id:%u",
+                       length, size, model_id);
+    GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID,
+           "[Check][Param] Dynamic input size [%lu] is less than [%lu], model id:%u", length, size, model_id);
     return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID;
   }
   if (length >= sizeof(uint64_t)) {
@@ -311,24 +329,28 @@ Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_ad
   int32_t dynamic_type = static_cast<int32_t>(FIXED);
   Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type);
   if (ret != SUCCESS) {
-    GELOGE(ret, "Get dynamic input info failed.");
+    REPORT_CALL_ERROR("E19999", "get dynamic batch info failed, model id:%u", model_id);
+    GELOGE(ret, "[Get][DynamicBatchInfo] failed, model id:%u.", model_id);
     return ret;
   }
   if (!IsDynamicBatchSizeMatchModel(batch_size, batch_info)) {
-    GELOGE(ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID, "The current dynamic input does not match the gear of the model.");
+    GELOGE(ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID,
+           "[Check][Param] The current dynamic input does not match the gear of the model(id:%u).", model_id);
     return ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID;
   }
   ret = GraphExecutor::SetDynamicSize(model_id, batch_num, static_cast<int32_t>(DYNAMIC_BATCH));
   if (ret != SUCCESS) {
-    GELOGE(ret, "Set dynamic size failed");
+    REPORT_CALL_ERROR("E19999", "set dynamic size failed, model id:%u, dynamic_type:1", model_id);
+    GELOGE(ret, "[Set][DynamicSize] failed, model id:%u, dynamic_type:1", model_id);
     return ret;
   }
   // memcpy dynamic_batch_size from host to device
   rtError_t rt_ret = rtMemcpy(dynamic_input_addr, length, &batch_size, size, RT_MEMCPY_HOST_TO_DEVICE);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(rt_ret, "memcpy dynamic batch input data failed! ret: 0x%X", rt_ret);
+    REPORT_CALL_ERROR("E19999", "Call rtMemcpy, size:%lu ret:0x%X", length, rt_ret);
+    GELOGE(rt_ret, "[Call][RtMemcpy] memcpy dynamic batch input data failed! size:%lu ret:0x%X", length, rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
   return SUCCESS;
@@ -337,14 +359,19 @@ Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_ad
 Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
                                        uint64_t image_height, uint64_t image_width) {
   if (dynamic_input_addr == nullptr) {
-    GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, "Dynamic input addr is nullptr!");
+    REPORT_INNER_ERROR("E19999", "param dynamic_input_addr is nullptr, check invalid, model id:%u", model_id);
+    GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID,
+           "[Check][Param] Dynamic input addr is nullptr, model id:%u", model_id);
     return ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID;
   }
   uint64_t dynamic_input_size = kDynamicImageSizeInputSize * sizeof(uint32_t);
   if (length < dynamic_input_size) {
+    REPORT_INNER_ERROR("E19999", "Dynamic input size [%lu] is less than [%lu], check invalid, model id:%u",
+                       length, dynamic_input_size, model_id);
     GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID,
-           "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size);
+           "[Check][Param] Dynamic input size [%lu] is less than [%lu], model id:%u",
+           length, dynamic_input_size, model_id);
     return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID;
   }
   uint64_t size = sizeof(uint32_t);
@@ -357,18 +384,22 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad
   int32_t dynamic_type = static_cast<int32_t>(FIXED);
   Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type);
   if (ret != SUCCESS) {
-    GELOGE(ret, "Get dynamic input info failed.");
+    REPORT_CALL_ERROR("E19999", "Get dynamic input info failed, model id:%u.", model_id);
+    GELOGE(ret, "[Get][DynamicBatchInfo] failed, model id:%u.", model_id);
     return ret;
   }
   if (!IsDynamicImageSizeMatchModel(image_height, image_width, batch_info)) {
-    GELOGE(ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID, "The current dynamic input does not match the gear of the model.");
+    GELOGE(ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID,
+           "[Check][Param] The current dynamic input does not match the gear of the model, "
+           "image_height:%lu, image_width:%lu.", image_height, image_width);
     return ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID;
   }
   ret = GraphExecutor::SetDynamicSize(model_id, batch_num, static_cast<int32_t>(DYNAMIC_IMAGE));
   if (ret != SUCCESS) {
-    GELOGE(ret, "Set dynamic size failed");
+    REPORT_CALL_ERROR("E19999", "Set dynamic size failed, model id:%u,", model_id);
+    GELOGE(ret, "[Set][DynamicSize] failed, model id:%u", model_id);
     return ret;
   }
@@ -376,7 +407,9 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad
   rtError_t rt_ret =
       rtMemcpy(dynamic_input_addr, size, &image_height, size, RT_MEMCPY_HOST_TO_DEVICE);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(rt_ret, "memcpy dynamic resolution input data failed! ret: 0x%X", rt_ret);
+    REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed! size:%lu, ret:0x%X, model id:%u", size, rt_ret, model_id);
+    GELOGE(rt_ret, "[Call][RtMemcpy] memcpy dynamic resolution input data failed! size:%lu, ret:0x%X, model id:%u",
+           size, rt_ret, model_id);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
@@ -385,7 +418,10 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad
   rt_ret = rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(dynamic_input_addr) + size),
                     remain_size, &image_width, size, RT_MEMCPY_HOST_TO_DEVICE);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(rt_ret, "memcpy dynamic resolution input data failed!");
+    REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed! size:%lu, ret:0x%X, model id:%u",
+                      remain_size, rt_ret, model_id);
+    GELOGE(rt_ret, "[Call][RtMemcpy] memcpy dynamic resolution input data failed! size:%lu, ret:0x%X, model id:%u",
+           remain_size, rt_ret, model_id);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
   return SUCCESS;
@@ -394,40 +430,48 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad
 Status GeExecutor::SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
                                   const vector<uint64_t> &dynamic_dims) {
   if (dynamic_input_addr == nullptr) {
-    GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, "Dynamic input addr is nullptr!");
+    REPORT_INNER_ERROR("E19999", "Param dynamic_input_addr is nullptr, check invalid, model id:%u", model_id);
+    GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID,
+           "[Check][Param] Dynamic input addr is nullptr, model id:%u", model_id);
     return ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID;
   }
   vector<uint64_t> cur_dynamic_dims;
   Status ret = GetCurDynamicDims(model_id, dynamic_dims, cur_dynamic_dims);
   if (ret != SUCCESS) {
-    GELOGE(ret, "Set cur gear dynamic dims failed");
+    GELOGE(ret, "[Get][CurDynamicDims] failed, model id:%u", model_id);
     return ret;
   }
   std::vector<std::vector<int64_t>> batch_info;
   int32_t dynamic_type = static_cast<int32_t>(FIXED);
   ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type);
   if (ret != SUCCESS) {
-    GELOGE(ret, "Get dynamic input info failed.");
+    REPORT_CALL_ERROR("E19999", "Get dynamic input info failed, model id:%u.", model_id);
+    GELOGE(ret, "[Get][DynamicBatchInfo] failed, model id:%u.", model_id);
     return ret;
   }
   if (!IsDynmaicDimsSizeMatchModel(cur_dynamic_dims, batch_info)) {
-    GELOGE(ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID, "The current dynamic input does not match the gear of the model.");
+    GELOGE(ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID,
+           "[Check][Param] The current dynamic input does not match the gear of the model, id:%u.", model_id);
     return ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID;
   }
   ret = GraphExecutor::SetDynamicSize(model_id, cur_dynamic_dims, static_cast<int32_t>(DYNAMIC_DIMS));
   if (ret != SUCCESS) {
-    GELOGE(ret, "Set dynamic size failed");
+    REPORT_CALL_ERROR("E19999", "Set dynamic size failed, model id:%u", model_id);
+    GELOGE(ret, "[Set][DynamicSize] failed, model id:%u", model_id);
     return ret;
   }
   size_t dynamic_dim_num = cur_dynamic_dims.size();
   uint64_t dynamic_input_size = static_cast<uint64_t>(dynamic_dim_num * sizeof(uint32_t));
   if (length < dynamic_input_size) {
+    REPORT_INNER_ERROR("E19999", "input dynamic size [%lu] is less than [%lu], model id:%u",
+                       length, dynamic_input_size, model_id);
     GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID,
-           "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size);
+           "[Check][Param] Dynamic input size [%lu] is less than [%lu], model id:%u",
+           length, dynamic_input_size, model_id);
     return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID;
   }
   uint64_t size = sizeof(uint32_t);
@@ -440,7 +484,9 @@ Status GeExecutor::SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, u
     rt_ret = rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(dynamic_input_addr) + size * i),
                       length - size * i, &cur_dynamic_dims[i], size, RT_MEMCPY_HOST_TO_DEVICE);
     if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(rt_ret, "memcpy dynamic resolution input data failed!");
+      REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", (length - size * i), rt_ret);
+      GELOGE(rt_ret, "[Call][RtMemcpy] memcpy dynamic resolution input data failed! size:%lu, ret:0x%X",
+             length - size * i, rt_ret);
       return RT_ERROR_TO_GE_STATUS(rt_ret);
     }
   }
@@ -454,14 +500,14 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> &
   vector<ge::TensorDesc> output_desc;
   auto ret = GetModelDescInfo(model_id, input_desc, output_desc);
   if (ret != ge::SUCCESS) {
-    GELOGE(ret, "GetModelDescInfo failed.");
+    GELOGE(ret, "[Get][ModelDescInfo] failed, model id:%u.", model_id);
     return ret;
   }
   vector<string> user_designate_shape_order;
   vector<int64_t> all_data_dims;
   ret = GetUserDesignateShapeOrder(model_id, user_designate_shape_order);
   if (ret != ge::SUCCESS) {
-    GELOGE(ret, "GetUserDesignateShapeOrder failed.");
+    GELOGE(ret, "[Call][GetUserDesignateShapeOrder] failed, model id:%u.", model_id);
     return ret;
   }
   for (auto &data_name : user_designate_shape_order) {
| @@ -475,8 +521,10 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> & | |||||
| } | } | ||||
| } | } | ||||
| if (dynamic_dims.size() != all_data_dims.size()) { | if (dynamic_dims.size() != all_data_dims.size()) { | ||||
| REPORT_INNER_ERROR("E19999", "Dynamic input size [%lu] is not equal with all data dims size [%lu]!", | |||||
| dynamic_dims.size(), all_data_dims.size()); | |||||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, | GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, | ||||
| "Dynamic input size [%lu] is not equal with all data dims size [%lu]!", | |||||
| "[Check][Param] Dynamic input size [%lu] is not equal with all data dims size [%lu]!", | |||||
| dynamic_dims.size(), all_data_dims.size()); | dynamic_dims.size(), all_data_dims.size()); | ||||
| return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; | return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; | ||||
| } | } | ||||
| @@ -484,8 +532,10 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> & | |||||
| if (all_data_dims[i] < 0) { | if (all_data_dims[i] < 0) { | ||||
| cur_dynamic_dims.push_back(dynamic_dims[i]); | cur_dynamic_dims.push_back(dynamic_dims[i]); | ||||
| } else if (static_cast<uint64_t>(all_data_dims[i]) != dynamic_dims[i]) { | } else if (static_cast<uint64_t>(all_data_dims[i]) != dynamic_dims[i]) { | ||||
| REPORT_INNER_ERROR("E19999", "Static dims should be same, index:%zu value:%lu should be %ld", | |||||
| i, dynamic_dims[i], all_data_dims[i]); | |||||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, | GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, | ||||
| "Static dims should be same, index: %zu value: %lu should be %ld", | |||||
| "[Check][Param] Static dims should be same, index:%zu value:%lu should be %ld", | |||||
| i, dynamic_dims[i], all_data_dims[i]); | i, dynamic_dims[i], all_data_dims[i]); | ||||
| return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; | return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; | ||||
| } | } | ||||
| @@ -496,12 +546,14 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> & | |||||
| Status GeExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | Status GeExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | ||||
| GELOGI("Begin to get current shape"); | GELOGI("Begin to get current shape"); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized, model id:%u", model_id); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized, model id:%u", model_id); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| Status ret = GraphExecutor::GetCurShape(model_id, batch_info, dynamic_type); | Status ret = GraphExecutor::GetCurShape(model_id, batch_info, dynamic_type); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Get current shape failed"); | |||||
| REPORT_CALL_ERROR("E19999", "Get Cur Shape failed, model id:%u", model_id); | |||||
| GELOGE(ret, "[Get][CurShape] failed, model id:%u", model_id); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -512,11 +564,14 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add | |||||
| const kAippDynamicPara &aippParms) { | const kAippDynamicPara &aippParms) { | ||||
| GELOGI("Enter to SetDynamicAippData."); | GELOGI("Enter to SetDynamicAippData."); | ||||
| if (dynamic_input_addr == nullptr) { | if (dynamic_input_addr == nullptr) { | ||||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, "Dynamic aipp input addr is nullptr!"); | |||||
| REPORT_INNER_ERROR("E19999", "Param dynamic_input_addr is nullptr, check invalid, model id:%u", model_id); | |||||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, | |||||
| "[Check][Param] Dynamic aipp input addr is nullptr, model id:%u", model_id); | |||||
| return ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID; | return ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID; | ||||
| } | } | ||||
| if (aippBatchPara.empty()) { | if (aippBatchPara.empty()) { | ||||
| GELOGE(ACL_ERROR_GE_AIPP_BATCH_EMPTY, "aippBatchPara is empty."); | |||||
| REPORT_INNER_ERROR("E19999", "Param aippBatchPara is empty, check invalid, model id:%u", model_id); | |||||
| GELOGE(ACL_ERROR_GE_AIPP_BATCH_EMPTY, "[Check][Param] aippBatchPara is empty, model id:%u", model_id); | |||||
| return ACL_ERROR_GE_AIPP_BATCH_EMPTY; | return ACL_ERROR_GE_AIPP_BATCH_EMPTY; | ||||
| } | } | ||||
| uint64_t batch_num = aippBatchPara.size(); | uint64_t batch_num = aippBatchPara.size(); | ||||
| @@ -527,14 +582,18 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add | |||||
| "batch num is %lu, struct_len is %lu", | "batch num is %lu, struct_len is %lu", | ||||
| model_id, length, batch_num, struct_len); | model_id, length, batch_num, struct_len); | ||||
| if (struct_len > length) { | if (struct_len > length) { | ||||
| REPORT_INNER_ERROR("E19999", "input dynamic aipp param len:%lu is larger than aipp_data size:%lu", | |||||
| struct_len, length); | |||||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, | GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, | ||||
| "input dynamic aipp param len [%lu] is larger than aipp_data size [%lu]", struct_len, length); | |||||
| "[Check][Param] input dynamic aipp param len [%lu] is larger than aipp_data size [%lu]", | |||||
| struct_len, length); | |||||
| return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; | return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; | ||||
| } | } | ||||
| // Memcpy real kAippDynamicBatchPara from host to device | // Memcpy real kAippDynamicBatchPara from host to device | ||||
| rtError_t rt_ret = rtMemcpy(dynamic_input_addr, length, &aippParms, real_aippParms_size, RT_MEMCPY_HOST_TO_DEVICE); | rtError_t rt_ret = rtMemcpy(dynamic_input_addr, length, &aippParms, real_aippParms_size, RT_MEMCPY_HOST_TO_DEVICE); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(rt_ret, "memcpy real_aippParms_size failed! ret: 0x%X", rt_ret); | |||||
| REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", length, rt_ret); | |||||
| GELOGE(rt_ret, "[Call][RtMemcpy] memcpy aippParms failed! size:%lu, ret:0x%X", length, rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
| } | } | ||||
| uint64_t remain_len = length - real_aippParms_size; | uint64_t remain_len = length - real_aippParms_size; | ||||
| @@ -545,7 +604,8 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add | |||||
| (remain_len - i * sizeof(kAippDynamicBatchPara)), &(aippBatchPara[i]), | (remain_len - i * sizeof(kAippDynamicBatchPara)), &(aippBatchPara[i]), | ||||
| sizeof(kAippDynamicBatchPara), RT_MEMCPY_HOST_TO_DEVICE); | sizeof(kAippDynamicBatchPara), RT_MEMCPY_HOST_TO_DEVICE); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(rt_ret, "memcpy kAippDynamicBatchPara input data failed! ret: 0x%X", rt_ret); | |||||
| REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, ret:0x%X", rt_ret); | |||||
| GELOGE(rt_ret, "[Call][RtMemcpy] memcpy kAippDynamicBatchPara input data failed! ret:0x%X", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
| } | } | ||||
| } | } | ||||
| @@ -555,12 +615,14 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add | |||||
| Status GeExecutor::UnloadModel(uint32_t model_id) { | Status GeExecutor::UnloadModel(uint32_t model_id) { | ||||
| GELOGD("unload model %u begin.", model_id); | GELOGD("unload model %u begin.", model_id); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized"); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| Status ret = GraphLoader::DestroyAicpuSessionForInfer(model_id); | Status ret = GraphLoader::DestroyAicpuSessionForInfer(model_id); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id); | |||||
| REPORT_CALL_ERROR("E19999", "Destroy Aicpu Session For Infer failed, model id:%u", model_id); | |||||
| GELOGE(ret, "[Destroy][AicpuSession] For Infer failed. model id:%u", model_id); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -578,7 +640,8 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { | |||||
| } | } | ||||
| ret = GraphLoader::UnloadModel(model_id); | ret = GraphLoader::UnloadModel(model_id); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id); | |||||
| REPORT_CALL_ERROR("E19999", "unload model failed, model id:%u", model_id); | |||||
| GELOGE(ret, "[Unload][Model] failed. model id:%u", model_id); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -588,7 +651,8 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { | |||||
| Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | ||||
| std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) { | std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) { | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized, model id:%u", model_id); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized, model id:%u", model_id); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| @@ -600,20 +664,26 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes | |||||
| Status ret = GraphExecutor::GetInputOutputDescInfo(model_id, input_desc_infos, output_desc_infos, input_formats, | Status ret = GraphExecutor::GetInputOutputDescInfo(model_id, input_desc_infos, output_desc_infos, input_formats, | ||||
| output_formats, new_model_desc); | output_formats, new_model_desc); | ||||
| if (ret != domi::SUCCESS) { | if (ret != domi::SUCCESS) { | ||||
| GELOGE(ret, "GetInputOutputDescInfo failed. ret = %u", ret); | |||||
| REPORT_CALL_ERROR("E19999", "get input output desc info failed, ret = %u, model id:%u", ret, model_id); | |||||
| GELOGE(ret, "[Get][InputOutputDescInfo] failed. ret = %u, model id:%u", ret, model_id); | |||||
| return ACL_ERROR_GE_GET_TENSOR_INFO; | return ACL_ERROR_GE_GET_TENSOR_INFO; | ||||
| } | } | ||||
| if (input_formats.size() != input_desc_infos.size()) { | if (input_formats.size() != input_desc_infos.size()) { | ||||
| REPORT_INNER_ERROR("E19999", "input_formats size %zu is not equal to input_desc_infos size %zu, model id:%u.", | |||||
| input_formats.size(), input_desc_infos.size(), model_id); | |||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, | GELOGE(ACL_ERROR_GE_PARAM_INVALID, | ||||
| "input_formats size %zu is not equal to input_desc_infos size %zu.", | |||||
| input_formats.size(), input_desc_infos.size()); | |||||
| "[Check][Param] input_formats size %zu is not equal to input_desc_infos size %zu, model id:%u.", | |||||
| input_formats.size(), input_desc_infos.size(), model_id); | |||||
| return ACL_ERROR_GE_PARAM_INVALID; | return ACL_ERROR_GE_PARAM_INVALID; | ||||
| } | } | ||||
| if (output_formats.size() != output_desc_infos.size()) { | if (output_formats.size() != output_desc_infos.size()) { | ||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "output_formats size %zu is not equal to output_desc_infos size %zu.", | |||||
| output_formats.size(), output_desc_infos.size()); | |||||
| REPORT_INNER_ERROR("E19999", "output_formats size %zu is not equal to output_desc_infos size %zu, model id:%u.", | |||||
| output_formats.size(), output_desc_infos.size(), model_id); | |||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, | |||||
| "[Check][Param] output_formats size %zu is not equal to output_desc_infos size %zu, model id:%u.", | |||||
| output_formats.size(), output_desc_infos.size(), model_id); | |||||
| return ACL_ERROR_GE_PARAM_INVALID; | return ACL_ERROR_GE_PARAM_INVALID; | ||||
| } | } | ||||
| @@ -635,13 +705,15 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes | |||||
| Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | ||||
| int32_t &dynamic_type) { | int32_t &dynamic_type) { | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "GetDynamicBatchInfo failed."); | |||||
| REPORT_CALL_ERROR("E19999", "Get Dynamic BatchInfo failed, model id:%u.", model_id); | |||||
| GELOGE(ret, "[Get][DynamicBatchInfo] failed, model id:%u.", model_id); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -657,13 +729,15 @@ Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vecto | |||||
| Status GeExecutor::GetCombinedDynamicDims(uint32_t model_id, vector<vector<int64_t>> &batch_info) { | Status GeExecutor::GetCombinedDynamicDims(uint32_t model_id, vector<vector<int64_t>> &batch_info) { | ||||
| GELOGI("Begin to get combined dynamic dims info."); | GELOGI("Begin to get combined dynamic dims info."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| Status ret = GraphExecutor::GetCombinedDynamicDims(model_id, batch_info); | Status ret = GraphExecutor::GetCombinedDynamicDims(model_id, batch_info); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "GetCombinedDynamicDims failed."); | |||||
| REPORT_CALL_ERROR("E19999", "Get Combined DynamicDims failed, model id:%u.", model_id); | |||||
| GELOGE(ret, "[Get][CombinedDynamicDims] failed, model id:%u.", model_id); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -680,13 +754,15 @@ Status GeExecutor::GetCombinedDynamicDims(uint32_t model_id, vector<vector<int64 | |||||
| /// | /// | ||||
| Status GeExecutor::GetUserDesignateShapeOrder(uint32_t model_id, vector<string> &user_designate_shape_order) { | Status GeExecutor::GetUserDesignateShapeOrder(uint32_t model_id, vector<string> &user_designate_shape_order) { | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| Status ret = GraphExecutor::GetUserDesignateShapeOrder(model_id, user_designate_shape_order); | Status ret = GraphExecutor::GetUserDesignateShapeOrder(model_id, user_designate_shape_order); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "GetUserDesignateShapeOrder failed."); | |||||
| REPORT_CALL_ERROR("E19999", "GetUserDesignateShapeOrder failed, model id:%u.", model_id); | |||||
| GELOGE(ret, "[Call][GetUserDesignateShapeOrder] failed, model id:%u.", model_id); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -704,7 +780,8 @@ Status GeExecutor::GetUserDesignateShapeOrder(uint32_t model_id, vector<string> | |||||
| Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { | Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { | ||||
| GELOGI("Begin to GetAIPPInfo."); | GELOGI("Begin to GetAIPPInfo."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor not inited yet!"); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| Status ret = GraphExecutor::GetAippInfo(model_id, index, aipp_info); | Status ret = GraphExecutor::GetAippInfo(model_id, index, aipp_info); | ||||
| @@ -719,7 +796,8 @@ Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo | |||||
| Status GeExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { | Status GeExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { | ||||
| GELOGI("Begin to get aipp type."); | GELOGI("Begin to get aipp type."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not inited yet!"); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| Status ret = GraphExecutor::GetAippType(model_id, index, type, aipp_index); | Status ret = GraphExecutor::GetAippType(model_id, index, type, aipp_index); | ||||
| @@ -741,8 +819,10 @@ Status GeExecutor::GetOpAttr(uint32_t model_id, const std::string &op_name, cons | |||||
| } | } | ||||
| Status ret = GraphExecutor::GetOpAttr(model_id, op_name, attr_name, attr_value); | Status ret = GraphExecutor::GetOpAttr(model_id, op_name, attr_name, attr_value); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "[Get][OpAttr]Get op:%s attr:%s failed.", op_name.c_str(), attr_name.c_str()); | |||||
| REPORT_CALL_ERROR("E19999", "Get op:%s attr:%s failed.", op_name.c_str(), attr_name.c_str()); | |||||
| GELOGE(ret, "[Get][OpAttr]Get op:%s attr:%s failed, model id:%u.", | |||||
| op_name.c_str(), attr_name.c_str(), model_id); | |||||
| REPORT_CALL_ERROR("E19999", "Get op:%s attr:%s failed, model id:%u", | |||||
| op_name.c_str(), attr_name.c_str(), model_id); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -750,12 +830,14 @@ Status GeExecutor::GetOpAttr(uint32_t model_id, const std::string &op_name, cons | |||||
| Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info) { | Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info) { | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not inited yet!"); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not inited yet!"); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| Status ret = GraphExecutor::GetModelAttr(model_id, dynamic_output_shape_info); | Status ret = GraphExecutor::GetModelAttr(model_id, dynamic_output_shape_info); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Get dynamic batch output shape info failed."); | |||||
| REPORT_CALL_ERROR("E19999", "Get Model Attr failed, model id:%u.", model_id); | |||||
| GELOGE(ret, "[Get][ModelAttr] failed, model id:%u.", model_id); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -764,7 +846,8 @@ Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dyn | |||||
| Status GeExecutor::CommandHandle(const Command &command) { | Status GeExecutor::CommandHandle(const Command &command) { | ||||
| Status ret = GraphLoader::CommandHandle(command); | Status ret = GraphLoader::CommandHandle(command); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ACL_ERROR_GE_COMMAND_HANDLE, "CommandHandle: Command Handle failed."); | |||||
| REPORT_CALL_ERROR("E19999", "call CommandHandle failed, ret:%u", ret); | |||||
| GELOGE(ACL_ERROR_GE_COMMAND_HANDLE, "[Call][CommandHandle] failed, ret:%u", ret); | |||||
| return ACL_ERROR_GE_COMMAND_HANDLE; | return ACL_ERROR_GE_COMMAND_HANDLE; | ||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -773,7 +856,8 @@ Status GeExecutor::CommandHandle(const Command &command) { | |||||
| Status GeExecutor::GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size) { | Status GeExecutor::GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size) { | ||||
| GELOGI("Get max used memory begin."); | GELOGI("Get max used memory begin."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| @@ -793,14 +877,15 @@ Status GeExecutor::GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size) { | |||||
| Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_data) { | Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_data) { | ||||
| GELOGI("Load data from file begin."); | GELOGI("Load data from file begin."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| string filePath = RealPath(path.c_str()); | string filePath = RealPath(path.c_str()); | ||||
| if (filePath.empty()) { | if (filePath.empty()) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, | GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, | ||||
| "File path is invalid. please check your text file '%s'.", path.c_str()); | |||||
| "[Call][RealPath] File path is invalid. please check your text file '%s'.", path.c_str()); | |||||
| return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; | return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; | ||||
| } | } | ||||
| GELOGI("load modelData from file: %s.", path.c_str()); | GELOGI("load modelData from file: %s.", path.c_str()); | ||||
| @@ -829,7 +914,8 @@ Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_da | |||||
| Status GeExecutor::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, size_t mem_size, | Status GeExecutor::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, size_t mem_size, | ||||
| void *weight_ptr, size_t weight_size) { | void *weight_ptr, size_t weight_size) { | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not inited yet!"); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| @@ -850,7 +936,8 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat | |||||
| const std::vector<uint32_t> &output_queue_ids) { | const std::vector<uint32_t> &output_queue_ids) { | ||||
| GELOGI("Load model with queue begin."); | GELOGI("Load model with queue begin."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| return GraphLoader::LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids); | return GraphLoader::LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids); | ||||
| @@ -889,7 +976,8 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel | |||||
| const std::vector<GeTensorDesc> &input_desc, ge::RunModelData &run_output_data, | const std::vector<GeTensorDesc> &input_desc, ge::RunModelData &run_output_data, | ||||
| std::vector<GeTensorDesc> &output_desc, bool async_mode) { | std::vector<GeTensorDesc> &output_desc, bool async_mode) { | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| @@ -904,7 +992,8 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel | |||||
| int32_t dynamic_type = static_cast<int32_t>(FIXED); | int32_t dynamic_type = static_cast<int32_t>(FIXED); | ||||
| Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Get dynamic input info failed."); | |||||
| REPORT_CALL_ERROR("E19999", "get dynamic batch info failed, model id:%u.", model_id); | |||||
| GELOGE(ret, "[Get][DynamicBatchInfo] failed, model id:%u.", model_id); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (!batch_info.empty()) { | if (!batch_info.empty()) { | ||||
| @@ -926,14 +1015,16 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel | |||||
| Status GeExecutor::GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size) { | Status GeExecutor::GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size) { | ||||
| GELOGI("Get memory and weight size from file begin."); | GELOGI("Get memory and weight size from file begin."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| ModelData model; | ModelData model; | ||||
| Status ret = ge::GraphLoader::LoadDataFromFile(path, 0, model); | Status ret = ge::GraphLoader::LoadDataFromFile(path, 0, model); | ||||
| if ((ret != SUCCESS) || (model.model_data == nullptr)) { | if ((ret != SUCCESS) || (model.model_data == nullptr)) { | ||||
| GELOGE(ret, "Load data from file failed. ret = %d", ret); | |||||
| REPORT_CALL_ERROR("E19999", "load data from file failed, ret = %d", ret); | |||||
| GELOGE(ret, "[Load][Data] from file failed. ret = %d", ret); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -958,12 +1049,14 @@ Status GeExecutor::GetMemAndWeightSize(const void *model_data, size_t model_size | |||||
| size_t &weight_size) { | size_t &weight_size) { | ||||
| GELOGI("Get memory and weight size from data begin."); | GELOGI("Get memory and weight size from data begin."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| if (model_data == nullptr) { | if (model_data == nullptr) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID, "invalid model data!"); | |||||
| REPORT_INNER_ERROR("E19999", "param model_data is nullptr, check invalid!"); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID, "[Check][Param] invalid model data!"); | |||||
| return ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID; | return ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID; | ||||
| } | } | ||||
| @@ -997,7 +1090,8 @@ Status GeExecutor::LoadDynamicSingleOpV2(const std::string &model_name, const ge | |||||
| Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | ||||
| std::vector<DataBuffer> &outputs) { | std::vector<DataBuffer> &outputs) { | ||||
| if (executor == nullptr) { | if (executor == nullptr) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "param is NULL"); | |||||
| REPORT_INNER_ERROR("E19999", "Param executor is nullptr, check invalid"); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] param executor is nullptr"); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| @@ -1021,7 +1115,8 @@ Status GeExecutor::GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id) | |||||
| GE_CHECK_NOTNULL(model_manager); | GE_CHECK_NOTNULL(model_manager); | ||||
| auto davinci_model = model_manager->GetModel(model_id); | auto davinci_model = model_manager->GetModel(model_id); | ||||
| if (davinci_model == nullptr) { | if (davinci_model == nullptr) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "Model id: %d is invaild or model is not loaded.", model_id); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | |||||
| "[Get][Model] failed, Model id:%u is invaild or model is not loaded.", model_id); | |||||
| return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID; | return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID; | ||||
| } | } | ||||
| @@ -1034,7 +1129,7 @@ Status GeExecutor::GetBatchInfoSize(uint32_t model_id, size_t &shape_count) { | |||||
| int32_t dynamic_type = static_cast<int32_t>(FIXED); | int32_t dynamic_type = static_cast<int32_t>(FIXED); | ||||
| Status ret = GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | Status ret = GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Calc batch info size failed. ret = %d", ret); | |||||
| GELOGE(ret, "[Get][DynamicBatchInfo] failed. ret = %d, model id:%u", ret, model_id); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (batch_info.empty()) { | if (batch_info.empty()) { | ||||
| @@ -1048,13 +1143,15 @@ Status GeExecutor::GetBatchInfoSize(uint32_t model_id, size_t &shape_count) { | |||||
| Status GeExecutor::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) { | Status GeExecutor::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) { | ||||
| GELOGI("Begin to GetOrigInputInfo."); | GELOGI("Begin to GetOrigInputInfo."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| Status ret = GraphExecutor::GetOrigInputInfo(model_id, index, orig_input_info); | Status ret = GraphExecutor::GetOrigInputInfo(model_id, index, orig_input_info); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "GetOrigInputInfo failed."); | |||||
| REPORT_CALL_ERROR("E19999", "Get Orig Input Info failed, model id:%u.", model_id); | |||||
| GELOGE(ret, "[Get][OrigInputInfo] failed, model id:%u.", model_id); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -1067,13 +1164,15 @@ Status GeExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, | |||||
| std::vector<InputOutputDims> &output_dims) { | std::vector<InputOutputDims> &output_dims) { | ||||
| GELOGI("Begin to GetAllAippInputOutputDims."); | GELOGI("Begin to GetAllAippInputOutputDims."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| Status ret = GraphExecutor::GetAllAippInputOutputDims(model_id, index, input_dims, output_dims); | Status ret = GraphExecutor::GetAllAippInputOutputDims(model_id, index, input_dims, output_dims); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "GetAllAippInputOutputDims failed."); | |||||
| REPORT_CALL_ERROR("E19999", "Get All Aipp Input Output Dims failed, model id:%u.", model_id); | |||||
| GELOGE(ret, "[Get][AllAippInputOutputDims] failed, model id:%u.", model_id); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -1085,7 +1184,10 @@ Status GeExecutor::GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_ | |||||
| GELOGI("Begin to GetOpDescInfo."); | GELOGI("Begin to GetOpDescInfo."); | ||||
| Status ret = GraphExecutor::GetOpDescInfo(device_id, stream_id, task_id, op_desc_info); | Status ret = GraphExecutor::GetOpDescInfo(device_id, stream_id, task_id, op_desc_info); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "GetOpDescInfo failed."); | |||||
| REPORT_CALL_ERROR("E19999", "get opdesc info failed, device_id:%u, stream_id:%u, task_id:%u.", | |||||
| device_id, stream_id, task_id); | |||||
| GELOGE(ret, "[Get][OpDescInfo] failed, device_id:%u, stream_id:%u, task_id:%u.", | |||||
| device_id, stream_id, task_id); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| GELOGI("GetOpDescInfo succ."); | GELOGI("GetOpDescInfo succ."); | ||||
| @@ -1096,7 +1198,7 @@ Status GeExecutor::SetDump(const DumpConfig &dump_config) { | |||||
| GELOGI("Start to set dump config"); | GELOGI("Start to set dump config"); | ||||
| auto ret = DumpManager::GetInstance().SetDumpConf(dump_config); | auto ret = DumpManager::GetInstance().SetDumpConf(dump_config); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Set dump conf failed"); | |||||
| GELOGE(ret, "[Set][DumpConf] failed, ret:%d", ret); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| GELOGI("Set dump config successfully"); | GELOGI("Set dump config successfully"); | ||||
| @@ -0,0 +1,58 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "ge_opt_info/ge_opt_info.h" | |||||
| #include <string> | |||||
| #include <map> | |||||
| #include "graph/ge_local_context.h" | |||||
| #include "ge/ge_api_types.h" | |||||
| #include "common/debug/ge_log.h" | |||||
| #include "opt_info.h" | |||||
| namespace ge { | |||||
| Status GeOptInfo::SetOptInfo() { | |||||
| std::string soc_ver; | |||||
| graphStatus ret = GetThreadLocalContext().GetOption(SOC_VERSION, soc_ver); | |||||
| if (ret != GRAPH_SUCCESS) { | |||||
| REPORT_CALL_ERROR("E19999", "Get soc version failed."); | |||||
| GELOGE(FAILED, "[Get][SocVersion]Get soc version failed."); | |||||
| return FAILED; | |||||
| } | |||||
| GELOGD("Soc version:%s.", soc_ver.c_str()); | |||||
| std::map<std::string, std::string> opt_info; | |||||
| // The first argument has no effect at present. | |||||
| if (gelc::GetOptInfo(gelc::kOffline, soc_ver, opt_info) != gelc::SUCCESS) { | |||||
| REPORT_CALL_ERROR("E19999", "Get optional information failed, is_offline:%d, soc version:%s", | |||||
| gelc::kOffline, soc_ver.c_str()); | |||||
| GELOGE(FAILED, "[Get][OptInfo]Get optional information failed, is_offline:%d, soc version:%s", | |||||
| gelc::kOffline, soc_ver.c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| // Do nothing if the returned information is empty. | |||||
| if (opt_info.empty()) { | |||||
| GELOGI("Optional information is empty."); | |||||
| return SUCCESS; | |||||
| } | |||||
| std::map<std::string, std::string> graph_options = GetThreadLocalContext().GetAllGraphOptions(); | |||||
| for (const auto &itr : opt_info) { | |||||
| graph_options.emplace(itr.first, itr.second); | |||||
| GELOGI("Get optional information success, key:%s, value:%s.", itr.first.c_str(), itr.second.c_str()); | |||||
| } | |||||
| GetThreadLocalContext().SetGraphOption(graph_options); | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | |||||
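The merge in SetOptInfo above relies on std::map::emplace, which never overwrites an existing key: options already present in the thread-local graph options keep their values, and only missing keys are filled in from the platform information. A minimal standalone sketch of that behavior (the option keys below are hypothetical, not real GE options):

#include <iostream>
#include <map>
#include <string>

int main() {
  std::map<std::string, std::string> graph_options{{"ge.someOption", "user_value"}};
  const std::map<std::string, std::string> opt_info{{"ge.someOption", "platform_value"},
                                                    {"ge.otherOption", "on"}};
  for (const auto &itr : opt_info) {
    graph_options.emplace(itr.first, itr.second);  // inserts only when the key is absent
  }
  std::cout << graph_options["ge.someOption"] << std::endl;   // prints "user_value"
  std::cout << graph_options["ge.otherOption"] << std::endl;  // prints "on"
  return 0;
}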
| @@ -14,23 +14,18 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_HOST_OP_H_ | |||||
| #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_HOST_OP_H_ | |||||
| #ifndef GE_OPT_INFO_GE_OPT_INFO_H_ | |||||
| #define GE_OPT_INFO_GE_OPT_INFO_H_ | |||||
| #include "stub_engine/ops_kernel_store/op/op.h" | |||||
| #include "ge/ge_api_error_codes.h" | |||||
| #include "register/register_types.h" | |||||
| namespace ge { | namespace ge { | ||||
| namespace st { | |||||
| class GE_FUNC_VISIBILITY HostOp : public Op { | |||||
| class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY GeOptInfo { | |||||
| public: | public: | ||||
| HostOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} | |||||
| ~HostOp() override = default; | |||||
| HostOp &operator=(const HostOp &op) = delete; | |||||
| HostOp(const HostOp &op) = delete; | |||||
| Status Run() override; | |||||
| GeOptInfo() = default; | |||||
| static Status SetOptInfo(); | |||||
| }; | }; | ||||
| } // namespace st | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_HOST_OP_H_ | |||||
| #endif // GE_OPT_INFO_GE_OPT_INFO_H_ | |||||
| @@ -16,6 +16,7 @@ | |||||
| #include "ge_runtime/task/hccl_task.h" | #include "ge_runtime/task/hccl_task.h" | ||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "framework/common/util.h" | |||||
| #include "ge_runtime/task/task_factory.h" | #include "ge_runtime/task/task_factory.h" | ||||
| #include "common/opskernel/ops_kernel_info_store.h" | #include "common/opskernel/ops_kernel_info_store.h" | ||||
| #include "common/opskernel/ge_task_info.h" | #include "common/opskernel/ge_task_info.h" | ||||
| @@ -72,7 +72,7 @@ bool LabelGotoTask::Distribute() { | |||||
| return false; | return false; | ||||
| } | } | ||||
| rt_ret = rtLabelListCpy((void**)label_list.data(), label_list.size(), label_info_, label_info_size); | |||||
| rt_ret = rtLabelListCpy(reinterpret_cast<void**>(label_list.data()), label_list.size(), label_info_, label_info_size); | |||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | ||||
| return false; | return false; | ||||
| @@ -80,8 +80,7 @@ bool LabelAllocator::CollectFunctionalNode(ComputeGraphPtr &graph, std::set<Node | |||||
| NodePtr func_node = graph->GetParentNode(); | NodePtr func_node = graph->GetParentNode(); | ||||
| if (func_node == nullptr) { | if (func_node == nullptr) { | ||||
| REPORT_INNER_ERROR("E19999", "Parent node not set in node:%s(%s), graph:%s", | |||||
| func_node->GetName().c_str(), func_node->GetType().c_str(), graph->GetName().c_str()); | |||||
| REPORT_INNER_ERROR("E19999", "Parent node not set, graph:%s", graph->GetName().c_str()); | |||||
| GELOGE(INTERNAL_ERROR, "[Get][Node] Parent functional node not set: %s.", graph->GetName().c_str()); | GELOGE(INTERNAL_ERROR, "[Get][Node] Parent functional node not set: %s.", graph->GetName().c_str()); | ||||
| return false; | return false; | ||||
| } | } | ||||
| @@ -32,7 +32,6 @@ | |||||
| #include "graph/ge_attr_value.h" | #include "graph/ge_attr_value.h" | ||||
| #include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
| #include "external/graph/ge_error_codes.h" | #include "external/graph/ge_error_codes.h" | ||||
| #include "graph/manager/graph_mem_allocator.h" | |||||
| #include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
| #include "graph/optimize/common/params.h" | #include "graph/optimize/common/params.h" | ||||
| #include "external/graph/types.h" | #include "external/graph/types.h" | ||||
| @@ -707,7 +706,7 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { | |||||
| if (!kernel_name.empty() && (kernel_buffer.GetSize() > 0)) { | if (!kernel_name.empty() && (kernel_buffer.GetSize() > 0)) { | ||||
| GE_CHECK_NOTNULL(kernel_buffer.GetData()); | GE_CHECK_NOTNULL(kernel_buffer.GetData()); | ||||
| std::vector<char> data(kernel_buffer.GetData(), kernel_buffer.GetData() + kernel_buffer.GetSize()); | std::vector<char> data(kernel_buffer.GetData(), kernel_buffer.GetData() + kernel_buffer.GetSize()); | ||||
| tbe_kernel = std::make_shared<OpKernelBin>(kernel_name, std::move(data)); | |||||
| tbe_kernel = MakeShared<OpKernelBin>(kernel_name, std::move(data)); | |||||
| GE_CHECK_NOTNULL(tbe_kernel); | GE_CHECK_NOTNULL(tbe_kernel); | ||||
| GELOGI("Node [%s][%s] start recovery extra attr %s from %s", node_op_desc->GetName().c_str(), | GELOGI("Node [%s][%s] start recovery extra attr %s from %s", node_op_desc->GetName().c_str(), | ||||
| node_op_desc->GetType().c_str(), ge::OP_EXTATTR_NAME_TBE_KERNEL, ATTR_NAME_TBE_KERNEL_NAME.c_str()); | node_op_desc->GetType().c_str(), ge::OP_EXTATTR_NAME_TBE_KERNEL, ATTR_NAME_TBE_KERNEL_NAME.c_str()); | ||||
| @@ -793,7 +793,6 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
| GELOGI("Start AutoFindBpOpIndex"); | GELOGI("Start AutoFindBpOpIndex"); | ||||
| NodePtr bp_node = nullptr; | NodePtr bp_node = nullptr; | ||||
| uint32_t current_idx = 0; | uint32_t current_idx = 0; | ||||
| uint32_t netoutput_idx = 0; | |||||
| for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | ||||
| OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| @@ -811,7 +810,6 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
| if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { | if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { | ||||
| if (bp_node == nullptr) { | if (bp_node == nullptr) { | ||||
| bp_node = node; | bp_node = node; | ||||
| netoutput_idx = current_idx - 1; | |||||
| } | } | ||||
| } | } | ||||
| if (graph->GetNeedIteration()) { | if (graph->GetNeedIteration()) { | ||||
| @@ -836,34 +834,30 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
| if (bp_node == nullptr) { | if (bp_node == nullptr) { | ||||
| GELOGW("not find bp_node."); | GELOGW("not find bp_node."); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } else if (bp_node->GetName() == NODE_NAME_NET_OUTPUT) { | |||||
| profiling_point.bp_index = netoutput_idx; | |||||
| GELOGI("First bp name %s, idx %u", bp_node->GetName().c_str(), netoutput_idx); | |||||
| } else { | |||||
| profiling_point.bp_index = FindLastBpFromBpNode(graph, bp_node); | |||||
| } | } | ||||
| return SUCCESS; | |||||
| return FindLastBpFromBpNode(graph, bp_node, profiling_point.bp_index); | |||||
| } | } | ||||
| uint32_t TaskGenerator::FindLastBpFromBpNode(const ComputeGraphPtr &graph, const NodePtr &bp_node) const { | |||||
| uint32_t last_bp = 0; | |||||
| Status TaskGenerator::FindLastBpFromBpNode(const ComputeGraphPtr &graph, const NodePtr &target_node, | |||||
| uint32_t &bp_index) const { | |||||
| bp_index = 0; | |||||
| auto target_desc = target_node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(target_desc); | |||||
| OpDescPtr bp_op_desc = nullptr; | OpDescPtr bp_op_desc = nullptr; | ||||
| for (auto &in_anchor : bp_node->GetAllInDataAnchors()) { | |||||
| auto out_anchor = in_anchor->GetPeerOutAnchor(); | |||||
| if (out_anchor == nullptr || out_anchor->GetOwnerNode() == nullptr) { | |||||
| continue; | |||||
| } | |||||
| auto out_node_desc = out_anchor->GetOwnerNode()->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(out_node_desc); | |||||
| if (bp_op_desc == nullptr || ((out_node_desc->GetId()) > (bp_op_desc->GetId()))) { | |||||
| bp_op_desc = out_node_desc; | |||||
| for (auto &in_node : target_node->GetInAllNodes()) { | |||||
| GE_CHECK_NOTNULL(in_node); | |||||
| auto in_node_desc = in_node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(in_node_desc); | |||||
| if ((bp_op_desc == nullptr || (in_node_desc->GetId() > bp_op_desc->GetId())) && | |||||
| (in_node_desc->GetStreamId() == target_desc->GetStreamId())) { | |||||
| bp_op_desc = in_node_desc; | |||||
| } | } | ||||
| GELOGI("bp_op_desc is %s, id is %ld", bp_op_desc->GetName().c_str(), bp_op_desc->GetId()); | |||||
| } | } | ||||
| if (bp_op_desc == nullptr) { | if (bp_op_desc == nullptr) { | ||||
| return last_bp; | |||||
| GELOGI("Did not find bp node."); | |||||
| return SUCCESS; | |||||
| } | } | ||||
| uint32_t current_idx = 0; | uint32_t current_idx = 0; | ||||
| for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | ||||
| @@ -871,12 +865,14 @@ uint32_t TaskGenerator::FindLastBpFromBpNode(const ComputeGraphPtr &graph, const | |||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| current_idx++; | current_idx++; | ||||
| if (op_desc->GetName() == bp_op_desc->GetName()) { | if (op_desc->GetName() == bp_op_desc->GetName()) { | ||||
| last_bp = current_idx; | |||||
| GELOGI("First bp name %s, idx %u", op_desc->GetName().c_str(), last_bp); | |||||
| bp_index = current_idx; | |||||
| GELOGI("Find bp name %s, idx %u", op_desc->GetName().c_str(), bp_index); | |||||
| break; | break; | ||||
| } | } | ||||
| } | } | ||||
| return last_bp; | |||||
| GELOGI("Last bp node[%s], type[%s], index[%u], stream id[%ld]", bp_op_desc->GetName().c_str(), | |||||
| bp_op_desc->GetType().c_str(), bp_index, bp_op_desc->GetStreamId()); | |||||
| return SUCCESS; | |||||
| } | } | ||||
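The reworked FindLastBpFromBpNode above picks, among the target node's input nodes, the one with the largest op id that sits on the same stream as the target, and returns SUCCESS with bp_index left at 0 when nothing qualifies. A condensed sketch of just that selection rule, using hypothetical stand-in structs rather than the real Node/OpDesc classes:

#include <cstdint>
#include <vector>

// Hypothetical stand-in for the two OpDesc fields the selection reads.
struct OpInfo {
  int64_t id;
  int64_t stream_id;
};

// Return the input op with the largest id on the target's stream, or nullptr
// when no input qualifies (the "Did not find bp node" case).
const OpInfo *PickLastBp(const std::vector<OpInfo> &in_nodes, const OpInfo &target) {
  const OpInfo *picked = nullptr;
  for (const OpInfo &in : in_nodes) {
    if ((picked == nullptr || in.id > picked->id) && (in.stream_id == target.stream_id)) {
      picked = &in;
    }
  }
  return picked;
}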
| Status TaskGenerator::FindFpOfEnv(const ComputeGraphPtr &graph, const std::string &fp_point_str, | Status TaskGenerator::FindFpOfEnv(const ComputeGraphPtr &graph, const std::string &fp_point_str, | ||||
| @@ -116,7 +116,7 @@ class TaskGenerator { | |||||
| Status AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point) const; | Status AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point) const; | ||||
| Status AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, | Status AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, | ||||
| vector<uint32_t> &all_reduce_nodes) const; | vector<uint32_t> &all_reduce_nodes) const; | ||||
| uint32_t FindLastBpFromBpNode(const ComputeGraphPtr &graph, const NodePtr &bp_node) const; | |||||
| Status FindLastBpFromBpNode(const ComputeGraphPtr &graph, const NodePtr &bp_node, uint32_t &bp_index) const; | |||||
| Status FindFpOfEnv(const ComputeGraphPtr &graph, const std::string &fp_point_str, | Status FindFpOfEnv(const ComputeGraphPtr &graph, const std::string &fp_point_str, | ||||
| ProfilingPoint &profiling_point) const; | ProfilingPoint &profiling_point) const; | ||||
| @@ -645,6 +645,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]); | args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]); | ||||
| GE_CHECK_NOTNULL(args_addr); | |||||
| errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | ||||
| if (sec_ret != EOK) { | if (sec_ret != EOK) { | ||||
| REPORT_CALL_ERROR("E19999", "Call memcpy_s fail, size:%u, ret:0x%X", args_size_, sec_ret); | REPORT_CALL_ERROR("E19999", "Call memcpy_s fail, size:%u, ret:0x%X", args_size_, sec_ret); | ||||
| @@ -1000,6 +1001,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
| // copy args to new host memory | // copy args to new host memory | ||||
| args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]); | args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]); | ||||
| GE_CHECK_NOTNULL(args_addr); | |||||
| GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_) | GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_) | ||||
| errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | ||||
| if (sec_ret != EOK) { | if (sec_ret != EOK) { | ||||
| @@ -20,7 +20,6 @@ | |||||
| #include <string> | #include <string> | ||||
| #include <utility> | #include <utility> | ||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "graph/manager/graph_mem_manager.h" | #include "graph/manager/graph_mem_manager.h" | ||||
| namespace ge { | namespace ge { | ||||
| @@ -94,7 +93,8 @@ void IncreaseCount(std::map<size_t, size_t> &count, size_t size) { | |||||
| } | } | ||||
| } | } | ||||
| CachingAllocator::CachingAllocator(rtMemType_t memory_type) : memory_type_(memory_type), memory_allocator_(nullptr) { | |||||
| CachingAllocator::CachingAllocator(rtMemType_t memory_type) | |||||
| : memory_type_(memory_type), memory_allocator_(nullptr), called_malloc_counts_(0), called_free_counts_(0) { | |||||
| for (uint32_t i = 0; i < kNumBins; i++) { | for (uint32_t i = 0; i < kNumBins; i++) { | ||||
| free_block_bins_[i] = nullptr; | free_block_bins_[i] = nullptr; | ||||
| } | } | ||||
| @@ -121,6 +121,8 @@ Status CachingAllocator::Initialize(uint32_t device_id) { | |||||
| if (memory_allocator_ == nullptr) { | if (memory_allocator_ == nullptr) { | ||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | return ACL_ERROR_GE_INTERNAL_ERROR; | ||||
| } | } | ||||
| called_malloc_counts_ = 0; | |||||
| called_free_counts_ = 0; | |||||
| return ge::SUCCESS; | return ge::SUCCESS; | ||||
| } | } | ||||
| @@ -133,6 +135,7 @@ void CachingAllocator::Finalize(uint32_t device_id) { | |||||
| uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) { | uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) { | ||||
| GELOGI("Start malloc pool memory, size = %zu, device id = %u", size, device_id); | GELOGI("Start malloc pool memory, size = %zu, device id = %u", size, device_id); | ||||
| called_malloc_counts_++; | |||||
| size = GetBlockSize(size); | size = GetBlockSize(size); | ||||
| uint8_t *ptr = nullptr; | uint8_t *ptr = nullptr; | ||||
| Block *block = FindFreeBlock(size, org_ptr, device_id); | Block *block = FindFreeBlock(size, org_ptr, device_id); | ||||
| @@ -156,6 +159,7 @@ uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device | |||||
| Status CachingAllocator::Free(uint8_t *ptr, uint32_t device_id) { | Status CachingAllocator::Free(uint8_t *ptr, uint32_t device_id) { | ||||
| GELOGI("Free device id = %u", device_id); | GELOGI("Free device id = %u", device_id); | ||||
| called_free_counts_++; | |||||
| if (ptr == nullptr) { | if (ptr == nullptr) { | ||||
| REPORT_INNER_ERROR("E19999", "Param ptr is nullptr, device_id:%u, check invalid", device_id); | REPORT_INNER_ERROR("E19999", "Param ptr is nullptr, device_id:%u, check invalid", device_id); | ||||
| GELOGE(PARAM_INVALID, "[Check][Param] Invalid memory pointer, device_id:%u", device_id); | GELOGE(PARAM_INVALID, "[Check][Param] Invalid memory pointer, device_id:%u", device_id); | ||||
| @@ -283,6 +287,7 @@ Status CachingAllocator::TryExtendCache(size_t size, uint32_t device_id) { | |||||
| if (memory_addr == nullptr) { | if (memory_addr == nullptr) { | ||||
| GELOGE(ge::FAILED, "[Malloc][Memory] failed, no enough memory for size = %zu, device_id = %u", memory_size, | GELOGE(ge::FAILED, "[Malloc][Memory] failed, no enough memory for size = %zu, device_id = %u", memory_size, | ||||
| device_id); | device_id); | ||||
| PrintStatics(DLOG_ERROR); | |||||
| return ge::FAILED; | return ge::FAILED; | ||||
| } | } | ||||
| GELOGT(TRACE_RUNNING, "Try to free cached memory size:%zu and malloc memory size:%zu success.", | GELOGT(TRACE_RUNNING, "Try to free cached memory size:%zu and malloc memory size:%zu success.", | ||||
| @@ -385,14 +390,14 @@ void CachingAllocator::FreeBlockBins() { | |||||
| } | } | ||||
| void PrintCount(std::map<size_t, size_t> &count, const std::string &name, size_t total_size, size_t total_count) { | void PrintCount(std::map<size_t, size_t> &count, const std::string &name, size_t total_size, size_t total_count) { | ||||
| GELOGI("%6s total[size:%10zu count:%10zu].", name.c_str(), total_size, total_count); | |||||
| GEEVENT("%6s total[size:%11zu count:%11zu].", name.c_str(), total_size, total_count); | |||||
| for (auto &it : count) { | for (auto &it : count) { | ||||
| GELOGI(" |- block[size:%10zu count:%10zu].", it.first, it.second); | |||||
| GEEVENT(" |- block[size:%11zu count:%11zu].", it.first, it.second); | |||||
| } | } | ||||
| } | } | ||||
| void CachingAllocator::PrintStatics() { | |||||
| if (!IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) { | |||||
| void CachingAllocator::PrintStatics(int32_t level) { | |||||
| if (!IsLogEnable(GE_MODULE_NAME, level)) { | |||||
| return; | return; | ||||
| } | } | ||||
| size_t total_using_size = 0; | size_t total_using_size = 0; | ||||
| @@ -435,6 +440,7 @@ void CachingAllocator::PrintStatics() { | |||||
| } | } | ||||
| } while (0); | } while (0); | ||||
| GEEVENT("Called counts[malloc:%11zu free:%11zu].", called_malloc_counts_.load(), called_free_counts_.load()); | |||||
| PrintCount(malloc_block_stat, "Malloc", total_malloc_size, total_malloc_count); | PrintCount(malloc_block_stat, "Malloc", total_malloc_size, total_malloc_count); | ||||
| PrintCount(using_block_stat, "Using", total_using_size, total_using_count); | PrintCount(using_block_stat, "Using", total_using_size, total_using_count); | ||||
| PrintCount(free_block_stat, "Free", total_free_size, total_free_count); | PrintCount(free_block_stat, "Free", total_free_size, total_free_count); | ||||
| @@ -27,6 +27,7 @@ | |||||
| #include <unordered_map> | #include <unordered_map> | ||||
| #include <unordered_set> | #include <unordered_set> | ||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "framework/common/ge_inner_error_codes.h" | #include "framework/common/ge_inner_error_codes.h" | ||||
| #include "graph/node.h" | #include "graph/node.h" | ||||
| #include "graph/manager/block_memory.h" | #include "graph/manager/block_memory.h" | ||||
| @@ -192,9 +193,10 @@ class CachingAllocator { | |||||
| /// | /// | ||||
| /// @ingroup ge_graph | /// @ingroup ge_graph | ||||
| /// @brief print the memory info in pool | /// @brief print the memory info in pool | ||||
| /// @param [in] level  log level used for printing | |||||
| /// @return void | /// @return void | ||||
| /// | /// | ||||
| void PrintStatics(); | |||||
| void PrintStatics(int32_t level = DLOG_INFO); | |||||
| private: | private: | ||||
| rtMemType_t memory_type_; | rtMemType_t memory_type_; | ||||
| @@ -213,6 +215,12 @@ class CachingAllocator { | |||||
| // malloced memorys from device | // malloced memorys from device | ||||
| std::map<size_t, size_t> malloced_memory_; | std::map<size_t, size_t> malloced_memory_; | ||||
| // total count of user calls to Malloc | |||||
| std::atomic<size_t> called_malloc_counts_; | |||||
| // total count of user calls to Free | |||||
| std::atomic<size_t> called_free_counts_; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_GRAPH_MANAGER_GRAPH_CACHING_ALLOCATOR_H_ | #endif // GE_GRAPH_MANAGER_GRAPH_CACHING_ALLOCATOR_H_ | ||||
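The two new std::atomic<size_t> members back the `Called counts[malloc:... free:...]` event line printed by PrintStatics. Their initialization is not part of the hunks shown here; a plausible sketch, where the constructor signature and start values are assumptions of this note rather than code from the change:

// Hypothetical constructor sketch: start both call counters at zero so the totals
// reported by PrintStatics() count calls made since the allocator was constructed.
CachingAllocator::CachingAllocator(rtMemType_t memory_type)
    : memory_type_(memory_type), called_malloc_counts_(0), called_free_counts_(0) {}

// Each public entry point then bumps its own counter, as Free() does above with
// called_free_counts_++; Malloc() is assumed to do the same with called_malloc_counts_++.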
| @@ -27,6 +27,7 @@ | |||||
| #include "common/math/math_util.h" | #include "common/math/math_util.h" | ||||
| #include "common/thread_pool.h" | #include "common/thread_pool.h" | ||||
| #include "common/dump/dump_manager.h" | #include "common/dump/dump_manager.h" | ||||
| #include "ge_opt_info/ge_opt_info.h" | |||||
| #include "analyzer/analyzer.h" | #include "analyzer/analyzer.h" | ||||
| #include "graph/common/ge_call_wrapper.h" | #include "graph/common/ge_call_wrapper.h" | ||||
| #include "graph/common/local_context.h" | #include "graph/common/local_context.h" | ||||
| @@ -1002,6 +1003,12 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| ret = GeOptInfo::SetOptInfo(); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "[Set][OptInfo] Set optional information failed."); | |||||
| return ret; | |||||
| } | |||||
| /// 1. BUILD_MODE_TUNING with BUILD_STEP_AFTER_UB_MATCH no need PreRunOptimizeOriginalGraph; | /// 1. BUILD_MODE_TUNING with BUILD_STEP_AFTER_UB_MATCH no need PreRunOptimizeOriginalGraph; | ||||
| /// 2. BUILD_MODE_TUNING with BUILD_STEP_AFTER_MERGE no need PreRunOptimizeOriginalGraph. | /// 2. BUILD_MODE_TUNING with BUILD_STEP_AFTER_MERGE no need PreRunOptimizeOriginalGraph. | ||||
| /// 3. BUILD_MODE_TUNING with BUILD_STEP_AFTER_BUILDER_SUB no need PreRunOptimizeOriginalGraph. | /// 3. BUILD_MODE_TUNING with BUILD_STEP_AFTER_BUILDER_SUB no need PreRunOptimizeOriginalGraph. | ||||
| @@ -194,35 +194,6 @@ ge::Status VarResource::GetBroadCastInfo(uint32_t graph_id, const string &var_na | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| ge::Status VarResource::SyncVarData2BroadCast(uint32_t graph_id, const std::string &var_name, | |||||
| const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr) { | |||||
| GE_CHECK_NOTNULL(base_ptr); | |||||
| GELOGI("SyncVarData2BroadCast graph_id: %u, var_name: %s.", graph_id, var_name.c_str()); | |||||
| VarBroadCastInfo var_broadcast_info = var_broad_cast_info_[graph_id][var_name]; | |||||
| uint8_t *dst_addr = base_ptr + var_broadcast_info.input_offset; | |||||
| return ge::TransVarDataUtils::SyncVarData2BroadCast(var_name, var_tensor_desc, dst_addr, | |||||
| var_broadcast_info.input_size, session_id_); | |||||
| } | |||||
| ge::Status VarResource::SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, | |||||
| const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr) { | |||||
| GELOGI("SyncBroadCastData2Var var_name: %s", var_name.c_str()); | |||||
| VarBroadCastInfo var_broadcast_info = var_broad_cast_info_[graph_id][var_name]; | |||||
| // subgraph base_ptr could be nullptr, task it as base 0 | |||||
| uint8_t *dst_addr = base_ptr + var_broadcast_info.output_offset; | |||||
| return ge::TransVarDataUtils::SyncBroadCastData2Var(dst_addr, var_broadcast_info.output_size, var_name, | |||||
| var_tensor_desc, session_id_); | |||||
| } | |||||
| ge::Status VarResource::SyncVarData(uint32_t graph_id, const std::string &var_name, | |||||
| const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr) { | |||||
| return SyncVarData2BroadCast(graph_id, var_name, var_tensor_desc, base_ptr); | |||||
| } | |||||
| bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_map_.count(offset) > 0; } | bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_map_.count(offset) > 0; } | ||||
| rtMemType_t VarResource::GetVarMemType(const int64_t &offset) { | rtMemType_t VarResource::GetVarMemType(const int64_t &offset) { | ||||
| @@ -638,16 +609,6 @@ bool VarManager::IsVarExist(const std::string &var_name) { | |||||
| return var_resource_->IsVarExist(var_name); | return var_resource_->IsVarExist(var_name); | ||||
| } | } | ||||
| ge::Status VarManager::SyncVarData(uint32_t graph_id, const std::string &var_name, const GeTensorDesc &var_tensor_desc, | |||||
| uint8_t *base_ptr) { | |||||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||||
| if (var_resource_ == nullptr) { | |||||
| GELOGW("VarManager has not been init."); | |||||
| return ge::INTERNAL_ERROR; | |||||
| } | |||||
| return var_resource_->SyncVarData(graph_id, var_name, var_tensor_desc, base_ptr); | |||||
| } | |||||
| ge::Status VarManager::GetCurVarDesc(const std::string &var_name, ge::GeTensorDesc &tensor_desc) { | ge::Status VarManager::GetCurVarDesc(const std::string &var_name, ge::GeTensorDesc &tensor_desc) { | ||||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | std::lock_guard<std::recursive_mutex> lock(mutex_); | ||||
| GELOGI("VarManager::GetCurVarDesc var_name = %s.", var_name.c_str()); | GELOGI("VarManager::GetCurVarDesc var_name = %s.", var_name.c_str()); | ||||
| @@ -701,16 +662,6 @@ ge::Status VarManager::RenewCurVarDesc(const std::string &var_name, ge::OpDescPt | |||||
| return var_resource_->RenewCurVarDesc(var_name, std::move(op_desc)); | return var_resource_->RenewCurVarDesc(var_name, std::move(op_desc)); | ||||
| } | } | ||||
| ge::Status VarManager::SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, | |||||
| const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr) { | |||||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||||
| if (var_resource_ == nullptr) { | |||||
| GELOGW("VarManager has not been init."); | |||||
| return ge::INTERNAL_ERROR; | |||||
| } | |||||
| return var_resource_->SyncBroadCastData2Var(graph_id, var_name, var_tensor_desc, base_ptr); | |||||
| } | |||||
| bool VarManager::IsVarAddr(const int64_t &offset) { | bool VarManager::IsVarAddr(const int64_t &offset) { | ||||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | std::lock_guard<std::recursive_mutex> lock(mutex_); | ||||
| if (var_resource_ == nullptr) { | if (var_resource_ == nullptr) { | ||||
| @@ -118,15 +118,6 @@ class VarResource { | |||||
| ge::Status GetBroadCastInfo(uint32_t graph_id, const string &var_name, VarBroadCastInfo &broad_cast_info); | ge::Status GetBroadCastInfo(uint32_t graph_id, const string &var_name, VarBroadCastInfo &broad_cast_info); | ||||
| ge::Status SyncVarData2BroadCast(uint32_t graph_id, const std::string &var_name, | |||||
| const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr); | |||||
| ge::Status SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, | |||||
| const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr); | |||||
| ge::Status SyncVarData(uint32_t graph_id, const std::string &var_name, const GeTensorDesc &var_tensor_desc, | |||||
| uint8_t *base_ptr); | |||||
| Status SetTransRoad(const std::string &var_name, const VarTransRoad &trans_road) { | Status SetTransRoad(const std::string &var_name, const VarTransRoad &trans_road) { | ||||
| if (var_to_trans_road_.find(var_name) != var_to_trans_road_.end()) { | if (var_to_trans_road_.find(var_name) != var_to_trans_road_.end()) { | ||||
| GELOGW("Var name: %s has already set.", var_name.c_str()); | GELOGW("Var name: %s has already set.", var_name.c_str()); | ||||
| @@ -234,16 +225,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager { | |||||
| ge::Status GetVarAddr(const std::string &var_name, const ge::GeTensorDesc &tensor_desc, uint8_t **dev_ptr); | ge::Status GetVarAddr(const std::string &var_name, const ge::GeTensorDesc &tensor_desc, uint8_t **dev_ptr); | ||||
| ge::Status SyncVarData(uint32_t graph_id, const std::string &var_name, const GeTensorDesc &var_tensor_desc, | |||||
| uint8_t *base_ptr); | |||||
| ge::Status SaveBroadCastInfo(uint32_t graph_id, const VarBroadCastInfo &broad_cast_info); | ge::Status SaveBroadCastInfo(uint32_t graph_id, const VarBroadCastInfo &broad_cast_info); | ||||
| ge::Status GetBroadCastInfo(uint32_t graph_id, const string &var_name, VarBroadCastInfo &broad_cast_info); | ge::Status GetBroadCastInfo(uint32_t graph_id, const string &var_name, VarBroadCastInfo &broad_cast_info); | ||||
| ge::Status SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, const GeTensorDesc &var_tensor_desc, | |||||
| uint8_t *base_ptr); | |||||
| ge::Status GetCurVarDesc(const std::string &var_name, ge::GeTensorDesc &tensor_desc); | ge::Status GetCurVarDesc(const std::string &var_name, ge::GeTensorDesc &tensor_desc); | ||||
| ge::Status RenewCurVarDesc(const std::string &var_name, ge::OpDescPtr op_desc); | ge::Status RenewCurVarDesc(const std::string &var_name, ge::OpDescPtr op_desc); | ||||
| @@ -415,72 +415,6 @@ Status CopyTensorFromSrcVarNode(const NodePtr &var_src, | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| } // namespace | } // namespace | ||||
| Status TransVarDataUtils::SyncVarData2BroadCast(const string &var_name, const ge::GeTensorDesc &src_tensor_desc, | |||||
| uint8_t *dst_addr, int64_t dst_addr_size, uint64_t session_id) { | |||||
| GE_CHK_BOOL_RET_STATUS(dst_addr != nullptr, FAILED, "[Check][Param] dst addr is nullptr."); | |||||
| uint8_t *src_host_addr = nullptr; | |||||
| int64_t src_addr_size = 0; | |||||
| GE_MAKE_GUARD_RTMEM(src_host_addr); | |||||
| GE_CHK_STATUS_RET(SyncTensorToHost(var_name, src_tensor_desc, &src_host_addr, src_addr_size, session_id)); | |||||
| GELOGI("src_addr_size: %ld, dst_addr_size: %ld", src_addr_size, dst_addr_size); | |||||
| GE_CHK_BOOL_RET_STATUS(src_addr_size == dst_addr_size, FAILED, | |||||
| "[Check][Param] src_addr_size:%ld not equal to dst_addr_size:%ld", | |||||
| src_addr_size, dst_addr_size); | |||||
| GE_CHK_RT_RET(rtMemcpy(dst_addr, dst_addr_size, src_host_addr, src_addr_size, RT_MEMCPY_HOST_TO_DEVICE)); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status TransVarDataUtils::SyncBroadCastData2Var(uint8_t *src_addr, int64_t src_addr_size, const string &var_name, | |||||
| const ge::GeTensorDesc &dst_tensor_desc, uint64_t session_id) { | |||||
| GE_CHK_BOOL_RET_STATUS(src_addr != nullptr, FAILED, "[Check][Param] src addr is nullptr. "); | |||||
| uint8_t *host_addr = nullptr; | |||||
| GE_MAKE_GUARD_RTMEM(host_addr); | |||||
| GE_CHK_RT_RET(rtMallocHost(reinterpret_cast<void **>(&host_addr), src_addr_size)); | |||||
| GE_CHK_RT_RET(rtMemcpy(host_addr, src_addr_size, src_addr, src_addr_size, RT_MEMCPY_DEVICE_TO_HOST)); | |||||
| GE_CHK_STATUS_RET( | |||||
| SyncTensorToDevice(var_name, reinterpret_cast<uint8_t *>(host_addr), src_addr_size, dst_tensor_desc, session_id)); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status TransVarDataUtils::SyncTensorToHost(const string &var_name, const ge::GeTensorDesc &src_tensor_desc, | |||||
| uint8_t **host_addr, int64_t &src_tensor_size, uint64_t session_id) { | |||||
| GE_CHK_STATUS_RET(ge::TensorUtils::GetSize(src_tensor_desc, src_tensor_size), "[Get][Size] from TensorDesc failed"); | |||||
| uint8_t *src_addr = nullptr; | |||||
| GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, src_tensor_desc, &src_addr)); | |||||
| uint8_t *mem_addr = | |||||
| src_addr - | |||||
| static_cast<int64_t>(static_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) + | |||||
| static_cast<int64_t>( | |||||
| reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); | |||||
| GE_CHK_RT_RET(rtMallocHost(reinterpret_cast<void **>(host_addr), src_tensor_size)); | |||||
| GE_CHK_RT_RET(rtMemcpy(*host_addr, src_tensor_size, mem_addr, src_tensor_size, RT_MEMCPY_DEVICE_TO_HOST)); | |||||
| GELOGI("SyncTensorToHost var_name %s, src_tensor_size %ld", var_name.c_str(), src_tensor_size); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status TransVarDataUtils::SyncTensorToDevice(const string &var_name, const uint8_t *host_addr, uint32_t addr_size, | |||||
| const ge::GeTensorDesc &dst_tensor_desc, uint64_t session_id) { | |||||
| uint8_t *dst_addr = nullptr; | |||||
| GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, dst_tensor_desc, &dst_addr)); | |||||
| uint8_t *mem_addr = | |||||
| dst_addr - | |||||
| static_cast<int64_t>(static_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) + | |||||
| static_cast<int64_t>( | |||||
| reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); | |||||
| GE_CHK_RT_RET(rtMemcpy(mem_addr, addr_size, host_addr, addr_size, RT_MEMCPY_HOST_TO_DEVICE)); | |||||
| GELOGI("SyncTensorToDevice var_name %s, addr_size %u", var_name.c_str(), addr_size); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status TransVarDataUtils::TransAllVarData(const vector<NodePtr> &variable_nodes, | Status TransVarDataUtils::TransAllVarData(const vector<NodePtr> &variable_nodes, | ||||
| uint64_t session_id, | uint64_t session_id, | ||||
| rtContext_t context, | rtContext_t context, | ||||
| @@ -29,11 +29,6 @@ | |||||
| namespace ge { | namespace ge { | ||||
| class TransVarDataUtils { | class TransVarDataUtils { | ||||
| public: | public: | ||||
| static ge::Status SyncVarData2BroadCast(const string &var_name, const ge::GeTensorDesc &src_tensor_desc, | |||||
| uint8_t *dst_addr, int64_t dst_addr_size, uint64_t session_id_); | |||||
| static ge::Status SyncBroadCastData2Var(uint8_t *src_addr, int64_t src_addr_size, const string &var_name, | |||||
| const ge::GeTensorDesc &dst_tensor_desc, uint64_t session_id_); | |||||
| static ge::Status TransAllVarData(const std::vector<NodePtr> &variable_nodes, | static ge::Status TransAllVarData(const std::vector<NodePtr> &variable_nodes, | ||||
| uint64_t session_id, | uint64_t session_id, | ||||
| rtContext_t context, | rtContext_t context, | ||||
| @@ -41,12 +36,6 @@ class TransVarDataUtils { | |||||
| uint32_t thread_num = 16); | uint32_t thread_num = 16); | ||||
| static ge::Status CopyVarData(const ComputeGraphPtr &compute_graph, uint64_t session_id, uint32_t device_id); | static ge::Status CopyVarData(const ComputeGraphPtr &compute_graph, uint64_t session_id, uint32_t device_id); | ||||
| private: | |||||
| static ge::Status SyncTensorToHost(const string &var_name, const ge::GeTensorDesc &src_tensor_desc, | |||||
| uint8_t **host_addr, int64_t &addr_size, uint64_t session_id_); | |||||
| static ge::Status SyncTensorToDevice(const string &var_name, const uint8_t *host_addr, uint32_t addr_size, | |||||
| const ge::GeTensorDesc &dst_tensor_desc, uint64_t session_id_); | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -20,17 +20,23 @@ | |||||
| #include "external/graph/operator_factory.h" | #include "external/graph/operator_factory.h" | ||||
| #include "graph/utils/node_utils.h" | #include "graph/utils/node_utils.h" | ||||
| #include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
| #include "ge_local_engine/engine/host_cpu_engine.h" | |||||
| #include "init/gelib.h" | #include "init/gelib.h" | ||||
| namespace ge { | namespace ge { | ||||
| const int64_t kStartCallNum = 1; | const int64_t kStartCallNum = 1; | ||||
| const std::string kKernelLibName = "aicpu_tf_kernel"; | const std::string kKernelLibName = "aicpu_tf_kernel"; | ||||
| // tf_kernel.json opsFlag config | |||||
| const std::string kOpsFlagClose = "0"; | const std::string kOpsFlagClose = "0"; | ||||
| Status RunOpKernelWithCheck(NodePtr &node, | |||||
| const vector<ConstGeTensorPtr> &inputs, | |||||
| std::vector<GeTensorPtr> &outputs) { | |||||
| const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { | |||||
| return statistic_of_ge_constant_folding_; | |||||
| } | |||||
| const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { | |||||
| return statistic_of_op_constant_folding_; | |||||
| } | |||||
| Status ConstantFoldingPass::RunOpKernelWithCheck(NodePtr &node, const vector<ConstGeTensorPtr> &inputs, | |||||
| std::vector<GeTensorPtr> &outputs) { | |||||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | ||||
| if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | ||||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Check][Param] GE is not initialized or is finalized."); | GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Check][Param] GE is not initialized or is finalized."); | ||||
| @@ -47,15 +53,13 @@ Status RunOpKernelWithCheck(NodePtr &node, | |||||
| if (ops_flag == kOpsFlagClose) { | if (ops_flag == kOpsFlagClose) { | ||||
| return UNSUPPORTED; | return UNSUPPORTED; | ||||
| } | } | ||||
| return FoldingPass::RunOpKernel(node, inputs, outputs); | |||||
| return RunOpKernel(node, inputs, outputs); | |||||
| } | } | ||||
| const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { | |||||
| return statistic_of_ge_constant_folding_; | |||||
| } | |||||
| const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { | |||||
| return statistic_of_op_constant_folding_; | |||||
| Status ConstantFoldingPass::RunOpKernel(NodePtr &node, | |||||
| const vector<ConstGeTensorPtr> &inputs, | |||||
| std::vector<GeTensorPtr> &outputs) { | |||||
| return HostCpuEngine::GetInstance().Run(node, inputs, outputs); | |||||
| } | } | ||||
| Status ConstantFoldingPass::Run(ge::NodePtr &node) { | Status ConstantFoldingPass::Run(ge::NodePtr &node) { | ||||
| @@ -28,6 +28,11 @@ class ConstantFoldingPass : public FoldingPass { | |||||
| Status Run(ge::NodePtr &node) override; | Status Run(ge::NodePtr &node) override; | ||||
| const std::map<std::string, std::pair<std::uint64_t, uint64_t>> &GetGeConstantFoldingPerfStatistic() const; | const std::map<std::string, std::pair<std::uint64_t, uint64_t>> &GetGeConstantFoldingPerfStatistic() const; | ||||
| const std::map<std::string, std::pair<std::uint64_t, uint64_t>> &GetOpConstantFoldingPerfStatistic() const; | const std::map<std::string, std::pair<std::uint64_t, uint64_t>> &GetOpConstantFoldingPerfStatistic() const; | ||||
| static Status RunOpKernel(NodePtr &node, const vector<ConstGeTensorPtr> &inputs, vector<GeTensorPtr> &outputs); | |||||
| static Status RunOpKernelWithCheck(NodePtr &node, const vector<ConstGeTensorPtr> &inputs, | |||||
| std::vector<GeTensorPtr> &outputs); | |||||
| private: | private: | ||||
| std::map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_op_constant_folding_; | std::map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_op_constant_folding_; | ||||
| std::map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_ge_constant_folding_; | std::map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_ge_constant_folding_; | ||||
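With RunOpKernel moved out of FoldingPass and made a static member of ConstantFoldingPass (alongside RunOpKernelWithCheck), other passes can execute a node's host-CPU kernel without holding a folding-pass instance; the new infer_value_range_pass below relies on exactly that. A hedged usage sketch, where the wrapper function and its call site are illustrative only:

// Hypothetical caller: the checked variant verifies GE initialization and the
// aicpu_tf_kernel opsFlag before delegating to RunOpKernel (the HostCpuEngine run).
// It returns ge::UNSUPPORTED when the tf_kernel.json opsFlag disables the op.
ge::Status TryFoldOnHost(ge::NodePtr &node, const std::vector<ge::ConstGeTensorPtr> &inputs,
                         std::vector<ge::GeTensorPtr> &outputs) {
  return ge::ConstantFoldingPass::RunOpKernelWithCheck(node, inputs, outputs);
}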
| @@ -28,8 +28,6 @@ | |||||
| #include "inc/kernel.h" | #include "inc/kernel.h" | ||||
| #include "inc/kernel_factory.h" | #include "inc/kernel_factory.h" | ||||
| #include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
| #include "ge_local_engine/engine/host_cpu_engine.h" | |||||
| namespace ge { | namespace ge { | ||||
| namespace folding_pass { | namespace folding_pass { | ||||
| @@ -123,12 +121,6 @@ NodePtr AddIdentityNodeToGraph(const std::string &name, const GeTensorDesc &tens | |||||
| } | } | ||||
| } // namespace | } // namespace | ||||
| Status FoldingPass::RunOpKernel(NodePtr &node, | |||||
| const vector<ConstGeTensorPtr> &inputs, | |||||
| std::vector<GeTensorPtr> &outputs) { | |||||
| return HostCpuEngine::GetInstance().Run(node, inputs, outputs); | |||||
| } | |||||
| Status FoldingPass::Folding(NodePtr &node, vector<GeTensorPtr> &outputs) { | Status FoldingPass::Folding(NodePtr &node, vector<GeTensorPtr> &outputs) { | ||||
| GE_CHECK_NOTNULL(node); | GE_CHECK_NOTNULL(node); | ||||
| GELOGD("begin folding node:%s", node->GetName().c_str()); | GELOGD("begin folding node:%s", node->GetName().c_str()); | ||||
| @@ -34,8 +34,6 @@ bool IsNoNeedConstantFolding(const NodePtr &node); | |||||
| using IndexsToAnchors = std::map<int, std::vector<InDataAnchorPtr>>; | using IndexsToAnchors = std::map<int, std::vector<InDataAnchorPtr>>; | ||||
| class FoldingPass : public BaseNodePass { | class FoldingPass : public BaseNodePass { | ||||
| public: | |||||
| static Status RunOpKernel(NodePtr &node, const vector<ConstGeTensorPtr> &inputs, vector<GeTensorPtr> &outputs); | |||||
| protected: | protected: | ||||
| Status Folding(NodePtr &node, vector<GeTensorPtr> &outputs); | Status Folding(NodePtr &node, vector<GeTensorPtr> &outputs); | ||||
| private: | private: | ||||
| @@ -0,0 +1,385 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "infer_base_pass.h" | |||||
| #include "common/ge/ge_util.h" | |||||
| #include "common/util/error_manager/error_manager.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "framework/common/util.h" | |||||
| #include "graph/debug/ge_attr_define.h" | |||||
| #include "graph/utils/graph_utils.h" | |||||
| #include "graph/utils/node_utils.h" | |||||
| #include "graph/utils/tensor_utils.h" | |||||
| #include "graph/utils/type_utils.h" | |||||
| namespace ge { | |||||
| namespace { | |||||
| graphStatus FindValidSubgraphNetoutput(const ConstNodePtr &node, const ComputeGraphPtr &sub_graph, NodePtr &netoutput) { | |||||
| auto sub_nodes = sub_graph->GetDirectNode(); | |||||
| for (size_t i = sub_nodes.size(); i > 0; --i) { | |||||
| auto sub_node = sub_nodes.at(i - 1); | |||||
| if (sub_node == nullptr) { | |||||
| REPORT_INNER_ERROR("E19999", "Null node in subgraph %s, parent node %s.", | |||||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||||
| GELOGE(GRAPH_FAILED, "[Check][Param] Null node on sub graph %s, parent node %s", | |||||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| if (sub_node->GetType() == NETOUTPUT) { | |||||
| auto sub_node_opdesc = sub_node->GetOpDesc(); | |||||
| if (sub_node_opdesc == nullptr) { | |||||
| REPORT_INNER_ERROR("E19999", "Invalid NetOutput node in subgraph %s, parent node %s, no OpDesc on it", | |||||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||||
| GELOGE(GRAPH_FAILED, "[Check][Param] Invalid NetOutput node on sub graph %s, parent node %s, no OpDesc on it", | |||||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| netoutput = sub_node; | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| } | |||||
| REPORT_INNER_ERROR("E19999", "Can not find the NetOutput node in subgraph %s, parent node %s", | |||||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||||
| GELOGE(GRAPH_FAILED, "[Check][Param] Can not find the NetOutput node in subgraph %s, parent node %s", | |||||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| } // namespace | |||||
| Status InferBasePass::Run(NodePtr &node) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | |||||
| bool need_infer = NeedInfer(node); | |||||
| if (!need_infer) { | |||||
| GELOGD("Node %s does not need to infer.", node->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| std::set<NodePtr> changed_nodes; | |||||
| auto ret = InferAndUpdate(node, !OptionExists(kOptimizeAfterSubGraph), changed_nodes); | |||||
| if (ret != GRAPH_SUCCESS) { | |||||
| GELOGE(ret, "Infer and update for node %s failed! ret: %u", node->GetName().c_str(), ret); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| AddChangedNodesImmediateRepass(changed_nodes); | |||||
| return SUCCESS; | |||||
| } | |||||
| bool InferBasePass::NeedInfer(const NodePtr &node) const { return true; } | |||||
| void InferBasePass::AddChangedNodesImmediateRepass(const std::set<NodePtr> &changed_nodes) { | |||||
| // A passed_nodes set is needed to solve the problem that multi-input operators repass in advance. | |||||
| // When a passed_nodes set exists, we should call AddImmediateRePassNode for all nodes in changed_nodes. | |||||
| } | |||||
| graphStatus InferBasePass::InferAndUpdate(NodePtr &node, bool before_subgraph, std::set<NodePtr> &changed_nodes) { | |||||
| graphStatus ret; | |||||
| if (ContainsSubgraph(node)) { | |||||
| if (before_subgraph) { | |||||
| ret = UpdateTensorDescToSubgraphData(node); | |||||
| } else { | |||||
| ret = UpdateTensorDescToParentNodeOutput(node); | |||||
| } | |||||
| if (ret != GRAPH_SUCCESS) { | |||||
| GELOGE(ret, "Update tensor desc failed between parent node %s and subgraphs. ret: %u", node->GetName().c_str(), | |||||
| ret); | |||||
| return ret; | |||||
| } | |||||
| } | |||||
| PrintInOutTensors(node, "before_infer"); | |||||
| ret = Infer(node); | |||||
| PrintInOutTensors(node, "after_infer"); | |||||
| if (ret == GRAPH_NODE_NEED_REPASS) { | |||||
| // if a node needs re_pass, it is not necessary to update the peer node input. | |||||
| changed_nodes.insert(node); | |||||
| return GRAPH_SUCCESS; | |||||
| } else if (ret != GRAPH_SUCCESS && ret != GRAPH_NOT_CHANGED) { | |||||
| GELOGE(ret, "Infer failed for node %s, ret: %u", node->GetName().c_str(), ret); | |||||
| return ret; | |||||
| } | |||||
| ret = UpdateTensorDescToPeerInputs(node, changed_nodes); | |||||
| if (ret != GRAPH_SUCCESS) { | |||||
| GELOGE(ret, "Node %s updates tensor desc to peer input nodes failed! ret: %u", node->GetName().c_str(), ret); | |||||
| } | |||||
| GELOGD("Node %s infer and update succeeded .", node->GetName().c_str()); | |||||
| return ret; | |||||
| } | |||||
| bool InferBasePass::ContainsSubgraph(const NodePtr &node) { | |||||
| auto sub_graph_names = node->GetOpDesc()->GetSubgraphInstanceNames(); | |||||
| return !sub_graph_names.empty(); | |||||
| } | |||||
| graphStatus InferBasePass::UpdateTensorDescToPeerInputs(NodePtr &node, std::set<NodePtr> &changed_nodes) { | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| for (const auto &out_anchor : node->GetAllOutDataAnchors()) { | |||||
| auto output_tensor = op_desc->MutableOutputDesc(out_anchor->GetIdx()); | |||||
| for (const auto &peer_anchor : out_anchor->GetPeerInDataAnchors()) { | |||||
| auto peer_anchor_opdesc = peer_anchor->GetOwnerNode()->GetOpDesc(); | |||||
| if (peer_anchor_opdesc == nullptr) { | |||||
| continue; | |||||
| } | |||||
| auto peer_input_desc = peer_anchor_opdesc->MutableInputDesc(peer_anchor->GetIdx()); | |||||
| if (peer_input_desc == nullptr) { | |||||
| continue; | |||||
| } | |||||
| bool changed = false; | |||||
| auto ret = UpdateTensorDesc(output_tensor, peer_input_desc, changed); | |||||
| if (ret != GRAPH_SUCCESS) { | |||||
| REPORT_CALL_ERROR("E19999", "Update peer input desc failed, node %s.", node->GetName().c_str()); | |||||
| GELOGE(ret, "Update peer input desc failed, node %s.", node->GetName().c_str()); | |||||
| return ret; | |||||
| } | |||||
| if (changed) { | |||||
| changed_nodes.insert(peer_anchor->GetOwnerNode()); | |||||
| GELOGD("Node %s update peer node succeeded, peer node %s is changed.", node->GetName().c_str(), | |||||
| peer_anchor->GetOwnerNode()->GetName().c_str()); | |||||
| } | |||||
| } | |||||
| } | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| std::vector<ComputeGraphPtr> InferBasePass::GetCurNodeSubgraphs(const NodePtr &node) { | |||||
| std::vector<ComputeGraphPtr> cur_node_subgraph; | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| auto sub_graph_names = op_desc->GetSubgraphInstanceNames(); | |||||
| if (sub_graph_names.empty()) { | |||||
| return cur_node_subgraph; | |||||
| } | |||||
| auto root_graph = GraphUtils::FindRootGraph(node->GetOwnerComputeGraph()); | |||||
| for (const auto &name : sub_graph_names) { | |||||
| if (name.empty()) { | |||||
| GELOGW("The node %s contains empty subgraph instance name", node->GetName().c_str()); | |||||
| continue; | |||||
| } | |||||
| auto sub_graph = root_graph->GetSubgraph(name); | |||||
| if (sub_graph == nullptr) { | |||||
| GELOGW("The subgrpah %s for node %s is null.", name.c_str(), node->GetName().c_str()); | |||||
| continue; | |||||
| } | |||||
| cur_node_subgraph.emplace_back(sub_graph); | |||||
| } | |||||
| return cur_node_subgraph; | |||||
| } | |||||
| graphStatus InferBasePass::UpdateTensorDescToSubgraphData(NodePtr &node) { | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| for (const auto &sub_graph : GetCurNodeSubgraphs(node)) { | |||||
| for (const auto &node_sub : sub_graph->GetDirectNode()) { | |||||
| if (node_sub->GetType() != DATA) { | |||||
| continue; | |||||
| } | |||||
| auto data_opdesc = node_sub->GetOpDesc(); | |||||
| if (data_opdesc == nullptr) { | |||||
| REPORT_INNER_ERROR("E19999", "Invalid data node on the sub graph %s parent node %s, no OpDesc", | |||||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||||
| GELOGE(GRAPH_FAILED, "[Get][OpDesc] Invalid data node on the sub graph %s parent node %s, no OpDesc", | |||||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| int ref_i; | |||||
| if (!AttrUtils::GetInt(data_opdesc, ATTR_NAME_PARENT_NODE_INDEX, ref_i)) { | |||||
| REPORT_INNER_ERROR("E19999", "Invalid data node on the sub graph %s parent node %s, no ref-index attribute", | |||||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||||
| GELOGE(GRAPH_FAILED, "[Get][Int] Invalid data node on the sub graph %s parent node %s, no ref-index attribute", | |||||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| GELOGD("Subgraph Data node ref_index is %d, parent node is %s.", ref_i, node->GetName().c_str()); | |||||
| // In multi-batch, data shape of subgraph is different, no need to refresh. | |||||
| if (data_opdesc->HasAttr(ATTR_MBATCH_ORIGIN_INPUT_DIMS)) { | |||||
| GELOGD("While updating subgraph data node, ignore node %s which is created by multi-dims", | |||||
| data_opdesc->GetName().c_str()); | |||||
| continue; | |||||
| } | |||||
| auto input_desc = op_desc->MutableInputDesc(ref_i); | |||||
| if (input_desc == nullptr) { | |||||
| REPORT_INNER_ERROR("E19999", | |||||
| "The ref index(%d) on the data %s on the sub graph %s " | |||||
| "parent node %s are incompatible, inputs num %u", | |||||
| ref_i, node_sub->GetName().c_str(), sub_graph->GetName().c_str(), node->GetName().c_str(), | |||||
| node->GetAllInDataAnchorsSize()); | |||||
| GELOGE(GRAPH_FAILED, | |||||
| "[Call][MutableInputDesc] The ref index(%d) on the data %s on the sub graph %s " | |||||
| "parent node %s are incompatible, inputs num %u", | |||||
| ref_i, node_sub->GetName().c_str(), sub_graph->GetName().c_str(), node->GetName().c_str(), | |||||
| node->GetAllInDataAnchorsSize()); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| GELOGI("Ref index is %d, input_desc dtype is %d, node name is %s", ref_i, input_desc->GetDataType(), | |||||
| node->GetName().c_str()); | |||||
| bool has_tensor_desc_changed = false; | |||||
| auto data_input_td = data_opdesc->MutableInputDesc(0); | |||||
| auto ret = UpdateTensorDesc(input_desc, data_input_td, has_tensor_desc_changed); | |||||
| if (ret != GRAPH_SUCCESS) { | |||||
| REPORT_CALL_ERROR("E19999", "Failed to update input desc of data %s on the sub graph %s parent node %s", | |||||
| node_sub->GetName().c_str(), sub_graph->GetName().c_str(), node->GetName().c_str()); | |||||
| GELOGE(GRAPH_FAILED, "[Update][InputDesc] of data %s on the sub graph %s parent node %s failed", | |||||
| node_sub->GetName().c_str(), sub_graph->GetName().c_str(), node->GetName().c_str()); | |||||
| return ret; | |||||
| } | |||||
| auto data_output_td = data_opdesc->MutableOutputDesc(0); | |||||
| ret = UpdateTensorDesc(input_desc, data_output_td, has_tensor_desc_changed); | |||||
| if (ret != GRAPH_SUCCESS) { | |||||
| REPORT_CALL_ERROR("E19999", "Failed to update output desc of data %s on the sub graph %s parent node %s", | |||||
| node_sub->GetName().c_str(), sub_graph->GetName().c_str(), node->GetName().c_str()); | |||||
| GELOGE(GRAPH_FAILED, "[Update][OutputDesc] of data %s on the sub graph %s parent node %s failed", | |||||
| node_sub->GetName().c_str(), sub_graph->GetName().c_str(), node->GetName().c_str()); | |||||
| return ret; | |||||
| } | |||||
| GELOGD("Parent node %s update subgraph data %s input and output succeed.", node->GetName().c_str(), | |||||
| data_opdesc->GetName().c_str()); | |||||
| } | |||||
| } | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| graphStatus InferBasePass::UpdateTensorDescToParentNodeOutput(NodePtr &node) { | |||||
| std::vector<std::vector<GeTensorDescPtr>> ref_out_tensors(node->GetAllOutDataAnchorsSize()); | |||||
| for (const auto &sub_graph : GetCurNodeSubgraphs(node)) { | |||||
| NodePtr netoutput; | |||||
| auto ret = FindValidSubgraphNetoutput(node, sub_graph, netoutput); | |||||
| if (ret != GRAPH_SUCCESS) { | |||||
| return ret; | |||||
| } | |||||
| auto netoutput_opdesc = netoutput->GetOpDesc(); | |||||
| for (auto &netoutput_in_anchor : netoutput->GetAllInDataAnchors()) { | |||||
| auto netoutput_in_desc = netoutput_opdesc->MutableInputDesc(netoutput_in_anchor->GetIdx()); | |||||
| if (netoutput_in_desc == nullptr) { | |||||
| REPORT_INNER_ERROR("E19999", | |||||
| "Invalid NetOutput node on sub graph %s, parent node %s, can not find input tensor %d", | |||||
| sub_graph->GetName().c_str(), node->GetName().c_str(), netoutput_in_anchor->GetIdx()); | |||||
| GELOGE(GRAPH_FAILED, | |||||
| "[Get][Tensor] Invalid NetOutput node on sub graph %s, parent node %s, can not find input tensor %d", | |||||
| sub_graph->GetName().c_str(), node->GetName().c_str(), netoutput_in_anchor->GetIdx()); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| GELOGI("Netoutput in anchor index is %d, input tensor dim is %zu", netoutput_in_anchor->GetIdx(), | |||||
| netoutput_in_desc->GetShape().GetDimNum()); | |||||
| int ref_i; | |||||
| if (!AttrUtils::GetInt(netoutput_in_desc, ATTR_NAME_PARENT_NODE_INDEX, ref_i)) { | |||||
| // if there is no ref index on the TensorDesc, the output data will be ignored by the outer graph. | |||||
| continue; | |||||
| } | |||||
| GELOGI("Parent node index of edge desc is %d", ref_i); | |||||
| if (ref_i < 0 || static_cast<uint32_t>(ref_i) >= node->GetAllOutDataAnchorsSize()) { | |||||
| REPORT_INNER_ERROR("E19999", | |||||
| "Invalid ref_index %d of parent node %s, ref_index should less than %u.", ref_i, | |||||
| node->GetName().c_str(), node->GetAllOutDataAnchorsSize()); | |||||
| GELOGE(GRAPH_FAILED, | |||||
| "[Get][Ref_index] Invalid ref_index %d of parent node %s, ref_index should less than %u.", ref_i, | |||||
| node->GetName().c_str(), node->GetAllOutDataAnchorsSize()); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| ref_out_tensors[ref_i].emplace_back(netoutput_in_desc); | |||||
| } | |||||
| } | |||||
| return UpdateParentNodeContainsSubgraphs(node, ref_out_tensors); | |||||
| } | |||||
| graphStatus InferBasePass::UpdateParentNodeContainsSubgraphs( | |||||
| NodePtr &node, const std::vector<std::vector<GeTensorDescPtr>> &ref_out_tensors) { | |||||
| for (size_t i = 0; i < ref_out_tensors.size(); i++) { | |||||
| if (ref_out_tensors[i].empty()) { | |||||
| REPORT_CALL_ERROR("E19999", "Parent node %s ref_index %zu subgraph output tensor list is empty.", | |||||
| node->GetName().c_str(), i); | |||||
| GELOGE(GRAPH_FAILED, "[Param][check] Parent node %s ref_index %zu subgraph output tensor list is empty.", | |||||
| node->GetName().c_str(), i); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| auto node_op_desc = node->GetOpDesc(); | |||||
| auto node_output_td = node_op_desc->MutableOutputDesc(i); | |||||
| if (node_output_td == nullptr) { | |||||
| REPORT_CALL_ERROR("E19999", "Node %s output %zu tensor desc is null.", node->GetName().c_str(), i); | |||||
| GELOGE(GRAPH_FAILED, "[Param][check] Node %s output %zu tensor desc is null.", node->GetName().c_str(), i); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| graphStatus ret; | |||||
| if (node_op_desc->HasAttr(ATTR_NAME_BATCH_NUM)) { | |||||
| ret = UpdateOutputFromSubgraphsForMultiDims(ref_out_tensors[i], node_output_td); | |||||
| } else { | |||||
| ret = UpdateOutputFromSubgraphs(ref_out_tensors[i], node_output_td); | |||||
| } | |||||
| if (ret != GRAPH_SUCCESS) { | |||||
| REPORT_CALL_ERROR("E19999", "Node %s update output %zu tensor desc failed. ret: %u", node->GetName().c_str(), i, | |||||
| ret); | |||||
| GELOGE(GRAPH_FAILED, "[Param][check] Node %s update output %zu tensor desc failed. ret: %u", | |||||
| node->GetName().c_str(), i, ret); | |||||
| return ret; | |||||
| } | |||||
| GELOGD("Parent node %s successfully updated the output tensors from subgraphs.", node->GetName().c_str()); | |||||
| } | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| void InferBasePass::PrintInOutTensors(const NodePtr &node, const std::string &phase) { | |||||
| if (!IsLogEnable(GE, DLOG_DEBUG)) { | |||||
| return; | |||||
| } | |||||
| if (node == nullptr) { | |||||
| REPORT_INNER_ERROR("E19999", "Param node is nullptr, check invalid"); | |||||
| GELOGE(GRAPH_FAILED, "[Check][Param] node is null"); | |||||
| return; | |||||
| } | |||||
| ge::OpDescPtr op_desc = node->GetOpDesc(); | |||||
| GE_IF_BOOL_EXEC(op_desc == nullptr, REPORT_INNER_ERROR("E19999", "Node has no opdesc, check invalid"); | |||||
| GELOGE(GRAPH_FAILED, "[Get][OpDesc] op_desc is null."); return ); | |||||
| std::stringstream ss; | |||||
| ss << "{"; | |||||
| int32_t in_idx = 0; | |||||
| for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) { | |||||
| if (input_desc == nullptr) { | |||||
| in_idx++; | |||||
| continue; | |||||
| } | |||||
| if (in_idx > 0) { | |||||
| ss << " "; | |||||
| } | |||||
| ss << "input_" << in_idx << " tensor: "; | |||||
| ss << SerialTensorInfo(input_desc); | |||||
| in_idx++; | |||||
| } | |||||
| int32_t out_idx = 0; | |||||
| for (const auto &output_desc : op_desc->GetAllOutputsDescPtr()) { | |||||
| if (output_desc == nullptr) { | |||||
| out_idx++; | |||||
| continue; | |||||
| } | |||||
| ss << " "; | |||||
| ss << "output_" << out_idx << " tensor: "; | |||||
| ss << SerialTensorInfo(output_desc); | |||||
| out_idx++; | |||||
| } | |||||
| ss << "}"; | |||||
| GELOGD("Infer tensor dump [%s], Node name: [%s]. %s", phase.c_str(), node->GetName().c_str(), ss.str().c_str()); | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -0,0 +1,65 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_GRAPH_PASSES_INFER_BASE_PASS_H_ | |||||
| #define GE_GRAPH_PASSES_INFER_BASE_PASS_H_ | |||||
| #include "graph/passes/base_pass.h" | |||||
| namespace ge { | |||||
| class InferBasePass : public BaseNodePass { | |||||
| public: | |||||
| Status Run(NodePtr &node) override; | |||||
| graphStatus InferAndUpdate(NodePtr &node, bool before_subgraph, std::set<NodePtr> &changed_nodes); | |||||
| void PrintInOutTensors(const NodePtr &node, const std::string &phase); | |||||
| protected: | |||||
| virtual std::string SerialTensorInfo(const GeTensorDescPtr &tensor_desc) const = 0; | |||||
| virtual bool NeedInfer(const NodePtr &node) const; | |||||
| virtual graphStatus Infer(NodePtr &node) = 0; | |||||
| /** | |||||
| * Update the output TensorDesc by src TensorDesc. This will be called when updating peer node input desc. | |||||
| * @param src, input TensorDesc | |||||
| * @param dst, output TensorDesc to be updated | |||||
| * @return | |||||
| */ | |||||
| virtual graphStatus UpdateTensorDesc(const GeTensorDescPtr &src, GeTensorDescPtr &dst, bool &changed) = 0; | |||||
| /** | |||||
| * Update the output TensorDesc for nodes which contain subgraphs. | |||||
| * In dynamic multi-dims/batch/images size scenes, the update process may be different; | |||||
| * in that case, `InferBasePass` calls the method `UpdateOutputFromSubgraphsForMultiDims` instead. | |||||
| * @param src, input TensorDesc from NetOutput nodes in all subgraphs | |||||
| * @param dst, output TensorDesc to be updated | |||||
| * @return | |||||
| */ | |||||
| virtual graphStatus UpdateOutputFromSubgraphs(const std::vector<GeTensorDescPtr> &src, | |||||
| GeTensorDescPtr &dst) = 0; | |||||
| virtual graphStatus UpdateOutputFromSubgraphsForMultiDims(const std::vector<GeTensorDescPtr> &src, | |||||
| GeTensorDescPtr &dst) = 0; | |||||
| private: | |||||
| void AddChangedNodesImmediateRepass(const std::set<NodePtr> &changed_nodes); | |||||
| bool ContainsSubgraph(const NodePtr &node); | |||||
| std::vector<ComputeGraphPtr> GetCurNodeSubgraphs(const NodePtr &node); | |||||
| graphStatus UpdateTensorDescToSubgraphData(NodePtr &node); | |||||
| graphStatus UpdateTensorDescToParentNodeOutput(NodePtr &node); | |||||
| graphStatus UpdateParentNodeContainsSubgraphs(NodePtr &node, | |||||
| const std::vector<std::vector<GeTensorDescPtr>> &ref_out_tensors); | |||||
| graphStatus UpdateTensorDescToPeerInputs(NodePtr &node, std::set<NodePtr> &changed_nodes); | |||||
| }; | |||||
| } // namespace ge | |||||
| #endif // GE_GRAPH_PASSES_INFER_BASE_PASS_H_ | |||||
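A concrete pass derived from this header only supplies the virtual hooks; Run() in the base class drives NeedInfer, the subgraph Data/NetOutput propagation, the peer-input updates and the repass bookkeeping. A minimal hypothetical subclass, written only to illustrate the required overrides and not part of this change:

#include "graph/passes/infer_base_pass.h"

namespace ge {
// No-op demo pass: shows which hooks a concrete InferBasePass must provide.
class InferDemoPass : public InferBasePass {
 protected:
  std::string SerialTensorInfo(const GeTensorDescPtr &tensor_desc) const override {
    return tensor_desc == nullptr ? "null" : tensor_desc->MutableShape().ToString();
  }
  graphStatus Infer(NodePtr &node) override {
    // a real pass recomputes the attribute of interest (shape, value range, ...) here
    return GRAPH_SUCCESS;
  }
  graphStatus UpdateTensorDesc(const GeTensorDescPtr &src, GeTensorDescPtr &dst, bool &changed) override {
    changed = false;  // report true when dst really changes, so the peer node gets repassed
    return GRAPH_SUCCESS;
  }
  graphStatus UpdateOutputFromSubgraphs(const std::vector<GeTensorDescPtr> &src, GeTensorDescPtr &dst) override {
    return GRAPH_SUCCESS;
  }
  graphStatus UpdateOutputFromSubgraphsForMultiDims(const std::vector<GeTensorDescPtr> &src,
                                                    GeTensorDescPtr &dst) override {
    return GRAPH_SUCCESS;
  }
};
}  // namespace ge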
| @@ -0,0 +1,523 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "graph/passes/infer_value_range_pass.h" | |||||
| #include "common/formats/utils/formats_trans_utils.h" | |||||
| #include "common/util/error_manager/error_manager.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "graph/debug/ge_attr_define.h" | |||||
| #include "graph/operator_factory_impl.h" | |||||
| #include "graph/passes/constant_folding_pass.h" | |||||
| #include "graph/utils/type_utils.h" | |||||
| #include "common/ge/ge_util.h" | |||||
| using std::unique_ptr; | |||||
| namespace ge { | |||||
| namespace { | |||||
| #define GET_DATA_BY_DTYPE(DTYPE, TYPE) \ | |||||
| case (DTYPE): \ | |||||
| ConstructValueRange<TYPE>(lower_boundary_tensor, upper_boundary_tensor, output_tensor_value_range); \ | |||||
| break; | |||||
| void SerialShapeRange(const GeTensorDescPtr &desc, std::string &desc_str) { | |||||
| std::vector<std::pair<int64_t, int64_t>> shape_range; | |||||
| (void)desc->GetShapeRange(shape_range); | |||||
| desc_str += formats::RangeToString(shape_range); | |||||
| shape_range.clear(); | |||||
| (void)desc->GetOriginShapeRange(shape_range); | |||||
| desc_str += ","; | |||||
| desc_str += formats::RangeToString(shape_range); | |||||
| shape_range.clear(); | |||||
| } | |||||
| Status RunCpuKernelForValueRange(NodePtr &node, const vector<ConstGeTensorPtr> &inputs, | |||||
| std::vector<GeTensorPtr> &outputs) { | |||||
| // RunOpKernelWithCheck is the checked variant; RunOpKernel is used here for test | |||||
| auto ret = ConstantFoldingPass::RunOpKernel(node, inputs, outputs); | |||||
| if (ret != SUCCESS) { | |||||
| auto op_kernel = folding_pass::GetKernelByType(node); | |||||
| if (op_kernel == nullptr) { | |||||
| GELOGW("Calculate value range failed, no op kernel for node %s type %s", node->GetName().c_str(), | |||||
| node->GetType().c_str()); | |||||
| return NOT_CHANGED; | |||||
| } | |||||
| ret = op_kernel->Compute(node->GetOpDesc(), inputs, outputs); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGW("Calculate value range failed, node %s run cpu kernel failed.", node->GetName().c_str()); | |||||
| return NOT_CHANGED; | |||||
| } | |||||
| } | |||||
| GELOGI("Node %s type %s, run cpu kernel success.", node->GetName().c_str(), node->GetType().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace | |||||
| graphStatus InferValueRangePass::Infer(NodePtr &node) { | |||||
| auto infer_value_range_param = OperatorFactoryImpl::GetInferValueRangePara(node->GetType()); | |||||
| // Use registered func to calculate value range | |||||
| if (!infer_value_range_param.use_cpu_kernel) { | |||||
| if (infer_value_range_param.infer_value_func == nullptr) { | |||||
| GELOGW("The registered func of node %s to infer value range is nullptr.", node->GetName().c_str()); | |||||
| return GRAPH_NOT_CHANGED; | |||||
| } | |||||
| Operator op = OpDescUtils::CreateOperatorFromNode(node); | |||||
| auto ret = node->GetOpDesc()->CallInferValueRangeFunc(op); | |||||
| if (ret != GRAPH_SUCCESS) { | |||||
| GELOGW("Node %s call infer value range func failed, ret: %u.", node->GetName().c_str(), ret); | |||||
| return GRAPH_NOT_CHANGED; | |||||
| } | |||||
| GELOGD("Node %s infer value range func succeed by registered func.", node->GetName().c_str()); | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| // if the input value range contains -1, the cpu kernel cannot calculate correctly, so set {1:-1} | |||||
| if (InputHasUnknownValueRange(node)) { | |||||
| GELOGI("Node %s has unknown value range in input tensors, set value range {1:-1}, and skip cpu kernel.", | |||||
| node->GetName().c_str()); | |||||
| return GenerateWorstValueRange(node); | |||||
| } | |||||
| // Use CPU kernel func to calculate value range | |||||
| auto ret = ConstructInputAndInferValueRange(node); | |||||
| if (ret != GRAPH_SUCCESS) { | |||||
| GELOGW("Use CPU kernel to calculate value range failed. node: %s, ret: %u", node->GetName().c_str(), ret); | |||||
| return GRAPH_NOT_CHANGED; | |||||
| } | |||||
| GELOGD("Node %s infer value range func succeed by running cpu kernel.", node->GetName().c_str()); | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| std::string InferValueRangePass::SerialTensorInfo(const GeTensorDescPtr &tensor_desc) const { | |||||
| std::stringstream ss; | |||||
| ss << "["; | |||||
| ss << "(shape:[" << tensor_desc->MutableShape().ToString() << "]),"; | |||||
| string range_str; | |||||
| SerialShapeRange(tensor_desc, range_str); | |||||
| ss << "(shape_range:" << range_str << "),"; | |||||
| std::vector<std::pair<int64_t, int64_t>> value_range; | |||||
| (void)tensor_desc->GetValueRange(value_range); | |||||
| string value_range_str = formats::RangeToString(value_range); | |||||
| ss << "(value_range:" << value_range_str << ")]"; | |||||
| return ss.str(); | |||||
| } | |||||
| bool InferValueRangePass::NeedInfer(const NodePtr &node) const { | |||||
| auto infer_value_range_param = OperatorFactoryImpl::GetInferValueRangePara(node->GetType()); | |||||
| if (!infer_value_range_param.is_initialized) { | |||||
| GELOGD("Node %s does not register func to infer value range, skip infer_value_range_pass.", | |||||
| node->GetName().c_str()); | |||||
| return false; | |||||
| } | |||||
| if (infer_value_range_param.when_call == INPUT_IS_DYNAMIC) { | |||||
| // Only do infer for node that all inputs are dynamic, such as shape | |||||
| if (InputIsDynamic(node)) { | |||||
| return true; | |||||
| } | |||||
| GELOGD("Node %s register func to infer value range and when_call is INPUT_IS_DYNAMIC, but check input failed.", | |||||
| node->GetName().c_str()); | |||||
| } else if (infer_value_range_param.when_call == INPUT_HAS_VALUE_RANGE) { | |||||
| // Only do infer for node that all inputs have value_range or node type of inputs is constant/const | |||||
| if (InputIsConstOrHasValueRange(node)) { | |||||
| return true; | |||||
| } | |||||
| GELOGD("Node %s register func to infer value range and when_call is INPUT_HAS_VALUE_RANGE, but check input failed.", | |||||
| node->GetName().c_str()); | |||||
| } | |||||
| GELOGD("Node %s does not need to infer value range, skip infer_value_range_pass.", node->GetName().c_str()); | |||||
| return false; | |||||
| } | |||||
| bool InferValueRangePass::InputIsDynamic(const NodePtr &node) const { | |||||
| bool input_is_dynamic = false; | |||||
| auto cur_op_desc = node->GetOpDesc(); | |||||
| for (const auto &input_desc : cur_op_desc->GetAllInputsDescPtr()) { | |||||
| auto dims = input_desc->GetShape().GetDims(); | |||||
| for (auto dim : dims) { | |||||
| if (dim == UNKNOWN_DIM || dim == UNKNOWN_DIM_NUM) { | |||||
| input_is_dynamic = true; | |||||
| break; | |||||
| } | |||||
| } | |||||
| } | |||||
| return input_is_dynamic; | |||||
| } | |||||
| bool InferValueRangePass::InputIsConstOrHasValueRange(const NodePtr &node) const { | |||||
| bool input_is_const_or_has_value_range = true; | |||||
| auto cur_op_desc = node->GetOpDesc(); | |||||
| auto in_data_anchors = node->GetAllInDataAnchors(); | |||||
| for (size_t i = 0; i < in_data_anchors.size(); ++i) { | |||||
| auto peer_out_anchor = in_data_anchors.at(i)->GetPeerOutAnchor(); | |||||
| if (peer_out_anchor == nullptr) { | |||||
| continue; | |||||
| } | |||||
| auto peer_node = peer_out_anchor->GetOwnerNode(); | |||||
| if (peer_node == nullptr || peer_node->GetOpDesc() == nullptr) { | |||||
| continue; | |||||
| } | |||||
| if ((peer_node->GetType() == CONSTANT) || (peer_node->GetType() == CONSTANTOP)) { | |||||
| continue; | |||||
| } | |||||
| const auto &input_desc = cur_op_desc->GetInputDesc(i); | |||||
| std::vector<std::pair<int64_t, int64_t>> value_range; | |||||
| (void)input_desc.GetValueRange(value_range); | |||||
| if (value_range.empty()) { | |||||
| GELOGD("Node %s input %zu does not have value range, skip infer_value_range_pass for current node.", | |||||
| node->GetName().c_str(), i); | |||||
| input_is_const_or_has_value_range = false; | |||||
| break; | |||||
| } | |||||
| } | |||||
| return input_is_const_or_has_value_range; | |||||
| } | |||||
| bool InferValueRangePass::InputHasUnknownValueRange(const NodePtr &node) const { | |||||
| bool has_unknown_value_range = false; | |||||
| auto cur_op_desc = node->GetOpDesc(); | |||||
| for (const auto &input_desc : cur_op_desc->GetAllInputsDescPtr()) { | |||||
| std::vector<std::pair<int64_t, int64_t>> input_desc_value_range; | |||||
| input_desc->GetValueRange(input_desc_value_range); | |||||
| if (!input_desc_value_range.empty()) { | |||||
| for (const auto &range : input_desc_value_range) { | |||||
| if (range.first == -1 || range.second == -1) { | |||||
| GELOGD("Node %s input tensors have unknown value range, value range is %s.", node->GetName().c_str(), | |||||
| formats::RangeToString(input_desc_value_range).c_str()); | |||||
| has_unknown_value_range = true; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| return has_unknown_value_range; | |||||
| } | |||||
| graphStatus InferValueRangePass::UpdateTensorDesc(const GeTensorDescPtr &src, GeTensorDescPtr &dst, bool &changed) { | |||||
| if (src == nullptr || dst == nullptr) { | |||||
| REPORT_CALL_ERROR("E19999", "While updating tensor desc, input desc is null."); | |||||
| GELOGE(GRAPH_FAILED, "[Param][check] While updating tensor desc, input desc is null."); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| changed = false; | |||||
| std::vector<std::pair<int64_t, int64_t>> src_value_range; | |||||
| std::vector<std::pair<int64_t, int64_t>> dst_value_range; | |||||
| (void)src->GetValueRange(src_value_range); | |||||
| (void)dst->GetValueRange(dst_value_range); | |||||
| if (src_value_range != dst_value_range) { | |||||
| GELOGD("While updating tensor desc, value range has been changed, src value range: %s, dst value range: %s.", | |||||
| formats::RangeToString(src_value_range).c_str(), formats::RangeToString(dst_value_range).c_str()); | |||||
| changed = true; | |||||
| } | |||||
| dst->SetValueRange(src_value_range); | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| graphStatus InferValueRangePass::UpdateOutputFromSubgraphs(const std::vector<GeTensorDescPtr> &src, | |||||
| GeTensorDescPtr &dst) { | |||||
| std::vector<std::pair<int64_t, int64_t>> ref_out_tensor_value_range; | |||||
| auto ref_out_tensor = src.at(0); | |||||
| (void)ref_out_tensor->GetValueRange(ref_out_tensor_value_range); | |||||
| for (auto &ref_tensor : src) { | |||||
| std::vector<std::pair<int64_t, int64_t>> ref_tensor_value_range; | |||||
| (void)ref_tensor->GetValueRange(ref_tensor_value_range); | |||||
| if (ref_tensor_value_range.size() != ref_out_tensor_value_range.size()) { | |||||
| GELOGD("Update TensorDesc %s failed, rank of value ranges %s and %s are not the same, skip value range refresh.", | |||||
| dst->GetName().c_str(), formats::RangeToString(ref_out_tensor_value_range).c_str(), | |||||
| formats::RangeToString(ref_tensor_value_range).c_str()); | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| for (size_t j = 0; j < ref_out_tensor_value_range.size(); j++) { | |||||
| if ((ref_out_tensor_value_range.at(j).first != ref_tensor_value_range.at(j).first) || | |||||
| (ref_out_tensor_value_range.at(j).second != ref_tensor_value_range.at(j).second)) { | |||||
| ref_out_tensor_value_range[j] = std::make_pair(1, -1); | |||||
| } | |||||
| } | |||||
| } | |||||
| GELOGD("While updating output desc from subgraphs, set parent node desc value range %s.", | |||||
| formats::RangeToString(ref_out_tensor_value_range).c_str()); | |||||
| dst->SetValueRange(ref_out_tensor_value_range); | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| graphStatus InferValueRangePass::UpdateOutputFromSubgraphsForMultiDims(const std::vector<GeTensorDescPtr> &src, | |||||
| GeTensorDescPtr &dst) { | |||||
| REPORT_INNER_ERROR("E19999", | |||||
| "Update TensorDesc %s failed. In dynamic multi-dims size scene, there should be no value range.", | |||||
| dst->GetName().c_str()); | |||||
| GELOGE(GRAPH_FAILED, | |||||
| "[Update][TensorDesc] %s failed. In dynamic multi-dims size scene, there should be no value range.", | |||||
| dst->GetName().c_str()); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| graphStatus InferValueRangePass::GenerateWorstValueRange(NodePtr &node) { | |||||
| GELOGI("Node %s does not run cpu kernel, because input value range has -1.", node->GetName().c_str()); | |||||
| OpDescPtr op_desc = node->GetOpDesc(); | |||||
| for (size_t i = 0; i < op_desc->GetOutputsSize(); ++i) { | |||||
| auto output_desc = op_desc->MutableOutputDesc(i); | |||||
| if (output_desc == nullptr) { | |||||
| continue; | |||||
| } | |||||
| auto output_i_shape = output_desc->GetShape(); | |||||
| auto output_i_shape_size = output_i_shape.GetShapeSize(); | |||||
| if (output_i_shape_size < 0) { | |||||
| GELOGD("Node %s output shape is unknown, cannot infer value range, shape is %s.", node->GetName().c_str(), | |||||
| formats::ShapeToString(output_i_shape).c_str()); | |||||
| return GRAPH_NOT_CHANGED; | |||||
| } | |||||
| std::vector<std::pair<int64_t, int64_t>> output_i_value_range(output_i_shape_size, {1, -1}); | |||||
| if (output_i_shape.IsScalar()) { | |||||
| output_i_value_range.emplace_back(1, -1); | |||||
| } | |||||
| output_desc->SetValueRange(output_i_value_range); | |||||
| GELOGD("Node %s output %zu shape is %s, the generated worst value range is %s.", node->GetName().c_str(), i, | |||||
| formats::ShapeToString(output_i_shape).c_str(), formats::RangeToString(output_i_value_range).c_str()); | |||||
| } | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
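| // Fill a T-typed buffer with one boundary per value-range pair (the lower bound when use_floor_value is | |||||
| // true, otherwise the upper bound) and store it into output_ptr. The number of pairs must match the | |||||
| // tensor shape size (exactly one pair for scalar tensors). | |||||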
| template <typename T> | |||||
| graphStatus InferValueRangePass::ConstructData(const GeTensorDesc &tensor_desc, bool use_floor_value, | |||||
| GeTensorPtr &output_ptr) { | |||||
| std::vector<std::pair<int64_t, int64_t>> value_range; | |||||
| (void)tensor_desc.GetValueRange(value_range); | |||||
| size_t value_range_data_num = value_range.size(); | |||||
| auto tensor_shape = tensor_desc.GetShape(); | |||||
| bool value_range_and_tensor_shape_matched = true; | |||||
| if (tensor_shape.IsScalar()) { | |||||
| // scalar tensor has only one value_range pair | |||||
| if (value_range_data_num != 1) { | |||||
| value_range_and_tensor_shape_matched = false; | |||||
| } | |||||
| } else { | |||||
| // normal tensor, value_range size is equal to tensor shape size. | |||||
| if (static_cast<int64_t>(value_range_data_num) != tensor_shape.GetShapeSize()) { | |||||
| value_range_and_tensor_shape_matched = false; | |||||
| } | |||||
| } | |||||
| if (!value_range_and_tensor_shape_matched) { | |||||
| GELOGW("Input %s value range and tensor shape do not match. Value range size is %zu, tensor shape is %s.", | |||||
| tensor_desc.GetName().c_str(), value_range_data_num, formats::ShapeToString(tensor_shape).c_str()); | |||||
| return GRAPH_PARAM_INVALID; | |||||
| } | |||||
| unique_ptr<T[]> buf(new (std::nothrow) T[value_range_data_num]()); | |||||
| if (buf == nullptr) { | |||||
| REPORT_INNER_ERROR("E19999", "New buf failed"); | |||||
| GELOGE(MEMALLOC_FAILED, "New buf failed"); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| for (size_t j = 0; j < value_range_data_num; ++j) { | |||||
| auto value_range_j = use_floor_value ? value_range[j].first : value_range[j].second; | |||||
| buf[j] = static_cast<T>(value_range_j); | |||||
| } | |||||
| if (output_ptr->SetData(reinterpret_cast<uint8_t *>(buf.get()), value_range_data_num * sizeof(T)) != GRAPH_SUCCESS) { | |||||
| GELOGW("Set data failed while constructing value range input tensor."); | |||||
| return GRAPH_NOT_CHANGED; | |||||
| } | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| graphStatus InferValueRangePass::ConstructDataByType(const GeTensorDesc &tensor_desc, bool use_floor_value, | |||||
| GeTensorPtr &output_ptr) { | |||||
| graphStatus ret = GRAPH_SUCCESS; | |||||
| auto data_type = tensor_desc.GetDataType(); | |||||
| output_ptr->MutableTensorDesc().SetDataType(data_type); | |||||
| switch (data_type) { | |||||
| case DT_FLOAT: | |||||
| ret = ConstructData<float>(tensor_desc, use_floor_value, output_ptr); | |||||
| break; | |||||
| case DT_DOUBLE: | |||||
| ret = ConstructData<double>(tensor_desc, use_floor_value, output_ptr); | |||||
| break; | |||||
| case DT_UINT8: | |||||
| ret = ConstructData<uint8_t>(tensor_desc, use_floor_value, output_ptr); | |||||
| break; | |||||
| case DT_INT8: | |||||
| ret = ConstructData<int8_t>(tensor_desc, use_floor_value, output_ptr); | |||||
| break; | |||||
| case DT_UINT16: | |||||
| ret = ConstructData<uint16_t>(tensor_desc, use_floor_value, output_ptr); | |||||
| break; | |||||
| case DT_INT16: | |||||
| ret = ConstructData<int16_t>(tensor_desc, use_floor_value, output_ptr); | |||||
| break; | |||||
| case DT_INT32: | |||||
| ret = ConstructData<int32_t>(tensor_desc, use_floor_value, output_ptr); | |||||
| break; | |||||
| case DT_INT64: | |||||
| ret = ConstructData<int64_t>(tensor_desc, use_floor_value, output_ptr); | |||||
| break; | |||||
| default: | |||||
| GELOGW("Data type:%s is not supported.", TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||||
| ret = GRAPH_PARAM_INVALID; | |||||
| } | |||||
| return ret; | |||||
| } | |||||
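| // Build one input tensor per in-data anchor: reuse the weight directly when the peer is a Const/Constant | |||||
| // node, otherwise synthesize a tensor from the boundary of the input desc's value range. | |||||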
| vector<ConstGeTensorPtr> InferValueRangePass::ConstructInputTensors(const NodePtr &node, bool use_floor_value) { | |||||
| vector<ConstGeTensorPtr> input_tensors; | |||||
| auto cur_op_desc = node->GetOpDesc(); | |||||
| auto in_data_anchors = node->GetAllInDataAnchors(); | |||||
| for (size_t i = 0; i < in_data_anchors.size(); ++i) { | |||||
| auto peer_out_anchor = in_data_anchors.at(i)->GetPeerOutAnchor(); | |||||
| if (peer_out_anchor == nullptr) { | |||||
| continue; | |||||
| } | |||||
| auto peer_node = peer_out_anchor->GetOwnerNode(); | |||||
| if (peer_node == nullptr) { | |||||
| continue; | |||||
| } | |||||
| // construct input tensor by constant node | |||||
| if ((peer_node->GetType() == CONSTANT) || (peer_node->GetType() == CONSTANTOP)) { | |||||
| vector<GeTensorPtr> const_weight = OpDescUtils::MutableWeights(peer_node); | |||||
| if (const_weight.empty()) { | |||||
| GELOGW("MutableWeights failed, weight is empty, node: %s(%s)", peer_node->GetName().c_str(), | |||||
| peer_node->GetType().c_str()); | |||||
| return vector<ConstGeTensorPtr>(); | |||||
| } | |||||
| // const/constant op has only one weight | |||||
| if (const_weight.at(0) == nullptr) { | |||||
| GELOGW("MutableWeights failed, weight of constant is null, node name: %s(%s)", | |||||
| peer_node->GetName().c_str(), peer_node->GetType().c_str()); | |||||
| return vector<ConstGeTensorPtr>(); | |||||
| } | |||||
| input_tensors.push_back(const_weight.at(0)); | |||||
| GELOGD("Node %s construct input tensor %zu by constant node.", node->GetName().c_str(), input_tensors.size()); | |||||
| continue; | |||||
| } | |||||
| // construct input tensor by boundary of value range | |||||
| const auto &input_tensor_desc = cur_op_desc->GetInputDesc(i); | |||||
| GeTensorPtr tmp_tensor_ptr = MakeShared<GeTensor>(input_tensor_desc); | |||||
| if (tmp_tensor_ptr == nullptr) { | |||||
| REPORT_INNER_ERROR("E19999", "Make shared failed"); | |||||
| GELOGE(MEMALLOC_FAILED, "Make shared failed"); | |||||
| return vector<ConstGeTensorPtr>(); | |||||
| } | |||||
| auto ret = ConstructDataByType(input_tensor_desc, use_floor_value, tmp_tensor_ptr); | |||||
| if (ret != GRAPH_SUCCESS) { | |||||
| GELOGW("Construct input tensor by boundary of value range failed for input %s.", | |||||
| input_tensor_desc.GetName().c_str()); | |||||
| return vector<ConstGeTensorPtr>(); | |||||
| } | |||||
| input_tensors.push_back(tmp_tensor_ptr); | |||||
| GELOGD("Node %s construct input tensor %zu by input desc value range.", node->GetName().c_str(), | |||||
| input_tensors.size()); | |||||
| } | |||||
| return input_tensors; | |||||
| } | |||||
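| // Run the node's cpu kernel twice, once with lower-bound inputs and once with upper-bound inputs, then | |||||
| // pair the two results element-wise into the output value ranges. GET_DATA_BY_DTYPE is expected to | |||||
| // dispatch to ConstructValueRange<T> for the matching element type. | |||||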
| graphStatus InferValueRangePass::ConstructInputAndInferValueRange(NodePtr &node) { | |||||
| auto inputs = ConstructInputTensors(node, true); | |||||
| if (inputs.empty()) { | |||||
| return GRAPH_PARAM_INVALID; | |||||
| } | |||||
| vector<GeTensorPtr> lower_boundary_outputs; | |||||
| auto ret = RunCpuKernelForValueRange(node, inputs, lower_boundary_outputs); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGW("Node %s run cpu kernel failed while calculating value range.", node->GetName().c_str()); | |||||
| return GRAPH_PARAM_INVALID; | |||||
| } | |||||
| inputs = ConstructInputTensors(node, false); | |||||
| if (inputs.empty()) { | |||||
| return GRAPH_PARAM_INVALID; | |||||
| } | |||||
| vector<GeTensorPtr> upper_boundary_outputs; | |||||
| ret = RunCpuKernelForValueRange(node, inputs, upper_boundary_outputs); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGW("Node %s run cpu kernel failed while calculating value range.", node->GetName().c_str()); | |||||
| return GRAPH_PARAM_INVALID; | |||||
| } | |||||
| // construct value range from output tensor | |||||
| OpDescPtr node_desc = node->GetOpDesc(); | |||||
| std::vector<std::pair<int64_t, int64_t>> output_tensor_value_range; | |||||
| size_t node_output_desc_size = node_desc->GetOutputsSize(); | |||||
| for (size_t i = 0; i < node_output_desc_size; ++i) { | |||||
| output_tensor_value_range.clear(); | |||||
| auto output_tensor_desc = node_desc->MutableOutputDesc(i); | |||||
| auto output_shape_size = output_tensor_desc->GetShape().GetShapeSize(); | |||||
| auto lower_boundary_tensor = lower_boundary_outputs[i]; | |||||
| auto lower_boundary_shape = lower_boundary_tensor->GetTensorDesc().GetShape(); | |||||
| auto upper_boundary_tensor = upper_boundary_outputs[i]; | |||||
| auto upper_boundary_shape = upper_boundary_tensor->GetTensorDesc().GetShape(); | |||||
| if (lower_boundary_shape.GetShapeSize() != output_shape_size || | |||||
| upper_boundary_shape.GetShapeSize() != output_shape_size) { | |||||
| GELOGD( | |||||
| "Cpu kernel result shapes %s, %s and output shape %s do not match, can not infer value range for output %s.", | |||||
| formats::ShapeToString(lower_boundary_shape).c_str(), formats::ShapeToString(upper_boundary_shape).c_str(), | |||||
| formats::ShapeToString(output_tensor_desc->GetShape()).c_str(), output_tensor_desc->GetName().c_str()); | |||||
| return GRAPH_PARAM_INVALID; | |||||
| } | |||||
| auto data_type = output_tensor_desc->GetDataType(); | |||||
| switch (data_type) { | |||||
| GET_DATA_BY_DTYPE(DT_INT8, int8_t) | |||||
| GET_DATA_BY_DTYPE(DT_INT16, int16_t) | |||||
| GET_DATA_BY_DTYPE(DT_INT32, int32_t) | |||||
| GET_DATA_BY_DTYPE(DT_INT64, int64_t) | |||||
| GET_DATA_BY_DTYPE(DT_UINT8, uint8_t) | |||||
| GET_DATA_BY_DTYPE(DT_UINT16, uint16_t) | |||||
| GET_DATA_BY_DTYPE(DT_UINT32, uint32_t) | |||||
| GET_DATA_BY_DTYPE(DT_UINT64, uint64_t) | |||||
| GET_DATA_BY_DTYPE(DT_FLOAT, float) | |||||
| GET_DATA_BY_DTYPE(DT_DOUBLE, double) | |||||
| default: | |||||
| GELOGW("Data type:%s is not supported.", TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||||
| return GRAPH_PARAM_INVALID; | |||||
| } | |||||
| output_tensor_desc->SetValueRange(output_tensor_value_range); | |||||
| GELOGD("Node %s calculates output %zu value range %s by running cpu kernel.", node->GetName().c_str(), i, | |||||
| formats::RangeToString(output_tensor_value_range).c_str()); | |||||
| } | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
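| // Pair the j-th elements of the two boundary tensors into {lower, upper} value-range entries | |||||
| // (plus one entry for scalar outputs). | |||||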
| template <typename T> | |||||
| void InferValueRangePass::ConstructValueRange(const GeTensorPtr &left_tensor, const GeTensorPtr &right_tensor, | |||||
| std::vector<std::pair<int64_t, int64_t>> &value_range) { | |||||
| auto x = reinterpret_cast<const T *>(left_tensor->GetData().GetData()); | |||||
| auto y = reinterpret_cast<const T *>(right_tensor->GetData().GetData()); | |||||
| if (x == nullptr || y == nullptr) { | |||||
| GELOGI("Output tensor of cpu kernel does not have data, no way to set value range."); | |||||
| return; | |||||
| } | |||||
| auto left_tensor_shape = left_tensor->GetTensorDesc().GetShape(); | |||||
| for (int64_t j = 0; j < left_tensor_shape.GetShapeSize(); ++j) { | |||||
| auto left = static_cast<int64_t>(*(x + j)); | |||||
| auto right = static_cast<int64_t>(*(y + j)); | |||||
| value_range.emplace_back(left, right); | |||||
| } | |||||
| if (left_tensor_shape.IsScalar()) { | |||||
| GELOGD("When inferring value range, output tensors of cpu kernel are scalar tensors."); | |||||
| value_range.emplace_back(static_cast<int64_t>(*x), static_cast<int64_t>(*y)); | |||||
| } | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -0,0 +1,49 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_GRAPH_PASSES_INFER_VALUE_RANGE_PASS_H_ | |||||
| #define GE_GRAPH_PASSES_INFER_VALUE_RANGE_PASS_H_ | |||||
| #include "graph/passes/infer_base_pass.h" | |||||
| namespace ge { | |||||
| class InferValueRangePass : public InferBasePass { | |||||
| public: | |||||
| graphStatus Infer(NodePtr &node) override; | |||||
| private: | |||||
| std::string SerialTensorInfo(const GeTensorDescPtr &tensor_desc) const override; | |||||
| graphStatus UpdateTensorDesc(const GeTensorDescPtr &src, GeTensorDescPtr &dst, bool &changed) override; | |||||
| graphStatus UpdateOutputFromSubgraphs(const std::vector<GeTensorDescPtr> &src, GeTensorDescPtr &dst) override; | |||||
| graphStatus UpdateOutputFromSubgraphsForMultiDims(const std::vector<GeTensorDescPtr> &src, | |||||
| GeTensorDescPtr &dst) override; | |||||
| bool NeedInfer(const NodePtr &node) const override; | |||||
| bool InputIsDynamic(const NodePtr &node) const; | |||||
| bool InputIsConstOrHasValueRange(const NodePtr &node) const; | |||||
| bool InputHasUnknownValueRange(const NodePtr &node) const; | |||||
| graphStatus GenerateWorstValueRange(NodePtr &node); | |||||
| template <typename T> | |||||
| graphStatus ConstructData(const GeTensorDesc &tensor_desc, bool use_floor_value, GeTensorPtr &output_ptr); | |||||
| graphStatus ConstructDataByType(const GeTensorDesc &tensor_desc, bool use_floor_value, GeTensorPtr &output_ptr); | |||||
| vector<ConstGeTensorPtr> ConstructInputTensors(const NodePtr &node, bool use_floor_value); | |||||
| template <typename T> | |||||
| void ConstructValueRange(const GeTensorPtr &left_tensor, const GeTensorPtr &right_tensor, | |||||
| std::vector<std::pair<int64_t, int64_t>> &value_range); | |||||
| graphStatus ConstructInputAndInferValueRange(NodePtr &node); | |||||
| }; | |||||
| } // namespace ge | |||||
| #endif // GE_GRAPH_PASSES_INFER_VALUE_RANGE_PASS_H_ | |||||
| @@ -16,8 +16,6 @@ | |||||
| #include "graph/passes/mark_force_unknown_for_cond_pass.h" | #include "graph/passes/mark_force_unknown_for_cond_pass.h" | ||||
| #include <queue> | |||||
| #include "graph/utils/node_utils.h" | #include "graph/utils/node_utils.h" | ||||
| #include "graph/common/omg_util.h" | #include "graph/common/omg_util.h" | ||||
| @@ -26,17 +24,7 @@ namespace { | |||||
| inline bool IsMergeInLoop(const NodePtr &node) { | inline bool IsMergeInLoop(const NodePtr &node) { | ||||
| const static std::set<std::string> kLoopMergeInputs{ ENTER, REFENTER, NEXTITERATION, REFNEXTITERATION }; | const static std::set<std::string> kLoopMergeInputs{ ENTER, REFENTER, NEXTITERATION, REFNEXTITERATION }; | ||||
| std::string node_type; | |||||
| (void)GetOriginalType(node, node_type); | |||||
| return kLoopMergeInputs.count(node_type) > 0; | |||||
| } | |||||
| inline bool IsSwitchInLoop(const NodePtr &node) { | |||||
| const static std::set<std::string> kLoopSwitchInputs{ MERGE, REFMERGE, LOOPCOND }; | |||||
| std::string node_type; | |||||
| (void)GetOriginalType(node, node_type); | |||||
| return kLoopSwitchInputs.count(node_type) > 0; | |||||
| return kLoopMergeInputs.count(NodeUtils::GetNodeType(node)) > 0; | |||||
| } | } | ||||
| } | } | ||||
| @@ -44,10 +32,7 @@ Status MarkForceUnknownForCondPass::Run(ComputeGraphPtr graph) { | |||||
| GELOGD("MarkForceUnknownForCondPass Enter"); | GELOGD("MarkForceUnknownForCondPass Enter"); | ||||
| std::map<NodePtr, std::vector<NodePtr>> switch_groups; | std::map<NodePtr, std::vector<NodePtr>> switch_groups; | ||||
| for (const auto &node : graph->GetDirectNode()) { | for (const auto &node : graph->GetDirectNode()) { | ||||
| std::string node_type; | |||||
| GE_CHK_STATUS_RET(GetOriginalType(node, node_type), | |||||
| "[Get][OriginalType] of node in graph:%s failed.", graph->GetName().c_str()); | |||||
| if (kMergeOpTypes.count(node_type) == 0) { | |||||
| if (kMergeOpTypes.count(NodeUtils::GetNodeType(node)) == 0) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| @@ -64,6 +49,51 @@ Status MarkForceUnknownForCondPass::Run(ComputeGraphPtr graph) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| /// | |||||
| /// @brief Deal with Switch node for LoopCond | |||||
| /// @param [in] Switch node | |||||
| /// @param [in] dest span | |||||
| /// @param [out] Search queue | |||||
| /// @return true: Switch in while loop / false: not in while loop. | |||||
| /// | |||||
| bool MarkForceUnknownForCondPass::DealAsLoopSwitch(const NodePtr &node, uint32_t dst_span, | |||||
| std::queue<std::pair<NodePtr, uint32_t>> &search_queue) { | |||||
| /// LoopCond --->\. | |||||
| /// \. | |||||
| /// Enter-----------+ \. | |||||
| /// +--> Merge --> Switch --> Exit | |||||
| /// NextIteration---+ | |||||
| const auto is_loop_op = [](const NodePtr &n) { | |||||
| return NodeUtils::GetNodeType(n) == LOOPCOND; | |||||
| }; | |||||
| const auto is_exit_op = [](const NodePtr &n) { | |||||
| return kExitOpTypes.count(NodeUtils::GetNodeType(n)) > 0; | |||||
| }; | |||||
| const auto src_nodes = node->GetInAllNodes(); | |||||
| const auto dst_nodes = node->GetOutAllNodes(); | |||||
| if (std::none_of(src_nodes.begin(), src_nodes.end(), is_loop_op) && | |||||
| std::none_of(dst_nodes.begin(), dst_nodes.end(), is_exit_op)) { | |||||
| return false; | |||||
| } | |||||
| for (const auto &m : src_nodes) { | |||||
| if (kMergeOpTypes.count(NodeUtils::GetNodeType(m)) > 0) { | |||||
| for (const auto &n : m->GetInAllNodes()) { | |||||
| if (kNextIterationOpTypes.count(NodeUtils::GetNodeType(n)) > 0) { | |||||
| continue; | |||||
| } | |||||
| search_queue.push({n, dst_span}); | |||||
| GELOGD("Travel in Loop: %s <-- %s <-- %s, span is: %u", node->GetName().c_str(), m->GetName().c_str(), | |||||
| n->GetName().c_str(), dst_span); | |||||
| } | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| /// | /// | ||||
| /// @brief Mark force unknown shape for Switch node | /// @brief Mark force unknown shape for Switch node | ||||
| /// @param [in] merge node | /// @param [in] merge node | ||||
| @@ -72,6 +102,7 @@ Status MarkForceUnknownForCondPass::Run(ComputeGraphPtr graph) { | |||||
| /// | /// | ||||
| void MarkForceUnknownForCondPass::MarkUnknownForSwitch(const NodePtr &node, std::vector<NodePtr> &switch_group) { | void MarkForceUnknownForCondPass::MarkUnknownForSwitch(const NodePtr &node, std::vector<NodePtr> &switch_group) { | ||||
| // Switch --> {Switch --> Merge} --> Merge | // Switch --> {Switch --> Merge} --> Merge | ||||
| GELOGD("Search Switch node for Merge: %s", node->GetName().c_str()); | |||||
| std::unordered_set<NodePtr> nodes_seen; | std::unordered_set<NodePtr> nodes_seen; | ||||
| std::queue<std::pair<NodePtr, uint32_t>> search_queue({{node, 0}}); | std::queue<std::pair<NodePtr, uint32_t>> search_queue({{node, 0}}); | ||||
| while (!search_queue.empty()) { | while (!search_queue.empty()) { | ||||
| @@ -79,43 +110,25 @@ void MarkForceUnknownForCondPass::MarkUnknownForSwitch(const NodePtr &node, std: | |||||
| const auto dst_span = search_queue.front().second; | const auto dst_span = search_queue.front().second; | ||||
| search_queue.pop(); | search_queue.pop(); | ||||
| // Switch --> Identity --> Constant | |||||
| for (const auto &in_node : dst_node->GetInControlNodes()) { | |||||
| if (nodes_seen.count(in_node) > 0) { | |||||
| GELOGD("Travel node: %s, Skip already seen node: %s", dst_node->GetName().c_str(), in_node->GetName().c_str()); | |||||
| continue; | |||||
| } | |||||
| nodes_seen.insert(in_node); | |||||
| if (in_node->GetType() == IDENTITY) { | |||||
| GELOGD("Travel node: %s, In control: %s, span is: %u", dst_node->GetName().c_str(), | |||||
| in_node->GetName().c_str(), dst_span); | |||||
| search_queue.push({in_node, dst_span}); | |||||
| } | |||||
| } | |||||
| for (const auto &in_node : dst_node->GetInDataNodes()) { | |||||
| for (const auto &in_node : dst_node->GetInAllNodes()) { | |||||
| if (nodes_seen.count(in_node) > 0) { | if (nodes_seen.count(in_node) > 0) { | ||||
| GELOGD("Travel node: %s, Skip already seen node: %s", dst_node->GetName().c_str(), in_node->GetName().c_str()); | GELOGD("Travel node: %s, Skip already seen node: %s", dst_node->GetName().c_str(), in_node->GetName().c_str()); | ||||
| continue; | continue; | ||||
| } | } | ||||
| nodes_seen.insert(in_node); | nodes_seen.insert(in_node); | ||||
| std::string node_type; | |||||
| (void)GetOriginalType(in_node, node_type); | |||||
| const std::string node_type = NodeUtils::GetNodeType(in_node); | |||||
| GELOGD("Travel node: %s, %s node: %s, span is: %u", dst_node->GetName().c_str(), node_type.c_str(), | GELOGD("Travel node: %s, %s node: %s, span is: %u", dst_node->GetName().c_str(), node_type.c_str(), | ||||
| in_node->GetName().c_str(), dst_span); | in_node->GetName().c_str(), dst_span); | ||||
| if (kSwitchOpTypes.count(node_type) > 0) { // Switch input node. | if (kSwitchOpTypes.count(node_type) > 0) { // Switch input node. | ||||
| if (DealAsLoopSwitch(in_node, dst_span, search_queue)) { | |||||
| continue; | |||||
| } | |||||
| if (dst_span > 0) { | if (dst_span > 0) { | ||||
| search_queue.push({in_node, dst_span - 1}); | search_queue.push({in_node, dst_span - 1}); | ||||
| } else { | } else { | ||||
| const auto &all_in_nodes = in_node->GetInDataNodes(); | |||||
| if (std::any_of(all_in_nodes.begin(), all_in_nodes.end(), IsSwitchInLoop)) { | |||||
| GELOGW("Travel node: %s, %s node: %s, Skip LoopCond switch", dst_node->GetName().c_str(), node_type.c_str(), | |||||
| in_node->GetName().c_str()); | |||||
| } else { | |||||
| switch_group.emplace_back(in_node); | |||||
| } | |||||
| switch_group.emplace_back(in_node); | |||||
| } | } | ||||
| } else if (kMergeOpTypes.count(node_type) > 0) { // Merge input node. | } else if (kMergeOpTypes.count(node_type) > 0) { // Merge input node. | ||||
| search_queue.push({in_node, dst_span + 1}); | search_queue.push({in_node, dst_span + 1}); | ||||
| @@ -19,12 +19,23 @@ | |||||
| #include "inc/graph_pass.h" | #include "inc/graph_pass.h" | ||||
| #include <queue> | |||||
| namespace ge { | namespace ge { | ||||
| class MarkForceUnknownForCondPass : public GraphPass { | class MarkForceUnknownForCondPass : public GraphPass { | ||||
| public: | public: | ||||
| Status Run(ComputeGraphPtr graph); | Status Run(ComputeGraphPtr graph); | ||||
| private: | private: | ||||
| /// | |||||
| /// @brief Deal with Switch node for LoopCond | |||||
| /// @param [in] Switch node | |||||
| /// @param [in] dest span | |||||
| /// @param [out] Search queue | |||||
| /// @return true: Switch in while loop / false: not in while loop. | |||||
| /// | |||||
| bool DealAsLoopSwitch(const NodePtr &node, uint32_t dst_span, std::queue<std::pair<NodePtr, uint32_t>> &search_queue); | |||||
| /// | /// | ||||
| /// @brief Mark force unknown shape for Switch node | /// @brief Mark force unknown shape for Switch node | ||||
| /// @param [in] merge node | /// @param [in] merge node | ||||
| @@ -24,7 +24,9 @@ using std::string; | |||||
| namespace ge { | namespace ge { | ||||
| namespace { | namespace { | ||||
| const int64_t kLoopType = 1; | |||||
| constexpr int64_t kLoopType = 1; | |||||
| constexpr uint8_t kMaxTransOp = 3; | |||||
| constexpr uint8_t kTransOpIoSize = 1; | |||||
| } | } | ||||
| Status NextIterationPass::Run(ComputeGraphPtr graph) { | Status NextIterationPass::Run(ComputeGraphPtr graph) { | ||||
| @@ -287,18 +289,25 @@ void NextIterationPass::HandleSwitchExitNodes(const LoopCondGroup &loop_group, i | |||||
| std::string node_type; | std::string node_type; | ||||
| for (const auto &switch_node : loop_group.switch_nodes) { | for (const auto &switch_node : loop_group.switch_nodes) { | ||||
| SetControlFlowGroup(switch_node, group_index); | SetControlFlowGroup(switch_node, group_index); | ||||
| for (const auto &node : switch_node->GetOutDataNodes()) { | |||||
| (void)GetOriginalType(node, node_type); | |||||
| if (kExitOpTypes.count(node_type) > 0) { | |||||
| SetControlFlowGroup(node, group_index); | |||||
| } else { | |||||
| // For: Switch -> Cast -> Exit | |||||
| for (const auto &n : node->GetOutDataNodes()) { | |||||
| (void)GetOriginalType(n, node_type); | |||||
| if (kExitOpTypes.count(node_type) > 0) { | |||||
| SetControlFlowGroup(n, group_index); | |||||
| } | |||||
| for (auto node : switch_node->GetOutDataNodes()) { | |||||
| // Switch --> Exit | |||||
| // Switch --> Cast --> Exit | |||||
| // Switch --> TransData --> Cast --> Exit | |||||
| for (uint8_t i = 0; i < kMaxTransOp; ++i) { | |||||
| if (node->GetInDataNodes().size() != kTransOpIoSize || node->GetAllOutDataAnchorsSize() != kTransOpIoSize) { | |||||
| break; | |||||
| } | } | ||||
| if (kExitOpTypes.count(NodeUtils::GetNodeType(node)) > 0) { | |||||
| SetControlFlowGroup(node, group_index); | |||||
| break; | |||||
| } | |||||
| const auto &all_nodes = node->GetOutAllNodes(); | |||||
| if (all_nodes.size() != kTransOpIoSize) { | |||||
| break; | |||||
| } | |||||
| node = all_nodes.at(0); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -71,7 +71,7 @@ Status ReplaceWithEmptyConstPass::Run(NodePtr &node) { | |||||
| GELOGI("Node %s Got empty output_desc_ptr, ignore current pass.", node->GetName().c_str()); | GELOGI("Node %s Got empty output_desc_ptr, ignore current pass.", node->GetName().c_str()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| if (!IsEmptyTenor(output_desc_ptr->GetShape())) { | |||||
| if (!IsKnownEmptyTenor(output_desc_ptr->GetShape())) { | |||||
| is_all_output_empty = false; | is_all_output_empty = false; | ||||
| break; | break; | ||||
| } | } | ||||
| @@ -107,12 +107,16 @@ Status ReplaceWithEmptyConstPass::GetOutputsOfCurrNode(const NodePtr &node_to_re | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| bool ReplaceWithEmptyConstPass::IsEmptyTenor(const GeShape &shape) const { | |||||
| bool ReplaceWithEmptyConstPass::IsKnownEmptyTenor(const GeShape &shape) const { | |||||
| bool is_known_empty_tensor = false; | |||||
| for (auto dim : shape.GetDims()) { | for (auto dim : shape.GetDims()) { | ||||
| if (dim == 0) { | |||||
| return true; | |||||
| if (dim < 0) { | |||||
| // current dim is unknown dim, skip replace | |||||
| return false; | |||||
| } else if (dim == 0) { | |||||
| is_known_empty_tensor = true; | |||||
| } | } | ||||
| } | } | ||||
| return false; | |||||
| return is_known_empty_tensor; | |||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -26,7 +26,7 @@ class ReplaceWithEmptyConstPass : public FoldingPass { | |||||
| private: | private: | ||||
| Status GetOutputsOfCurrNode(const NodePtr &node_to_replace, vector<GeTensorPtr> &outputs); | Status GetOutputsOfCurrNode(const NodePtr &node_to_replace, vector<GeTensorPtr> &outputs); | ||||
| bool IsEmptyTenor(const GeShape &shape) const; | |||||
| bool IsKnownEmptyTenor(const GeShape &shape) const; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_GRAPH_PASSES_REPLACE_WITH_EMPTY_CONST_PASS_H_ | #endif // GE_GRAPH_PASSES_REPLACE_WITH_EMPTY_CONST_PASS_H_ | ||||
| @@ -395,8 +395,9 @@ NodePtr SwitchToStreamSwitchPass::CreateStreamSwitchNode(const ComputeGraphPtr & | |||||
| peer_cond_anchor->GetOwnerNode()->GetName().c_str(), stream_switch->GetName().c_str()); | peer_cond_anchor->GetOwnerNode()->GetName().c_str(), stream_switch->GetName().c_str()); | ||||
| int64_t group_index = -1; | int64_t group_index = -1; | ||||
| (void)AttrUtils::GetInt(switch_node->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, group_index); | |||||
| SetControlFlowGroup(stream_switch, group_index); | |||||
| if (AttrUtils::GetInt(switch_node->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, group_index)) { | |||||
| SetControlFlowGroup(stream_switch, group_index); | |||||
| } | |||||
| return stream_switch; | return stream_switch; | ||||
| } | } | ||||
| @@ -54,6 +54,7 @@ | |||||
| #include "graph/passes/hccl_group_pass.h" | #include "graph/passes/hccl_group_pass.h" | ||||
| #include "graph/passes/identity_pass.h" | #include "graph/passes/identity_pass.h" | ||||
| #include "graph/passes/infershape_pass.h" | #include "graph/passes/infershape_pass.h" | ||||
| #include "graph/passes/infer_value_range_pass.h" | |||||
| #include "graph/passes/merge_pass.h" | #include "graph/passes/merge_pass.h" | ||||
| #include "graph/passes/net_output_pass.h" | #include "graph/passes/net_output_pass.h" | ||||
| #include "graph/passes/no_use_reshape_remove_pass.h" | #include "graph/passes/no_use_reshape_remove_pass.h" | ||||
| @@ -2016,6 +2017,8 @@ Status GraphPrepare::InferShapeForPreprocess() { | |||||
| names_to_passes.emplace_back("DimensionComputePass", &dimension_compute_pass); | names_to_passes.emplace_back("DimensionComputePass", &dimension_compute_pass); | ||||
| ConstantFoldingPass constant_folding_pass; | ConstantFoldingPass constant_folding_pass; | ||||
| names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); | names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); | ||||
| InferValueRangePass infer_value_pass; | |||||
| names_to_passes.emplace_back("InferValuePass", &infer_value_pass); | |||||
| int32_t dev_count = 0; | int32_t dev_count = 0; | ||||
| AicpuConstantFoldingPass aicpu_constant_folding_pass; | AicpuConstantFoldingPass aicpu_constant_folding_pass; | ||||
| @@ -568,6 +568,7 @@ Status InsertNewOpUtil::GetDataRelatedNode(NodePtr &node, std::map<NodePtr, std: | |||||
| } | } | ||||
| std::unique_ptr<domi::AippOpParams> aipp_params(new (std::nothrow) domi::AippOpParams()); | std::unique_ptr<domi::AippOpParams> aipp_params(new (std::nothrow) domi::AippOpParams()); | ||||
| GE_CHECK_NOTNULL(aipp_params); | |||||
| ge::GeAttrValue::NAMED_ATTRS aipp_attr; | ge::GeAttrValue::NAMED_ATTRS aipp_attr; | ||||
| GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), ACL_ERROR_GE_AIPP_NOT_EXIST, | GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), ACL_ERROR_GE_AIPP_NOT_EXIST, | ||||
| "[Get][Attr] %s from op:%s failed", ATTR_NAME_AIPP.c_str(), data_op->GetName().c_str()); | "[Get][Attr] %s from op:%s failed", ATTR_NAME_AIPP.c_str(), data_op->GetName().c_str()); | ||||
| @@ -1206,7 +1206,7 @@ Status MultiBatchGraphCopyer::CheckCopyResult(const std::vector<NodePtr> &start_ | |||||
| auto dims = NodeUtils::GetOutputDesc(*node, kDataOutIndex).GetShape().GetDims(); | auto dims = NodeUtils::GetOutputDesc(*node, kDataOutIndex).GetShape().GetDims(); | ||||
| if (!IsAllDimsPositive(dims)) { | if (!IsAllDimsPositive(dims)) { | ||||
| REPORT_CALL_ERROR("E19999", "Failed to copy multi batch graph, the node %s still has unknown shape %s", | REPORT_CALL_ERROR("E19999", "Failed to copy multi batch graph, the node %s still has unknown shape %s", | ||||
| node->GetName().c_str(), formats::ShapeToString(dims).c_str()); | |||||
| node->GetName().c_str(), formats::ShapeToString(dims).c_str()); | |||||
| GELOGE(INTERNAL_ERROR, "[Check][Param] Failed to copy multi batch graph, the node %s still has unknown shape %s", | GELOGE(INTERNAL_ERROR, "[Check][Param] Failed to copy multi batch graph, the node %s still has unknown shape %s", | ||||
| node->GetName().c_str(), formats::ShapeToString(dims).c_str()); | node->GetName().c_str(), formats::ShapeToString(dims).c_str()); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| @@ -295,13 +295,15 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData ¤t_data, Hy | |||||
| } | } | ||||
| } | } | ||||
| tensor_desc->SetShape(shape); | tensor_desc->SetShape(shape); | ||||
| args.input_desc[input_index] = tensor_desc; | |||||
| GELOGD("Update shape of input[%zu] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str()); | |||||
| GELOGD("Update shape[%s] of input[%zu] to [%s]", | |||||
| shape.ToString().c_str(), input_index, tensor_desc->MutableShape().ToString().c_str()); | |||||
| GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, tensor_size), | GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, tensor_size), | ||||
| "[Invoke][GetTensorMemorySizeInBytes]Failed to calc tensor size," | "[Invoke][GetTensorMemorySizeInBytes]Failed to calc tensor size," | ||||
| "index = %zu, shape = [%s], model_id = %u.", | "index = %zu, shape = [%s], model_id = %u.", | ||||
| input_index, tensor_desc->GetShape().ToString().c_str(), model_id_); | input_index, tensor_desc->GetShape().ToString().c_str(), model_id_); | ||||
| GELOGD("Input tensor[%zu] size = %zu", input_index, tensor_size); | |||||
| GELOGD("Input tensor[%zu] size = %ld", input_index, tensor_size); | |||||
| TensorUtils::SetSize(*tensor_desc, tensor_size); | |||||
| args.input_desc[input_index] = tensor_desc; | |||||
| } | } | ||||
| GE_CHECK_GE(tensor_size, 0); | GE_CHECK_GE(tensor_size, 0); | ||||
| @@ -33,9 +33,6 @@ HybridModelExecutor::HybridModelExecutor(HybridModel *model, uint32_t device_id, | |||||
| } | } | ||||
| HybridModelExecutor::~HybridModelExecutor() { | HybridModelExecutor::~HybridModelExecutor() { | ||||
| if (context_.rt_gen_context != nullptr) { | |||||
| (void) rtCtxDestroy(context_.rt_gen_context); | |||||
| } | |||||
| } | } | ||||
| Status HybridModelExecutor::Init() { | Status HybridModelExecutor::Init() { | ||||
| @@ -139,7 +136,6 @@ Status HybridModelExecutor::Cleanup() { | |||||
| Status HybridModelExecutor::InitExecutionContext() { | Status HybridModelExecutor::InitExecutionContext() { | ||||
| GE_CHK_RT_RET(rtCtxGetCurrent(&context_.rt_context)); | GE_CHK_RT_RET(rtCtxGetCurrent(&context_.rt_context)); | ||||
| GE_CHK_RT_RET(rtCtxCreate(&context_.rt_gen_context, RT_CTX_GEN_MODE, 0)); | |||||
| GE_CHK_RT_RET(rtCtxSetCurrent(context_.rt_context)); | GE_CHK_RT_RET(rtCtxSetCurrent(context_.rt_context)); | ||||
| context_.global_step = model_->GetGlobalStep(); | context_.global_step = model_->GetGlobalStep(); | ||||
| @@ -191,7 +191,6 @@ HybridModelPipelineExecutor::HybridModelPipelineExecutor(HybridModel *model, uin | |||||
| } | } | ||||
| Status StageExecutor::InitExecutionContext() { | Status StageExecutor::InitExecutionContext() { | ||||
| GE_CHK_RT_RET(rtCtxCreate(&context_.rt_gen_context, RT_CTX_GEN_MODE, 0)); | |||||
| GE_CHK_RT_RET(rtCtxSetCurrent(context_.rt_context)); | GE_CHK_RT_RET(rtCtxSetCurrent(context_.rt_context)); | ||||
| context_.model = model_; | context_.model = model_; | ||||
| @@ -326,17 +326,45 @@ std::shared_ptr<TaskContext> NodeState::GetTaskContext() { | |||||
| } | } | ||||
| void NodeState::SavePersistTensor(int input_idx, const TensorValue &tensor) { | void NodeState::SavePersistTensor(int input_idx, const TensorValue &tensor) { | ||||
| if (node_item_->root_data_.count(input_idx) > 0) { | |||||
| GELOGD("[%s] Save Root input tensor: %d", GetName().c_str(), input_idx); | |||||
| root_tensor_values_[input_idx] = tensor; | |||||
| const auto is_persist_tensor = [](const std::map<const NodeItem *, std::set<int>> &items, int idx) { | |||||
| const auto is_exist = [&idx](const std::pair<const NodeItem *, std::set<int>> &items) { | |||||
| return items.second.count(idx) > 0; | |||||
| }; | |||||
| return std::any_of(items.begin(), items.end(), is_exist); | |||||
| }; | |||||
| if (root_tensor_values_.count(input_idx) > 0) { | |||||
| return; | |||||
| } | } | ||||
| if (node_item_->enter_data_.count(input_idx) > 0) { | |||||
| if (is_persist_tensor(node_item_->root_data_, input_idx)) { | |||||
| GELOGD("[%s] Save Root input tensor: %d", GetName().c_str(), input_idx); | |||||
| root_tensor_values_[input_idx] = tensor; | |||||
| } else if (is_persist_tensor(node_item_->enter_data_, input_idx)) { | |||||
| GELOGD("[%s] Save Enter input tensor: %d", GetName().c_str(), input_idx); | GELOGD("[%s] Save Enter input tensor: %d", GetName().c_str(), input_idx); | ||||
| root_tensor_values_[input_idx] = tensor; | root_tensor_values_[input_idx] = tensor; | ||||
| } | } | ||||
| } | } | ||||
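| // Restore saved persistent tensors before task preparation: root inputs on every iteration, | |||||
| // Enter inputs only after the first iteration. | |||||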
| void NodeState::UpdatePersistTensor() { | |||||
| const auto update_tensor = [&](const std::map<const NodeItem *, std::set<int>> &items) { | |||||
| for (const auto &item : items) { | |||||
| for (const auto idx : item.second) { | |||||
| UpdatePersistTensor(idx); | |||||
| } | |||||
| } | |||||
| }; | |||||
| if (root_tensor_values_.empty()) { | |||||
| return; | |||||
| } | |||||
| update_tensor(node_item_->root_data_); | |||||
| if (iteration_count_ > 0) { | |||||
| update_tensor(node_item_->enter_data_); | |||||
| } | |||||
| } | |||||
| void NodeState::UpdatePersistTensor(int input_idx) { | void NodeState::UpdatePersistTensor(int input_idx) { | ||||
| const auto it = root_tensor_values_.find(input_idx); | const auto it = root_tensor_values_.find(input_idx); | ||||
| if (it == root_tensor_values_.end()) { | if (it == root_tensor_values_.end()) { | ||||
| @@ -363,16 +391,9 @@ void NodeState::ResetContext(uint64_t iteration) { | |||||
| data_scheduled_ = static_cast<uint32_t>(node_item_->root_data_.size()); | data_scheduled_ = static_cast<uint32_t>(node_item_->root_data_.size()); | ||||
| ctrl_scheduled_ = static_cast<uint32_t>(node_item_->root_ctrl_.size()); | ctrl_scheduled_ = static_cast<uint32_t>(node_item_->root_ctrl_.size()); | ||||
| for (auto item : node_item_->root_data_) { | |||||
| UpdatePersistTensor(item.first); | |||||
| } | |||||
| if (iteration > 0) { | if (iteration > 0) { | ||||
| data_scheduled_ += static_cast<uint32_t>(node_item_->enter_data_.size()); | data_scheduled_ += static_cast<uint32_t>(node_item_->enter_data_.size()); | ||||
| ctrl_scheduled_ += static_cast<uint32_t>(node_item_->enter_ctrl_.size()); | ctrl_scheduled_ += static_cast<uint32_t>(node_item_->enter_ctrl_.size()); | ||||
| for (auto item : node_item_->enter_data_) { | |||||
| UpdatePersistTensor(item.first); | |||||
| } | |||||
| } | } | ||||
| iteration_count_ = iteration; | iteration_count_ = iteration; | ||||
| @@ -132,6 +132,7 @@ struct NodeState { | |||||
| void RunNextIteration(); | void RunNextIteration(); | ||||
| void SavePersistTensor(int input_idx, const TensorValue &tensor); | void SavePersistTensor(int input_idx, const TensorValue &tensor); | ||||
| void UpdatePersistTensor(); | |||||
| Status NodeScheduled(const std::function<void(const NodeItem *)> &ready) const; | Status NodeScheduled(const std::function<void(const NodeItem *)> &ready) const; | ||||
| @@ -109,7 +109,6 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue | |||||
| GE_CHECK_NOTNULL(output_desc); | GE_CHECK_NOTNULL(output_desc); | ||||
| output_desc->SetShape(tensor_desc->GetShape()); | output_desc->SetShape(tensor_desc->GetShape()); | ||||
| output_desc->SetOriginShape(tensor_desc->GetOriginShape()); | output_desc->SetOriginShape(tensor_desc->GetOriginShape()); | ||||
| output_desc->SetDataType(tensor_desc->GetDataType()); | |||||
| node_state->SetSkipInferShape(true); | node_state->SetSkipInferShape(true); | ||||
| } | } | ||||
| } | } | ||||
| @@ -373,6 +373,7 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state, | |||||
| auto executor = node_item.node_executor; | auto executor = node_item.node_executor; | ||||
| GE_CHECK_NOTNULL(executor); | GE_CHECK_NOTNULL(executor); | ||||
| RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PrepareTask] Start"); | RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PrepareTask] Start"); | ||||
| node_state.UpdatePersistTensor(); | |||||
| GE_CHK_STATUS_RET(executor->PrepareTask(*task, task_context), "[Prepare][Task] for [%s] failed.", | GE_CHK_STATUS_RET(executor->PrepareTask(*task, task_context), "[Prepare][Task] for [%s] failed.", | ||||
| node_state.GetName().c_str()); | node_state.GetName().c_str()); | ||||
| RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PrepareTask] End"); | RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PrepareTask] End"); | ||||
| @@ -21,10 +21,17 @@ | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| Status TaskCompileEngine::Compile(NodeState &node_state, GraphExecutionContext *context) { | Status TaskCompileEngine::Compile(NodeState &node_state, GraphExecutionContext *context) { | ||||
| const auto &node_item = *node_state.GetNodeItem(); | |||||
| GE_CHECK_NOTNULL(context); | GE_CHECK_NOTNULL(context); | ||||
| rtContext_t rt_gen_context = nullptr; | |||||
| GE_CHK_RT_RET(rtCtxCreate(&rt_gen_context, RT_CTX_GEN_MODE, 0)); | |||||
| std::function<void()> callback = [&]() { | |||||
| (void) rtCtxDestroy(rt_gen_context); | |||||
| GE_CHK_RT(rtCtxSetCurrent(context->rt_context)); | |||||
| }; | |||||
| GE_MAKE_GUARD(rt_gen_context, callback); | |||||
| const auto &node_item = *node_state.GetNodeItem(); | |||||
| RECORD_COMPILE_EVENT(context, node_item.NodeName().c_str(), "[Compile] Start"); | RECORD_COMPILE_EVENT(context, node_item.NodeName().c_str(), "[Compile] Start"); | ||||
| GE_CHK_RT_RET(rtCtxSetCurrent(context->rt_gen_context)); | |||||
| if (context->ge_context != nullptr) { | if (context->ge_context != nullptr) { | ||||
| GetThreadLocalContext() = *context->ge_context; | GetThreadLocalContext() = *context->ge_context; | ||||
| @@ -196,9 +196,7 @@ Status HybridModelBuilder::CopyGraph() { | |||||
| GELOGD("Copy compute graph begin."); | GELOGD("Copy compute graph begin."); | ||||
| auto root_graph = ge_root_model_->GetRootGraph(); | auto root_graph = ge_root_model_->GetRootGraph(); | ||||
| ge_root_model_->IncreaseBuildTimes(); | |||||
| std::string new_graph_name = ge_root_model_->GetRootGraph()->GetName() + "_" + | |||||
| std::to_string(ge_root_model_->GetBuildTimes()); | |||||
| std::string new_graph_name = ge_root_model_->GetRootGraph()->GetName(); | |||||
| ComputeGraphPtr new_root_graph = MakeShared<ComputeGraph>(new_graph_name); | ComputeGraphPtr new_root_graph = MakeShared<ComputeGraph>(new_graph_name); | ||||
| GE_CHECK_NOTNULL(new_root_graph); | GE_CHECK_NOTNULL(new_root_graph); | ||||
| int32_t depth = 0; | int32_t depth = 0; | ||||
| @@ -1046,6 +1044,7 @@ Status HybridModelBuilder::InitConstantOps() { | |||||
| } else { | } else { | ||||
| var_tensor.reset(new(std::nothrow)TensorValue(nullptr, 0)); | var_tensor.reset(new(std::nothrow)TensorValue(nullptr, 0)); | ||||
| } | } | ||||
| GE_CHECK_NOTNULL(var_tensor); | |||||
| } else { | } else { | ||||
| GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor)); | GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor)); | ||||
| GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize()); | GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize()); | ||||
| @@ -24,6 +24,8 @@ | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| namespace { | namespace { | ||||
| const uint8_t kMaxTransCount = 3; | |||||
| const uint32_t kTransOpIoSize = 1; | |||||
| const char *const kAttrNameOriginalFusionGraph = "_original_fusion_graph"; | const char *const kAttrNameOriginalFusionGraph = "_original_fusion_graph"; | ||||
| const char *const kNodeTypeRetVal = "_RetVal"; | const char *const kNodeTypeRetVal = "_RetVal"; | ||||
| const std::set<std::string> kControlOpTypes{ | const std::set<std::string> kControlOpTypes{ | ||||
| @@ -39,6 +41,25 @@ const std::set<std::string> kMergeOpTypes{ | |||||
| MERGE, REFMERGE, STREAMMERGE | MERGE, REFMERGE, STREAMMERGE | ||||
| }; | }; | ||||
| bool IsEnterFeedNode(NodePtr node) { | |||||
| // For: Enter -> node | |||||
| // For: Enter -> Cast -> node | |||||
| // For: Enter -> TransData -> Cast -> node | |||||
| for (uint8_t i = 0; i < kMaxTransCount; ++i) { | |||||
| if (kEnterOpTypes.count(NodeUtils::GetNodeType(node)) > 0) { | |||||
| GELOGD("Node[%u] is Enter feed node.", node->GetName().c_str()); | |||||
| return true; | |||||
| } | |||||
| const auto all_nodes = node->GetInDataNodes(); | |||||
| if (all_nodes.size() != kTransOpIoSize || node->GetAllInDataAnchorsSize() != kTransOpIoSize) { | |||||
| return false; | |||||
| } | |||||
| node = all_nodes.at(0); | |||||
| } | |||||
| return false; | |||||
| } | |||||
| Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgraph) { | Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgraph) { | ||||
| uint32_t parent_index = 0; | uint32_t parent_index = 0; | ||||
| if (!AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | if (!AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | ||||
| @@ -395,11 +416,13 @@ void NodeItem::SetDataSend(NodeItem *node_item, int anchor_index) { | |||||
| data_send_.emplace(node_item); | data_send_.emplace(node_item); | ||||
| node_item->data_recv_[this] = anchor_index; | node_item->data_recv_[this] = anchor_index; | ||||
| if (is_root_node_) { | if (is_root_node_) { | ||||
| node_item->root_data_[anchor_index] = this; | |||||
| auto &data_anchors = node_item->root_data_[this]; | |||||
| data_anchors.emplace(anchor_index); | |||||
| } | } | ||||
| // If Enter feed Not Merge, take as root Node. | // If Enter feed Not Merge, take as root Node. | ||||
| if (IsEnterOp() && (node_item->node_type != STREAMMERGE)) { | |||||
| node_item->enter_data_[anchor_index] = this; | |||||
| if (IsEnterFeedNode(node) && (node_item->node_type != STREAMMERGE)) { | |||||
| auto &data_anchors = node_item->enter_data_[this]; | |||||
| data_anchors.emplace(anchor_index); | |||||
| } | } | ||||
| GELOGI("Node[%s] will control node[%s]", NodeName().c_str(), node_item->NodeName().c_str()); | GELOGI("Node[%s] will control node[%s]", NodeName().c_str(), node_item->NodeName().c_str()); | ||||
| } | } | ||||
| @@ -417,7 +440,7 @@ void NodeItem::SetCtrlSend(NodeItem *node_item, uint32_t switch_index) { | |||||
| node_item->root_ctrl_.emplace(this); | node_item->root_ctrl_.emplace(this); | ||||
| } | } | ||||
| // If Enter feed control signal, take as root Node. | // If Enter feed control signal, take as root Node. | ||||
| if (IsEnterOp() && (node_item->node_type != STREAMMERGE && node_item->node_type != STREAMACTIVE)) { | |||||
| if (IsEnterFeedNode(node) && (node_item->node_type != STREAMMERGE && node_item->node_type != STREAMACTIVE)) { | |||||
| node_item->enter_ctrl_.emplace(this); | node_item->enter_ctrl_.emplace(this); | ||||
| } | } | ||||
| GELOGI("Node[%s] will control node[%s]", NodeName().c_str(), node_item->NodeName().c_str()); | GELOGI("Node[%s] will control node[%s]", NodeName().c_str(), node_item->NodeName().c_str()); | ||||
| @@ -148,9 +148,9 @@ struct NodeItem { | |||||
| int64_t frame_index_ = -1; | int64_t frame_index_ = -1; | ||||
| int64_t parent_frame_ = -1; | int64_t parent_frame_ = -1; | ||||
| std::set<const NodeItem *> root_ctrl_; // Recv ctrl from root node | std::set<const NodeItem *> root_ctrl_; // Recv ctrl from root node | ||||
| std::map<int, const NodeItem *> root_data_; // Recv data from root node | |||||
| std::map<const NodeItem *, std::set<int>> root_data_; // Recv data from root node | |||||
| std::set<const NodeItem *> enter_ctrl_; // Recv ctrl from Enter node | std::set<const NodeItem *> enter_ctrl_; // Recv ctrl from Enter node | ||||
| std::map<int, const NodeItem *> enter_data_; // Recv data from Enter node | |||||
| std::map<const NodeItem *, std::set<int>> enter_data_; // Recv data from Enter node | |||||
| std::set<const NodeItem *> data_send_; // Send data notify to | std::set<const NodeItem *> data_send_; // Send data notify to | ||||
| std::map<const NodeItem *, int> data_recv_; // Recv data notify from | std::map<const NodeItem *, int> data_recv_; // Recv data notify from | ||||
| std::set<const NodeItem *> ctrl_send_; // Send ctrl notify to | std::set<const NodeItem *> ctrl_send_; // Send ctrl notify to | ||||
| @@ -18,6 +18,7 @@ | |||||
| #include "framework/common/taskdown_common.h" | #include "framework/common/taskdown_common.h" | ||||
| #include "hybrid/executor/hybrid_execution_context.h" | #include "hybrid/executor/hybrid_execution_context.h" | ||||
| #include "external/runtime/rt_error_codes.h" | #include "external/runtime/rt_error_codes.h" | ||||
| #include "single_op/task/build_task_utils.h" | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| @@ -196,6 +197,11 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | ||||
| GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | ||||
| GE_CHK_STATUS_RET_NOLOG(CheckOverflow(context)); | GE_CHK_STATUS_RET_NOLOG(CheckOverflow(context)); | ||||
| GE_CHECK_NOTNULL(context.GetExecutionContext()->model); | |||||
| GELOGD("[DEBUG_TASK_INFO : Executor Task] %s/%s %s", | |||||
| context.GetExecutionContext()->model->GetModelName().c_str(), | |||||
| (*it)->GetName().empty() ? (*it)->GetLogName().c_str() : (*it)->GetName().c_str(), | |||||
| BuildTaskUtils::GetTaskInfo(context).c_str()); | |||||
| // save profiling data | // save profiling data | ||||
| uint32_t task_id = 0; | uint32_t task_id = 0; | ||||
| uint32_t stream_id = 0; | uint32_t stream_id = 0; | ||||
| @@ -208,7 +214,7 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||||
| context.SetTaskId(task_id); | context.SetTaskId(task_id); | ||||
| context.SetStreamId(stream_id); | context.SetStreamId(stream_id); | ||||
| GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | ||||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); | |||||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim(), (*it)->GetOpType()); | |||||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
| } | } | ||||
| @@ -33,6 +33,7 @@ namespace { | |||||
| constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | ||||
| constexpr char const *kAttrOpParamSize = "op_para_size"; | constexpr char const *kAttrOpParamSize = "op_para_size"; | ||||
| constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; | constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; | ||||
| const string kAtomicOpType = "DynamicAtomicAddrClean"; | |||||
| std::atomic<std::uint64_t> log_id(0); | std::atomic<std::uint64_t> log_id(0); | ||||
| } // namespace | } // namespace | ||||
| @@ -51,6 +52,7 @@ bool TbeHandleRegistry::AddHandle(std::unique_ptr<TbeHandleHolder> &&holder) { | |||||
| } | } | ||||
| Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | ||||
| op_type_ = op_desc.GetType(); | |||||
| log_name_ = op_desc.GetName() + "_tvmbin"; | log_name_ = op_desc.GetName() + "_tvmbin"; | ||||
| log_id_ = log_id++; | log_id_ = log_id++; | ||||
| auto op_desc_ptr = MakeShared<OpDesc>(op_desc); | auto op_desc_ptr = MakeShared<OpDesc>(op_desc); | ||||
| @@ -538,6 +540,10 @@ const std::string &AiCoreOpTask::GetName() const { | |||||
| return stub_name_; | return stub_name_; | ||||
| } | } | ||||
| const std::string &AiCoreOpTask::GetOpType() const { | |||||
| return op_type_; | |||||
| } | |||||
| std::string AiCoreOpTask::GetKeyForOpParamSize() const { | std::string AiCoreOpTask::GetKeyForOpParamSize() const { | ||||
| return kAttrOpParamSize; | return kAttrOpParamSize; | ||||
| } | } | ||||
| @@ -631,6 +637,10 @@ std::string AtomicAddrCleanOpTask::GetKeyForKernelName(const OpDesc &op_desc) co | |||||
| return op_desc.GetName() + "_atomic_kernelname"; | return op_desc.GetName() + "_atomic_kernelname"; | ||||
| } | } | ||||
| const std::string &AtomicAddrCleanOpTask::GetOpType() const { | |||||
| return kAtomicOpType; | |||||
| } | |||||
| Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { | Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { | ||||
| GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str()); | GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str()); | ||||
| GE_CHK_STATUS_RET(optiling::OpAtomicCalculateV2(*node, tiling_info), | GE_CHK_STATUS_RET(optiling::OpAtomicCalculateV2(*node, tiling_info), | ||||
| @@ -72,12 +72,16 @@ class AiCoreOpTask { | |||||
| const std::string& GetName() const; | const std::string& GetName() const; | ||||
| const std::string& GetLogName() const {return log_name_;} | |||||
| bool GetClearAtomic() const {return clear_atomic_;} | bool GetClearAtomic() const {return clear_atomic_;} | ||||
| uint32_t GetBlockDim() const {return block_dim_;} | uint32_t GetBlockDim() const {return block_dim_;} | ||||
| void SetSingleOp(bool is_single_op) {is_single_op_ = is_single_op;}; | void SetSingleOp(bool is_single_op) {is_single_op_ = is_single_op;}; | ||||
| virtual const std::string& GetOpType() const; | |||||
| protected: | protected: | ||||
| Status UpdateTilingInfo(TaskContext &context); | Status UpdateTilingInfo(TaskContext &context); | ||||
| virtual std::string GetKeyForOpParamSize() const; | virtual std::string GetKeyForOpParamSize() const; | ||||
| @@ -117,12 +121,14 @@ class AiCoreOpTask { | |||||
| uint64_t log_id_ = 0; | uint64_t log_id_ = 0; | ||||
| std::string log_name_; | std::string log_name_; | ||||
| uint32_t offset_ = 0; | uint32_t offset_ = 0; | ||||
| std::string op_type_; | |||||
| }; | }; | ||||
| class AtomicAddrCleanOpTask : public AiCoreOpTask { | class AtomicAddrCleanOpTask : public AiCoreOpTask { | ||||
| public: | public: | ||||
| Status Init(const OpDesc &op_desc, const domi::TaskDef &task_def) override; | Status Init(const OpDesc &op_desc, const domi::TaskDef &task_def) override; | ||||
| Status UpdateArgs(TaskContext &task_context) override; | Status UpdateArgs(TaskContext &task_context) override; | ||||
| const std::string& GetOpType() const override; | |||||
| protected: | protected: | ||||
| std::string GetKeyForOpParamSize() const override; | std::string GetKeyForOpParamSize() const override; | ||||
| @@ -207,7 +207,7 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void( | |||||
| context.SetTaskId(task_id); | context.SetTaskId(task_id); | ||||
| context.SetStreamId(stream_id); | context.SetStreamId(stream_id); | ||||
| GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | ||||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0); | |||||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0, node_type_); | |||||
| auto callback = [=, &context]() { | auto callback = [=, &context]() { | ||||
| GELOGD("Node[%s] callback start.", node_name_.c_str()); | GELOGD("Node[%s] callback start.", node_name_.c_str()); | ||||
| RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | ||||
| @@ -460,10 +460,6 @@ Status TaskContext::PropagateOutputs() { | |||||
| subgraph_context_->all_inputs_[input_offset].SetName( | subgraph_context_->all_inputs_[input_offset].SetName( | ||||
| node_item_->NodeName() + "_in_" + std::to_string(dst_input_idx)); | node_item_->NodeName() + "_in_" + std::to_string(dst_input_idx)); | ||||
| } | } | ||||
| auto dst_node_state = subgraph_context_->GetOrCreateNodeState(dst_node_item); | |||||
| GE_CHECK_NOTNULL(dst_node_state); | |||||
| dst_node_state->SavePersistTensor(dst_input_idx, *tensor); | |||||
| } | } | ||||
| } | } | ||||
| (void)guard; | (void)guard; | ||||
| @@ -495,6 +491,7 @@ void TaskContext::ReleaseInputsAndOutputs() { | |||||
| void TaskContext::ReleaseInput(int index) { | void TaskContext::ReleaseInput(int index) { | ||||
| auto input_tensor = MutableInput(index); | auto input_tensor = MutableInput(index); | ||||
| if (input_tensor != nullptr) { | if (input_tensor != nullptr) { | ||||
| node_state_->SavePersistTensor(index, *input_tensor); | |||||
| input_tensor->Destroy(); | input_tensor->Destroy(); | ||||
| GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), index); | GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), index); | ||||
| } | } | ||||
| @@ -574,8 +571,8 @@ Status TaskContext::Synchronize() { | |||||
| return execution_context_->Synchronize(GetStream()); | return execution_context_->Synchronize(GetStream()); | ||||
| } | } | ||||
| Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||||
| const std::string &task_type, uint32_t block_dim) { | |||||
| Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type, | |||||
| uint32_t block_dim, const std::string &op_type) { | |||||
| if (ProfilingManager::Instance().ProfilingModelLoadOn()) { | if (ProfilingManager::Instance().ProfilingModelLoadOn()) { | ||||
| const NodeItem &node_item = GetNodeItem(); | const NodeItem &node_item = GetNodeItem(); | ||||
| auto op_desc = node_item.GetOpDesc(); | auto op_desc = node_item.GetOpDesc(); | ||||
| @@ -589,7 +586,7 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream | |||||
| TaskDescInfo tmp_task_desc_info; | TaskDescInfo tmp_task_desc_info; | ||||
| tmp_task_desc_info.model_name = dynamic_model_name; | tmp_task_desc_info.model_name = dynamic_model_name; | ||||
| tmp_task_desc_info.op_name = op_desc->GetName(); | tmp_task_desc_info.op_name = op_desc->GetName(); | ||||
| tmp_task_desc_info.op_type = op_desc->GetType(); | |||||
| tmp_task_desc_info.op_type = op_type; | |||||
| tmp_task_desc_info.block_dim = block_dim; | tmp_task_desc_info.block_dim = block_dim; | ||||
| tmp_task_desc_info.task_type = task_type; | tmp_task_desc_info.task_type = task_type; | ||||
| tmp_task_desc_info.task_id = task_id; | tmp_task_desc_info.task_id = task_id; | ||||
| @@ -118,8 +118,8 @@ class TaskContext { | |||||
| void *handle_ = nullptr; | void *handle_ = nullptr; | ||||
| const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | ||||
| Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||||
| const std::string &task_type, uint32_t block_dim); | |||||
| Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type, | |||||
| uint32_t block_dim, const std::string &op_type); | |||||
| void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | ||||
| private: | private: | ||||
| @@ -50,6 +50,8 @@ const std::set<std::string> kBufferOptimizeSupportOption = {"l1_optimize", "l2_o | |||||
| const char *const kBufferOptimizeSupport = "only support l2_optimize, off_optimize"; | const char *const kBufferOptimizeSupport = "only support l2_optimize, off_optimize"; | ||||
| const char *const IR_OPTION_OP_SELECT_IMPLMODE_DEFAULT = "high_performance"; | const char *const IR_OPTION_OP_SELECT_IMPLMODE_DEFAULT = "high_performance"; | ||||
| const char *const IR_OPTION_OP_SELECT_IMPLMODE_PRECISON = "high_precision"; | const char *const IR_OPTION_OP_SELECT_IMPLMODE_PRECISON = "high_precision"; | ||||
| const char *const IR_OPTION_OP_SELECT_IMPLMODE_HIGH_PRECISION_FOR_ALL = "high_precision_for_all"; | |||||
| const char *const IR_OPTION_OP_SELECT_IMPLMODE_HIGH_PERFORMANCE_FOR_ALL = "high_performance_for_all"; | |||||
| const char *const kInputShapeSample1 = "\"input_name1:n1,c1,h1,w1\""; | const char *const kInputShapeSample1 = "\"input_name1:n1,c1,h1,w1\""; | ||||
| const char *const kInputShapeSample2 = "\"input_name1:1,3,224,224\""; | const char *const kInputShapeSample2 = "\"input_name1:1,3,224,224\""; | ||||
| const char *const kSplitError1 = "size not equal to 2 split by \":\""; | const char *const kSplitError1 = "size not equal to 2 split by \":\""; | ||||
| @@ -57,7 +59,8 @@ const char *const kEmptyError = "can not be empty"; | |||||
| const char *const kFloatNumError = "exist float number"; | const char *const kFloatNumError = "exist float number"; | ||||
| const char *const kDigitError = "is not digit"; | const char *const kDigitError = "is not digit"; | ||||
| const char *const kCompressWeightError = "it must be appointed when appoint parameter[--optypelist_for_implmode]"; | const char *const kCompressWeightError = "it must be appointed when appoint parameter[--optypelist_for_implmode]"; | ||||
| const char *const kSelectImplmodeError = "only support high_performance, high_precision"; | |||||
| const char *const kSelectImplmodeError = "only support high_performance, high_precision, " | |||||
| "high_precision_for_all, high_performance_for_all"; | |||||
| const char *const kDynamicBatchSizeError = "It can only contains digit, \",\", \" \""; | const char *const kDynamicBatchSizeError = "It can only contains digit, \",\", \" \""; | ||||
| const char *const kDynamicImageSizeError = "It can only contains digit, \",\", \" \" and \";\""; | const char *const kDynamicImageSizeError = "It can only contains digit, \",\", \" \" and \";\""; | ||||
| const char *const kKeepDtypeError = "file not found"; | const char *const kKeepDtypeError = "file not found"; | ||||
| @@ -782,7 +785,9 @@ Status CheckImplmodeParamValid(const std::string &optypelist_for_implmode, std:: | |||||
| op_select_implmode = IR_OPTION_OP_SELECT_IMPLMODE_DEFAULT; | op_select_implmode = IR_OPTION_OP_SELECT_IMPLMODE_DEFAULT; | ||||
| } else { | } else { | ||||
| if (op_select_implmode != IR_OPTION_OP_SELECT_IMPLMODE_DEFAULT && | if (op_select_implmode != IR_OPTION_OP_SELECT_IMPLMODE_DEFAULT && | ||||
| op_select_implmode != IR_OPTION_OP_SELECT_IMPLMODE_PRECISON) { | |||||
| op_select_implmode != IR_OPTION_OP_SELECT_IMPLMODE_PRECISON && | |||||
| op_select_implmode != IR_OPTION_OP_SELECT_IMPLMODE_HIGH_PRECISION_FOR_ALL && | |||||
| op_select_implmode != IR_OPTION_OP_SELECT_IMPLMODE_HIGH_PERFORMANCE_FOR_ALL) { | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, | ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, | ||||
| {"--op_select_implmode", op_select_implmode.c_str(), | {"--op_select_implmode", op_select_implmode.c_str(), | ||||
| kSelectImplmodeError}); | kSelectImplmodeError}); | ||||
| @@ -60,10 +60,6 @@ class GeRootModel { | |||||
| bool GetTrainFlag() const { return train_flag_; } | bool GetTrainFlag() const { return train_flag_; } | ||||
| int32_t GetBuildTimes() const { return hybrid_build_times_; } | |||||
| void IncreaseBuildTimes() { hybrid_build_times_++; } | |||||
| private: | private: | ||||
| ComputeGraphPtr root_graph_ = nullptr; | ComputeGraphPtr root_graph_ = nullptr; | ||||
| std::map<std::string, GeModelPtr> subgraph_instance_name_to_model_; | std::map<std::string, GeModelPtr> subgraph_instance_name_to_model_; | ||||
| @@ -73,7 +69,6 @@ class GeRootModel { | |||||
| bool train_flag_ = false; | bool train_flag_ = false; | ||||
| std::string model_name_; | std::string model_name_; | ||||
| bool is_specific_stream_ = false; | bool is_specific_stream_ = false; | ||||
| int32_t hybrid_build_times_ = 0; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| using GeRootModelPtr = std::shared_ptr<ge::GeRootModel>; | using GeRootModelPtr = std::shared_ptr<ge::GeRootModel>; | ||||
| @@ -143,7 +143,8 @@ DEFINE_string(output_type, "", | |||||
| DEFINE_string(op_select_implmode, "", | DEFINE_string(op_select_implmode, "", | ||||
| "Optional; op select implmode! " | "Optional; op select implmode! " | ||||
| "Support high_precision, high_performance."); | |||||
| "Support high_precision, high_performance, " | |||||
| "high_precision_for_all, high_performance_for_all."); | |||||
| DEFINE_string(optypelist_for_implmode, "", | DEFINE_string(optypelist_for_implmode, "", | ||||
| "Optional; Nodes need use implmode selected in op_select_implmode " | "Optional; Nodes need use implmode selected in op_select_implmode " | ||||
| @@ -311,8 +312,8 @@ class GFlagUtils { | |||||
| "scenarios by using a configuration file.\n" | "scenarios by using a configuration file.\n" | ||||
| " --auto_tune_mode Set tune mode. E.g.: \"GA,RL\", support configure multiple, spit by ,\n" | " --auto_tune_mode Set tune mode. E.g.: \"GA,RL\", support configure multiple, spit by ,\n" | ||||
| " --op_bank_path Set the path of the custom repository generated after operator tuning with Auto Tune.\n" | " --op_bank_path Set the path of the custom repository generated after operator tuning with Auto Tune.\n" | ||||
| " --op_select_implmode Set op select implmode. Support high_precision, high_performance. " | |||||
| "default: high_performance\n" | |||||
| " --op_select_implmode Set op select implmode. Support high_precision, high_performance, " | |||||
| "high_precision_for_all, high_performance_for_all. default: high_performance\n" | |||||
| " --optypelist_for_implmode Appoint which op to select implmode, cooperated with op_select_implmode.\n" | " --optypelist_for_implmode Appoint which op to select implmode, cooperated with op_select_implmode.\n" | ||||
| " Separate multiple nodes with commas (,). Use double quotation marks (\") " | " Separate multiple nodes with commas (,). Use double quotation marks (\") " | ||||
| "to enclose each argument. E.g.: \"node_name1,node_name2\"\n" | "to enclose each argument. E.g.: \"node_name1,node_name2\"\n" | ||||
| @@ -121,7 +121,7 @@ Status InnerSession::Initialize() { | |||||
| GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId())); | GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId())); | ||||
| DumpProperties dump_properties; | DumpProperties dump_properties; | ||||
| dump_properties.InitByOptions(); | |||||
| GE_CHK_STATUS_RET(dump_properties.InitByOptions(), "Init dump properties failed."); | |||||
| GE_CHK_STATUS_RET(AddDumpProperties(dump_properties), "[Add][DumpProperties] failed."); | GE_CHK_STATUS_RET(AddDumpProperties(dump_properties), "[Add][DumpProperties] failed."); | ||||
| ret = graph_manager_.Initialize(options_); | ret = graph_manager_.Initialize(options_); | ||||
| @@ -297,6 +297,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c | |||||
| for (auto &task : tasks_) { | for (auto &task : tasks_) { | ||||
| ret = task->LaunchKernel(stream_); | ret = task->LaunchKernel(stream_); | ||||
| GELOGD("[DEBUG_TASK_INFO : Static Task] %s %s", | |||||
| task->GetTaskName().c_str(), | |||||
| BuildTaskUtils::GetTaskInfo(task->GetOpdesc(), inputs, outputs).c_str()); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -447,6 +450,8 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||||
| } else { | } else { | ||||
| GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | ||||
| } | } | ||||
| GELOGD("[DEBUG_TASK_INFO : Dynamic Task] %s", | |||||
| BuildTaskUtils::GetTaskInfo(op_task_->GetOpdesc(), input_buffers, output_buffers).c_str()); | |||||
| GE_CHK_STATUS_RET_NOLOG(op_task_->OpenDump(stream_)); | GE_CHK_STATUS_RET_NOLOG(op_task_->OpenDump(stream_)); | ||||
| GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -95,35 +95,6 @@ Status CheckInferDepend(GeModelPtr &ge_model, bool &is_infer_depend, bool &is_ho | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status NeedHybridModel(GeModelPtr &ge_model, bool &flag) { | |||||
| bool is_infer_depend = false; | |||||
| bool is_host_mem = false; | |||||
| GE_CHK_STATUS_RET(CheckInferDepend(ge_model, is_infer_depend, is_host_mem), "[Check][InferDepend] failed."); | |||||
| bool need_d2h_cpy = is_infer_depend && !is_host_mem; | |||||
| auto tasks = ge_model->GetModelTaskDefPtr()->task(); | |||||
| int32_t kernel_task_num = 0; | |||||
| for (int i = 0; i < tasks.size(); ++i) { | |||||
| auto task_type = static_cast<rtModelTaskType_t>(tasks[i].type()); | |||||
| if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) { | |||||
| const auto &context = task_type == RT_MODEL_TASK_KERNEL ? tasks[i].kernel().context() : | |||||
| tasks[i].kernel_with_handle().context(); | |||||
| auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | |||||
| if (kernel_type == ccKernelType::TE) { | |||||
| if (need_d2h_cpy) { | |||||
| flag = true; | |||||
| return SUCCESS; | |||||
| } | |||||
| kernel_task_num++; | |||||
| if (kernel_task_num > 1) { | |||||
| flag = true; | |||||
| return SUCCESS; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace | } // namespace | ||||
| SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size) | SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size) | ||||
| @@ -620,29 +591,69 @@ Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, | |||||
| return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID; | return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID; | ||||
| } | } | ||||
| } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | ||||
| if (single_op.op_task_ != nullptr) { | |||||
| GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "[Check][TaskType]Do not support dynamic op with multiple tasks."); | |||||
| REPORT_INNER_ERROR("E19999", | |||||
| "BuildTaskListForDynamicOp fail for Do not support dynamic op with multiple tasks."); | |||||
| return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; | |||||
| } | |||||
| GELOGD("Building AICPU_TF task"); | GELOGD("Building AICPU_TF task"); | ||||
| AiCpuTask *aicpu_task = nullptr; | AiCpuTask *aicpu_task = nullptr; | ||||
| uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; | uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; | ||||
| GELOGI("Build dynamic singleOp TfTask, kernel_id = %lu", dynamic_singleop_kernel_id); | GELOGI("Build dynamic singleOp TfTask, kernel_id = %lu", dynamic_singleop_kernel_id); | ||||
| GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, dynamic_singleop_kernel_id)); | GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, dynamic_singleop_kernel_id)); | ||||
| if (aicpu_task->GetUnknownType() == DEPEND_COMPUTE) { | if (aicpu_task->GetUnknownType() == DEPEND_COMPUTE) { | ||||
| if (i >= tasks.size() - 1) { | |||||
| if (aicpu_tasks_.size() < 2) { | |||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Task]The copy task of the fourth operator was not found."); | GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Task]The copy task of the fourth operator was not found."); | ||||
| REPORT_INNER_ERROR("E19999", "The copy task of the fourth operator was not found."); | REPORT_INNER_ERROR("E19999", "The copy task of the fourth operator was not found."); | ||||
| return ACL_ERROR_GE_PARAM_INVALID; | return ACL_ERROR_GE_PARAM_INVALID; | ||||
| } | } | ||||
| ++i; | |||||
| const TaskDef ©_task_def = tasks[i]; | |||||
| const TaskDef ©_task_def = aicpu_tasks_[1]; | |||||
| GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex())); | GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex())); | ||||
| } | } | ||||
| aicpu_task->SetModelArgs(model_name_, model_id_); | aicpu_task->SetModelArgs(model_name_, model_id_); | ||||
| single_op.op_task_.reset(aicpu_task); | single_op.op_task_.reset(aicpu_task); | ||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status SingleOpModel::NeedHybridModel(GeModelPtr &ge_model, bool &need_hybrid_model) { | |||||
| bool is_infer_depend = false; | |||||
| bool is_host_mem = false; | |||||
| GE_CHK_STATUS_RET(CheckInferDepend(ge_model, is_infer_depend, is_host_mem), "[Check][InferDepend] failed."); | |||||
| bool need_d2h_cpy = is_infer_depend && !is_host_mem; | |||||
| bool aicpu_multi_task = tbe_tasks_.size() >= 1 && aicpu_tasks_.size() >= 1; | |||||
| bool aicore_multi_task = tbe_tasks_.size() > 1; | |||||
| need_hybrid_model = need_d2h_cpy || aicore_multi_task || aicpu_multi_task; | |||||
| return SUCCESS; | |||||
| } | |||||
| Status SingleOpModel::ParseTasks() { | |||||
| auto ge_model = model_helper_.GetGeModel(); | |||||
| GE_CHECK_NOTNULL(ge_model); | |||||
| auto tasks = ge_model->GetModelTaskDefPtr()->task(); | |||||
| for (int i = 0; i < tasks.size(); ++i) { | |||||
| TaskDef &task_def = tasks[i]; | |||||
| GELOGI("[%s] Task[%d], type = [%u], DebugString = [%s]", model_name_.c_str(), i, task_def.type(), | |||||
| task_def.DebugString().c_str()); | |||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||||
| if (task_type == RT_MODEL_TASK_KERNEL) { | |||||
| const auto &kernel_def = task_def.kernel(); | |||||
| const auto &context = kernel_def.context(); | |||||
| auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | |||||
| if (kernel_type == ccKernelType::TE) { | |||||
| tbe_tasks_.emplace_back(task_def); | |||||
| } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | |||||
| aicpu_tasks_.emplace_back(task_def); | |||||
| } else { | |||||
| GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, | |||||
| "[Check][Param:TaskDef]Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", | |||||
| context.kernel_type()); | |||||
| REPORT_INNER_ERROR("E19999", | |||||
| "BuildModelTaskKernel fail for got:%u not supported, Only TBE, AI_CPU, CUST_AI_CPU kernel are supported.", | |||||
| context.kernel_type()); | |||||
| return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID; | |||||
| } | |||||
| } else if (task_type == RT_MODEL_TASK_ALL_KERNEL) { | |||||
| tbe_tasks_.emplace_back(task_def); | |||||
| } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | |||||
| aicpu_tasks_.emplace_back(task_def); | |||||
| } else { | } else { | ||||
| // skip | // skip | ||||
| GELOGD("Skip task type: %d", static_cast<int>(task_type)); | GELOGD("Skip task type: %d", static_cast<int>(task_type)); | ||||
| @@ -657,6 +668,7 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & | |||||
| GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); | GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); | ||||
| model_params_.memory_size = UINT64_MAX; | model_params_.memory_size = UINT64_MAX; | ||||
| model_params_.graph_is_dynamic = true; | model_params_.graph_is_dynamic = true; | ||||
| GE_CHK_STATUS_RET(ParseTasks(), "[Parse][Tasks] failed."); | |||||
| auto ge_model = model_helper_.GetGeModel(); | auto ge_model = model_helper_.GetGeModel(); | ||||
| GE_CHECK_NOTNULL(ge_model); | GE_CHECK_NOTNULL(ge_model); | ||||
| @@ -76,6 +76,11 @@ class SingleOpModel { | |||||
| void ParseArgTable(OpTask *task, SingleOp &op); | void ParseArgTable(OpTask *task, SingleOp &op); | ||||
| Status InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model, SingleOp &single_op); | Status InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model, SingleOp &single_op); | ||||
| Status SetHostMemTensor(DynamicSingleOp &single_op); | Status SetHostMemTensor(DynamicSingleOp &single_op); | ||||
| Status NeedHybridModel(GeModelPtr &ge_model, bool &flag); | |||||
| Status ParseTasks(); | |||||
| std::vector<domi::TaskDef> tbe_tasks_; | |||||
| std::vector<domi::TaskDef> aicpu_tasks_; | |||||
| std::string model_name_; | std::string model_name_; | ||||
| uint32_t model_id_ = 0; | uint32_t model_id_ = 0; | ||||
| @@ -70,7 +70,9 @@ std::vector<void *> BuildTaskUtils::GetKernelArgs(const OpDescPtr &op_desc, | |||||
| return JoinAddresses(addresses); | return JoinAddresses(addresses); | ||||
| } | } | ||||
| std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||||
| std::string BuildTaskUtils::InnerGetTaskInfo(const OpDescPtr &op_desc, | |||||
| const std::vector<const void *> &input_addrs, | |||||
| const std::vector<const void *> &output_addrs) { | |||||
| std::stringstream ss; | std::stringstream ss; | ||||
| if (op_desc != nullptr) { | if (op_desc != nullptr) { | ||||
| auto op_type = op_desc->GetType(); | auto op_type = op_desc->GetType(); | ||||
| @@ -87,7 +89,10 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||||
| } | } | ||||
| ss << TypeUtils::DataTypeToSerialString(input->GetDataType()) << " "; | ss << TypeUtils::DataTypeToSerialString(input->GetDataType()) << " "; | ||||
| ss << TypeUtils::FormatToSerialString(input->GetFormat()); | ss << TypeUtils::FormatToSerialString(input->GetFormat()); | ||||
| ss << VectorToString(input->GetShape().GetDims()); | |||||
| ss << VectorToString(input->GetShape().GetDims()) << " "; | |||||
| if (idx < input_addrs.size()) { | |||||
| ss << input_addrs[idx]; | |||||
| } | |||||
| if (idx < op_desc->GetInputsSize() - 1) { | if (idx < op_desc->GetInputsSize() - 1) { | ||||
| ss << ","; | ss << ","; | ||||
| } | } | ||||
| @@ -101,7 +106,10 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||||
| const GeShape &out_shape = output->GetShape(); | const GeShape &out_shape = output->GetShape(); | ||||
| const auto &dims = out_shape.GetDims(); | const auto &dims = out_shape.GetDims(); | ||||
| ss << TypeUtils::FormatToSerialString(out_format); | ss << TypeUtils::FormatToSerialString(out_format); | ||||
| ss << VectorToString(dims); | |||||
| ss << VectorToString(dims) << " "; | |||||
| if (idx < output_addrs.size()) { | |||||
| ss << output_addrs[idx]; | |||||
| } | |||||
| if (idx < op_desc->GetOutputsSize() - 1) { | if (idx < op_desc->GetOutputsSize() - 1) { | ||||
| ss << ","; | ss << ","; | ||||
| } | } | ||||
| @@ -110,4 +118,44 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||||
| } | } | ||||
| return ss.str(); | return ss.str(); | ||||
| } | } | ||||
| std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||||
| vector<const void *> input_addrs; | |||||
| vector<const void *> output_addrs; | |||||
| return InnerGetTaskInfo(op_desc, input_addrs, output_addrs); | |||||
| } | |||||
| std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc, | |||||
| const std::vector<DataBuffer> &inputs, | |||||
| const std::vector<DataBuffer> &outputs) { | |||||
| vector<const void *> input_addrs; | |||||
| vector<const void *> output_addrs; | |||||
| GE_CHECK_NOTNULL_EXEC(op_desc, return ""); | |||||
| if (op_desc->GetAllInputsSize() == inputs.size()) { | |||||
| std::for_each(inputs.begin(), inputs.end(), [&](const DataBuffer &db) { input_addrs.push_back(db.data); }); | |||||
| } | |||||
| if (op_desc->GetOutputsSize() == outputs.size()) { | |||||
| std::for_each(outputs.begin(), outputs.end(), [&](const DataBuffer &db) { output_addrs.push_back(db.data); }); | |||||
| } | |||||
| return InnerGetTaskInfo(op_desc, input_addrs, output_addrs); | |||||
| } | |||||
| std::string BuildTaskUtils::GetTaskInfo(const hybrid::TaskContext &task_context) { | |||||
| auto &node_item = task_context.GetNodeItem(); | |||||
| auto op_desc = node_item.GetOpDesc(); | |||||
| GE_CHECK_NOTNULL_EXEC(op_desc, return ""); | |||||
| vector<const void *> input_addrs; | |||||
| vector<const void *> output_addrs; | |||||
| if (op_desc->GetAllInputsSize() == static_cast<uint32_t>(task_context.NumInputs())) { | |||||
| for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||||
| input_addrs.push_back(task_context.GetInput(i)->GetData()); | |||||
| } | |||||
| } | |||||
| if (op_desc->GetOutputsSize() == static_cast<uint32_t>(task_context.NumOutputs())) { | |||||
| for (size_t i = 0; i < op_desc->GetOutputsSize(); ++i) { | |||||
| output_addrs.push_back(task_context.GetOutput(i)->GetData()); | |||||
| } | |||||
| } | |||||
| return InnerGetTaskInfo(op_desc, input_addrs, output_addrs); | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
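The new GetTaskInfo overloads above only attach addresses when the number of buffers matches the op's input/output count; otherwise the string falls back to the shape-only form. A trimmed-down illustration of that guard, with hypothetical TensorDesc/Buffer stand-ins instead of GE's descriptors:

```cpp
#include <cstdio>
#include <sstream>
#include <string>
#include <vector>

// Hypothetical stand-ins: a tensor description and a data buffer.
struct TensorDesc { std::string shape; };
struct Buffer { const void *data; };

// Mirrors the guard in the new overloads: addresses are only appended when
// their count matches the descriptor count, so a mismatched call degrades to
// the old shape-only output instead of misaligning entries.
std::string TaskInfo(const std::vector<TensorDesc> &descs, const std::vector<Buffer> &bufs) {
  std::vector<const void *> addrs;
  if (descs.size() == bufs.size()) {
    for (const auto &b : bufs) { addrs.push_back(b.data); }
  }
  std::stringstream ss;
  for (size_t i = 0; i < descs.size(); ++i) {
    ss << descs[i].shape << " ";
    if (i < addrs.size()) { ss << addrs[i]; }
    if (i + 1 < descs.size()) { ss << ","; }
  }
  return ss.str();
}

int main() {
  const std::vector<TensorDesc> descs = {{"[1,3,224,224]"}, {"[1,1000]"}};
  const std::vector<Buffer> bufs = {{reinterpret_cast<void *>(0x1000)},
                                    {reinterpret_cast<void *>(0x2000)}};
  std::printf("%s\n", TaskInfo(descs, bufs).c_str());
  return 0;
}
```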
| @@ -23,6 +23,7 @@ | |||||
| #include "graph/op_desc.h" | #include "graph/op_desc.h" | ||||
| #include "single_op/single_op.h" | #include "single_op/single_op.h" | ||||
| #include "single_op/single_op_model.h" | #include "single_op/single_op_model.h" | ||||
| #include "hybrid/node_executor/task_context.h" | |||||
| namespace ge { | namespace ge { | ||||
| class BuildTaskUtils { | class BuildTaskUtils { | ||||
| @@ -35,7 +36,14 @@ class BuildTaskUtils { | |||||
| bool keep_workspace = true); | bool keep_workspace = true); | ||||
| static std::vector<void *> JoinAddresses(const std::vector<std::vector<void *>> &addresses); | static std::vector<void *> JoinAddresses(const std::vector<std::vector<void *>> &addresses); | ||||
| static std::vector<void *> GetKernelArgs(const OpDescPtr &op_desc, const SingleOpModelParam ¶m); | static std::vector<void *> GetKernelArgs(const OpDescPtr &op_desc, const SingleOpModelParam ¶m); | ||||
| static std::string InnerGetTaskInfo(const OpDescPtr &op_desc, | |||||
| const std::vector<const void *> &input_addrs, | |||||
| const std::vector<const void *> &output_addrs); | |||||
| static std::string GetTaskInfo(const OpDescPtr &op_desc); | static std::string GetTaskInfo(const OpDescPtr &op_desc); | ||||
| static std::string GetTaskInfo(const OpDescPtr &op_desc, | |||||
| const std::vector<DataBuffer> &inputs, | |||||
| const std::vector<DataBuffer> &outputs); | |||||
| static std::string GetTaskInfo(const hybrid::TaskContext& task_context); | |||||
| template<typename T> | template<typename T> | ||||
| static std::string VectorToString(const std::vector<T> &values) { | static std::string VectorToString(const std::vector<T> &values) { | ||||
| std::stringstream ss; | std::stringstream ss; | ||||
| @@ -89,6 +89,7 @@ Status OpTask::OpenDump(rtStream_t stream) { | |||||
| void TbeOpTask::SetStubFunc(const std::string &name, const void *stub_func) { | void TbeOpTask::SetStubFunc(const std::string &name, const void *stub_func) { | ||||
| this->stub_name_ = name; | this->stub_name_ = name; | ||||
| this->stub_func_ = stub_func; | this->stub_func_ = stub_func; | ||||
| this->task_name_ = name; | |||||
| } | } | ||||
| void TbeOpTask::SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | void TbeOpTask::SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | ||||
| @@ -345,49 +346,95 @@ Status TbeOpTask::AllocateWorkspaces(const vector<int64_t> &workspace_sizes) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||||
| const vector<DataBuffer> &input_buffers, | |||||
| vector<GeTensorDesc> &output_desc, | |||||
| vector<DataBuffer> &output_buffers, | |||||
| rtStream_t stream) { | |||||
| GELOGD("[%s] Start to launch kernel", node_->GetName().c_str()); | |||||
| GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc)); | |||||
| GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo()); | |||||
| GE_CHK_STATUS_RET(AllocateWorkspaces(run_info_workspaces_), "[Allocate][Workspaces] failed."); | |||||
| std::vector<void *> args; | |||||
| for (auto &buffer : input_buffers) { | |||||
| args.emplace_back(buffer.data); | |||||
| Status TbeOpTask::UpdateTilingArgs(rtStream_t stream) { | |||||
| size_t args_size = input_num_ + output_num_ + workspaces_.size(); | |||||
| if (tiling_buffer_ != nullptr) { | |||||
| args_size++; | |||||
| } | } | ||||
| for (auto &buffer : output_buffers) { | |||||
| args.emplace_back(buffer.data); | |||||
| size_t temp_size = args_size * sizeof(void *); | |||||
| if (arg_size_ < temp_size) { | |||||
| GELOGD("Need to reset size of args_ from %zu to %zu.", arg_size_, temp_size); | |||||
| std::unique_ptr<uint8_t[]> args(new (std::nothrow) uint8_t[temp_size]()); | |||||
| GE_CHECK_NOTNULL(args); | |||||
| if (memcpy_s(args.get(), temp_size, args_.get(), arg_size_) != EOK) { | |||||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][KernelArgs] failed for [%s].", node_->GetName().c_str()); | |||||
| REPORT_INNER_ERROR("E19999", "update kernel args failed for %s.", node_->GetName().c_str()); | |||||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||||
| } | |||||
| args_ = std::move(args); | |||||
| arg_size_ = temp_size; | |||||
| } | } | ||||
| for (auto &buffer : workspaces_) { | |||||
| args.emplace_back(buffer); | |||||
| uintptr_t *arg_base = reinterpret_cast<uintptr_t *>(args_.get()); | |||||
| size_t arg_index = input_num_ + output_num_; | |||||
| for (size_t i = 0; i < workspaces_.size(); ++i) { | |||||
| arg_base[arg_index++] = reinterpret_cast<uintptr_t>(workspaces_[i]); | |||||
| } | } | ||||
| if (tiling_buffer_ != nullptr) { | if (tiling_buffer_ != nullptr) { | ||||
| GELOGD("[%s] Start to copy tiling info. size = %zu", node_->GetName().c_str(), tiling_data_.size()); | GELOGD("[%s] Start to copy tiling info. size = %zu", node_->GetName().c_str(), tiling_data_.size()); | ||||
| GE_CHK_RT_RET(rtMemcpyAsync(tiling_buffer_, max_tiling_size_, tiling_data_.data(), tiling_data_.size(), | GE_CHK_RT_RET(rtMemcpyAsync(tiling_buffer_, max_tiling_size_, tiling_data_.data(), tiling_data_.size(), | ||||
| RT_MEMCPY_HOST_TO_DEVICE_EX, stream)); | RT_MEMCPY_HOST_TO_DEVICE_EX, stream)); | ||||
| arg_base[arg_index] = reinterpret_cast<uintptr_t>(tiling_buffer_); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status TbeOpTask::SetArgIndex() { | |||||
| const vector<bool> v_is_input_const = op_desc_->GetIsInputConst(); | |||||
| size_t input_index = 0; | |||||
| for (size_t i = 0; i < op_desc_->GetAllInputsSize(); ++i) { | |||||
| const GeTensorDescPtr tensor_desc = op_desc_->MutableInputDesc(static_cast<uint32_t>(i)); | |||||
| if (tensor_desc == nullptr) { | |||||
| GELOGD("SingleOp: %s, Index: %zu, has no input", op_desc_->GetName().c_str(), i); | |||||
| continue; | |||||
| } | |||||
| if (i < v_is_input_const.size() && v_is_input_const[i]) { | |||||
| GELOGD("SingleOp: %s, Index: %zu, input is const", op_desc_->GetName().c_str(), i); | |||||
| input_index++; | |||||
| continue; | |||||
| } | |||||
| arg_index_.emplace_back(input_index); | |||||
| input_index++; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| args.emplace_back(tiling_buffer_); | |||||
| Status TbeOpTask::UpdateIoAddr(const vector<DataBuffer> &inputs, const vector<DataBuffer> &outputs) { | |||||
| if (arg_index_.size() != inputs.size()) { | |||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size] Args size is %zu, but get input size is %zu.", | |||||
| arg_index_.size(), inputs.size()); | |||||
| REPORT_INNER_ERROR("E19999", "[Check][Size] Args size is %zu, but get input size is %zu.", | |||||
| arg_index_.size(), inputs.size()); | |||||
| return ACL_ERROR_GE_PARAM_INVALID; | |||||
| } | } | ||||
| GELOGD("Dst size is %zu, src size is %zu.", arg_size_, args.size() * sizeof(void *)); | |||||
| // node with workspace: build can not get size of workspace, need to update arg_size_ when execute | |||||
| if (arg_size_ < (args.size() * sizeof(void *))) { | |||||
| size_t temp_size = args.size() * sizeof(void *); | |||||
| GELOGD("Need to reset size of args_ from %zu to %zu.", arg_size_, temp_size); | |||||
| args_.reset(new(std::nothrow) uint8_t[temp_size]()); | |||||
| GE_CHECK_NOTNULL(args_); | |||||
| arg_size_ = temp_size; | |||||
| uintptr_t *arg_base = reinterpret_cast<uintptr_t *>(args_.get()); | |||||
| for (size_t i = 0; i < arg_index_.size(); ++i) { | |||||
| arg_base[arg_index_[i]] = reinterpret_cast<uintptr_t>(inputs[i].data); | |||||
| } | } | ||||
| if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) { | |||||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][KernelArgs] failed for [%s].", node_->GetName().c_str()); | |||||
| REPORT_INNER_ERROR("E19999", "update kernel args failed for %s.", node_->GetName().c_str()); | |||||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||||
| for (size_t i = 0; i < op_desc_->GetOutputsSize(); ++i) { | |||||
| arg_base[input_num_ + i] = reinterpret_cast<uintptr_t>(outputs[i].data); | |||||
| } | } | ||||
| return SUCCESS; | |||||
| } | |||||
| Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||||
| const vector<DataBuffer> &input_buffers, | |||||
| vector<GeTensorDesc> &output_desc, | |||||
| vector<DataBuffer> &output_buffers, | |||||
| rtStream_t stream) { | |||||
| GELOGD("[%s] Start to launch kernel", node_->GetName().c_str()); | |||||
| GE_CHK_STATUS_RET(UpdateIoAddr(input_buffers, output_buffers), "[Update][IoAddr] failed."); | |||||
| GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc)); | |||||
| GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo()); | |||||
| GE_CHK_STATUS_RET(AllocateWorkspaces(run_info_workspaces_), "[Allocate][Workspaces] failed."); | |||||
| GE_CHK_STATUS_RET(UpdateTilingArgs(stream), "[Update][TilingArgs] failed."); | |||||
| GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); | GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); | ||||
| GE_CHK_STATUS_RET(DoLaunchKernel(stream), "Failed to do launch kernel."); | GE_CHK_STATUS_RET(DoLaunchKernel(stream), "Failed to do launch kernel."); | ||||
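The SetArgIndex/UpdateIoAddr/UpdateTilingArgs split above assumes a single flat args buffer laid out as [all inputs | outputs | workspaces | optional tiling pointer], where const inputs keep their slot (their addresses are presumed to be filled in at build time) and only the non-const slots recorded in arg_index_ are refreshed per launch. A simplified, self-contained sketch of that layout with toy Buffer values and fabricated addresses, not GE's real types:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Toy stand-in for DataBuffer; addresses below are fabricated for illustration.
struct Buffer { void *data; };

int main() {
  // Three inputs, the middle one const; const inputs keep an arg slot but are
  // not rewritten at launch (their address is assumed set at build time).
  const std::vector<bool> is_input_const = {false, true, false};
  const size_t input_num = is_input_const.size();
  const size_t output_num = 1;

  // SetArgIndex(): remember which slots must be refreshed from the caller's
  // non-const input buffers on every launch.
  std::vector<size_t> arg_index;
  for (size_t i = 0; i < input_num; ++i) {
    if (!is_input_const[i]) {
      arg_index.push_back(i);
    }
  }

  std::vector<uintptr_t> args(input_num + output_num, 0);

  // UpdateIoAddr(): one caller buffer per non-const input, then the outputs.
  const std::vector<Buffer> inputs = {{reinterpret_cast<void *>(0x1000)},
                                      {reinterpret_cast<void *>(0x2000)}};
  const std::vector<Buffer> outputs = {{reinterpret_cast<void *>(0x3000)}};
  for (size_t i = 0; i < arg_index.size(); ++i) {
    args[arg_index[i]] = reinterpret_cast<uintptr_t>(inputs[i].data);
  }
  for (size_t i = 0; i < outputs.size(); ++i) {
    args[input_num + i] = reinterpret_cast<uintptr_t>(outputs[i].data);
  }

  // UpdateTilingArgs(): workspace sizes are only known at execute time, so the
  // workspaces and the tiling pointer are appended after inputs and outputs,
  // growing the args buffer when needed.
  const std::vector<uintptr_t> workspaces = {0x4000};
  args.insert(args.end(), workspaces.begin(), workspaces.end());
  args.push_back(0x5000);  // tiling buffer

  // Expected layout: 0x1000, <const slot>, 0x2000, 0x3000, 0x4000, 0x5000
  for (size_t i = 0; i < args.size(); ++i) {
    std::printf("args[%zu] = 0x%lx\n", i, static_cast<unsigned long>(args[i]));
  }
  return 0;
}
```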
| @@ -33,6 +33,10 @@ | |||||
| #include "register/op_tiling.h" | #include "register/op_tiling.h" | ||||
| namespace ge { | namespace ge { | ||||
| namespace { | |||||
| const int kAddressNum = 2; | |||||
| } // namespace | |||||
| class StreamResource; | class StreamResource; | ||||
| struct SingleOpModelParam; | struct SingleOpModelParam; | ||||
| class OpTask { | class OpTask { | ||||
| @@ -44,6 +48,7 @@ class OpTask { | |||||
| virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | ||||
| void SetModelArgs(std::string model_name, uint32_t model_id); | void SetModelArgs(std::string model_name, uint32_t model_id); | ||||
| Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); | Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); | ||||
| const std::string &GetTaskName() const {return task_name_;} | |||||
| void SetOpDesc(const OpDescPtr &op_desc) { | void SetOpDesc(const OpDescPtr &op_desc) { | ||||
| op_desc_ = op_desc; | op_desc_ = op_desc; | ||||
| } | } | ||||
| @@ -66,6 +71,7 @@ class OpTask { | |||||
| std::string model_name_; | std::string model_name_; | ||||
| uint32_t model_id_ = 0; | uint32_t model_id_ = 0; | ||||
| uint32_t block_dim_ = 1; | uint32_t block_dim_ = 1; | ||||
| std::string task_name_; | |||||
| }; | }; | ||||
| class TbeOpTask : public OpTask { | class TbeOpTask : public OpTask { | ||||
| @@ -85,6 +91,7 @@ class TbeOpTask : public OpTask { | |||||
| const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle); | const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle); | ||||
| Status UpdateRunInfo() override; | Status UpdateRunInfo() override; | ||||
| Status SetArgIndex(); | |||||
| const void *GetArgs() const; | const void *GetArgs() const; | ||||
| size_t GetArgSize() const; | size_t GetArgSize() const; | ||||
| @@ -100,7 +107,9 @@ class TbeOpTask : public OpTask { | |||||
| Status UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, | Status UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, | ||||
| const vector<GeTensorDesc> &output_desc); | const vector<GeTensorDesc> &output_desc); | ||||
| Status AllocateWorkspaces(const std::vector<int64_t> &workspace_sizes); | Status AllocateWorkspaces(const std::vector<int64_t> &workspace_sizes); | ||||
| Status UpdateTilingArgs(rtStream_t stream); | |||||
| Status DoLaunchKernel(rtStream_t stream); | Status DoLaunchKernel(rtStream_t stream); | ||||
| Status UpdateIoAddr(const vector<DataBuffer> &inputs, const vector<DataBuffer> &outputs); | |||||
| const void *stub_func_ = nullptr; | const void *stub_func_ = nullptr; | ||||
| std::unique_ptr<uint8_t[]> args_; | std::unique_ptr<uint8_t[]> args_; | ||||
| @@ -120,6 +129,9 @@ class TbeOpTask : public OpTask { | |||||
| void* handle_ = nullptr; | void* handle_ = nullptr; | ||||
| std::string original_kernel_key_; | std::string original_kernel_key_; | ||||
| std::string node_info_; | std::string node_info_; | ||||
| std::vector<size_t> arg_index_; // data index in args | |||||
| size_t input_num_ = 0; // includes const inputs |||||
| size_t output_num_ = 0; |||||
| }; | }; | ||||
| class AiCpuBaseTask : public OpTask { | class AiCpuBaseTask : public OpTask { | ||||
| @@ -266,7 +278,7 @@ class MemcpyAsyncTask : public OpTask { | |||||
| friend class SingleOpModel; | friend class SingleOpModel; | ||||
| friend class RtsKernelTaskBuilder; | friend class RtsKernelTaskBuilder; | ||||
| uintptr_t addresses_[2]; | |||||
| uintptr_t addresses_[kAddressNum]; | |||||
| size_t dst_max_; | size_t dst_max_; | ||||
| size_t count_; | size_t count_; | ||||
| rtMemcpyKind_t kind_; | rtMemcpyKind_t kind_; | ||||
| @@ -104,7 +104,7 @@ Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bi | |||||
| binary.version = 0; | binary.version = 0; | ||||
| binary.data = kernel_bin.GetBinData(); | binary.data = kernel_bin.GetBinData(); | ||||
| binary.length = kernel_bin.GetBinDataSize(); | binary.length = kernel_bin.GetBinDataSize(); | ||||
| binary.magic = param.core_type == 0 ? RT_DEV_BINARY_MAGIC_ELF : RT_DEV_BINARY_MAGIC_ELF_AIVEC; | |||||
| GE_CHK_STATUS_RET_NOLOG(GetMagic(binary.magic)); | |||||
| Status ret = 0; | Status ret = 0; | ||||
| if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) { | if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) { | ||||
| ret = rtRegisterAllKernel(&binary, bin_handle); | ret = rtRegisterAllKernel(&binary, bin_handle); | ||||
| @@ -387,6 +387,9 @@ Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶ | |||||
| } | } | ||||
| task.SetStubFunc(stub_name_, stub_func); | task.SetStubFunc(stub_name_, stub_func); | ||||
| } | } | ||||
| GE_CHK_STATUS_RET(task.SetArgIndex(), "[Set][ArgTable] failed."); | |||||
| task.input_num_ = op_desc_->GetInputsSize(); | |||||
| task.output_num_ = op_desc_->GetOutputsSize(); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -413,4 +416,27 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) { | |||||
| task.EnableDynamicSupport(node_, tiling_buffer, static_cast<uint32_t>(max_size)); | task.EnableDynamicSupport(node_, tiling_buffer, static_cast<uint32_t>(max_size)); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status TbeTaskBuilder::GetMagic(uint32_t &magic) const { | |||||
| std::string json_string; | |||||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_, TVM_ATTR_NAME_MAGIC, json_string), | |||||
| GELOGD("Get original type of session_graph_id.")); | |||||
| if (json_string == "RT_DEV_BINARY_MAGIC_ELF") { | |||||
| magic = RT_DEV_BINARY_MAGIC_ELF; | |||||
| } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { | |||||
| magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; | |||||
| } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICUBE") { | |||||
| magic = RT_DEV_BINARY_MAGIC_ELF_AICUBE; | |||||
| } else { | |||||
| REPORT_INNER_ERROR("E19999", "Attr:%s in op:%s(%s), value:%s check invalid", | |||||
| TVM_ATTR_NAME_MAGIC.c_str(), op_desc_->GetName().c_str(), | |||||
| op_desc_->GetType().c_str(), json_string.c_str()); | |||||
| GELOGE(PARAM_INVALID, "[Check][Param] Attr:%s in op:%s(%s), value:%s check invalid", | |||||
| TVM_ATTR_NAME_MAGIC.c_str(), op_desc_->GetName().c_str(), | |||||
| op_desc_->GetType().c_str(), json_string.c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -105,6 +105,7 @@ class TbeTaskBuilder { | |||||
| const SingleOpModelParam ¶m); | const SingleOpModelParam ¶m); | ||||
| Status DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, const SingleOpModelParam ¶m) const; | Status DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, const SingleOpModelParam ¶m) const; | ||||
| Status DoRegisterMeta(void *bin_handle); | Status DoRegisterMeta(void *bin_handle); | ||||
| Status GetMagic(uint32_t &magic) const; | |||||
| static Status DoRegisterFunction(void *bin_handle, const char *stub_name, const char *kernel_name); | static Status DoRegisterFunction(void *bin_handle, const char *stub_name, const char *kernel_name); | ||||
| @@ -84,9 +84,10 @@ inline bool IsLogEnable(int module_name, int log_level) { | |||||
| ##__VA_ARGS__); \ | ##__VA_ARGS__); \ | ||||
| } while (0) | } while (0) | ||||
| #define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ | |||||
| dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ | |||||
| ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__) | |||||
| #define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ | |||||
| dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) %s" fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ | |||||
| ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ | |||||
| ##__VA_ARGS__) | |||||
| // print memory when it is greater than 1KB. | // print memory when it is greater than 1KB. | ||||
| #define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \ | #define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \ | ||||
| @@ -1 +1 @@ | |||||
| Subproject commit 2ad00e17886fd06c0d00f8a8cf370783a3d31818 | |||||
| Subproject commit 3e14f92d47abc9a2e703be2171f047553f7597e0 | |||||
| @@ -1 +1 @@ | |||||
| Subproject commit 79536a196f89cf7a1f5852ff7304b9a7d7b12eff | |||||
| Subproject commit 4151e33028c518057289b569b36cd4069af362a4 | |||||
| @@ -38,5 +38,20 @@ RUN wget https://github.com/ccup/lcov/archive/refs/tags/add_lcov.tar.gz -O add_l | |||||
| ENV PROJECT_HOME=/code/Turing/graphEngine | ENV PROJECT_HOME=/code/Turing/graphEngine | ||||
| RUN mkdir /var/run/sshd | |||||
| RUN echo "root:root" | chpasswd | |||||
| RUN sed -i 's/\#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config | |||||
| RUN sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd | |||||
| ENV NOTVISIBLE "in users profile" | |||||
| RUN echo "export VISIBLE=now" >> /etc/profile | |||||
| EXPOSE 22 7777 | |||||
| RUN useradd -ms /bin/bash debugger | |||||
| RUN echo "debugger:ge123" | chpasswd | |||||
| CMD ["/usr/sbin/sshd" "-D" "&"] | |||||
| RUN echo "alias ge=/code/Turing/graphEngine/scripts/ge.sh">>~/.bashrc | RUN echo "alias ge=/code/Turing/graphEngine/scripts/ge.sh">>~/.bashrc | ||||
| @@ -21,7 +21,7 @@ MOUNT_PROJECT_HOME=$(cd $PROJECT_HOME || return; pwd) | |||||
| DOCKER_BUILD_ENV_NAME=${MOUNT_PROJECT_HOME#*/} | DOCKER_BUILD_ENV_NAME=${MOUNT_PROJECT_HOME#*/} | ||||
| DOCKER_BUILD_ENV_NAME=${DOCKER_BUILD_ENV_NAME//\//\_} | DOCKER_BUILD_ENV_NAME=${DOCKER_BUILD_ENV_NAME//\//\_} | ||||
| DOCKER_IMAGE_TAG=ge_build_env.1.0.6 | |||||
| DOCKER_IMAGE_TAG=ge_build_env.1.0.9 | |||||
| DOCKER_IAMGE_NAME=joycode2art/turing | DOCKER_IAMGE_NAME=joycode2art/turing | ||||
| DOCKER_FULL_IMAGE_NAME=${DOCKER_IAMGE_NAME}:${DOCKER_IMAGE_TAG} | DOCKER_FULL_IMAGE_NAME=${DOCKER_IAMGE_NAME}:${DOCKER_IMAGE_TAG} | ||||
| @@ -61,7 +61,7 @@ function enter_docker_env(){ | |||||
| if test -z "$(docker images |grep ${DOCKER_IAMGE_NAME} | grep ${DOCKER_IMAGE_TAG})"; then | if test -z "$(docker images |grep ${DOCKER_IAMGE_NAME} | grep ${DOCKER_IMAGE_TAG})"; then | ||||
| echo "please run 'ge env --pull' to download images first!" | echo "please run 'ge env --pull' to download images first!" | ||||
| elif test -z "$(docker ps -a |grep ${DOCKER_BUILD_ENV_NAME})"; then | elif test -z "$(docker ps -a |grep ${DOCKER_BUILD_ENV_NAME})"; then | ||||
| $docker_cmd run -it -v ${MOUNT_PROJECT_HOME}:/code/Turing/graphEngine --workdir ${docker_work_dir} --name ${DOCKER_BUILD_ENV_NAME} ${DOCKER_FULL_IMAGE_NAME} ${docker_bash_dir} | |||||
| $docker_cmd run -p 7002:22 -p 7003:7777 --privileged=true -it -v ${MOUNT_PROJECT_HOME}:/code/Turing/graphEngine --workdir ${docker_work_dir} --name ${DOCKER_BUILD_ENV_NAME} ${DOCKER_FULL_IMAGE_NAME} ${docker_bash_dir} | |||||
| elif test -z "$(docker ps |grep ${DOCKER_BUILD_ENV_NAME})"; then | elif test -z "$(docker ps |grep ${DOCKER_BUILD_ENV_NAME})"; then | ||||
| $docker_cmd start ${DOCKER_BUILD_ENV_NAME} | $docker_cmd start ${DOCKER_BUILD_ENV_NAME} | ||||
| $docker_cmd exec -w ${docker_work_dir} -it ${DOCKER_BUILD_ENV_NAME} ${docker_bash_dir} | $docker_cmd exec -w ${docker_work_dir} -it ${DOCKER_BUILD_ENV_NAME} ${docker_bash_dir} | ||||
| @@ -38,7 +38,7 @@ function extract_deps_so_community() | |||||
| { | { | ||||
| echo "begin to extract .run file ........." | echo "begin to extract .run file ........." | ||||
| chmod +x ./${DRIVER_RUN_NAME_C} | chmod +x ./${DRIVER_RUN_NAME_C} | ||||
| chmod +X ./${PACKAGE_NAME_C} | |||||
| chmod +x ./${PACKAGE_NAME_C} | |||||
| [ -n "${DEP_TMP_DIR}" ] && rm -rf "${DEP_TMP_DIR}" | [ -n "${DEP_TMP_DIR}" ] && rm -rf "${DEP_TMP_DIR}" | ||||
| ./${DRIVER_RUN_NAME_C} --noexec --extract=${DEP_TMP_DIR}/driver | ./${DRIVER_RUN_NAME_C} --noexec --extract=${DEP_TMP_DIR}/driver | ||||
| ./${PACKAGE_NAME_C} --noexec --extract=${DEP_TMP_DIR}/Packages_tmp | ./${PACKAGE_NAME_C} --noexec --extract=${DEP_TMP_DIR}/Packages_tmp | ||||
| @@ -22,6 +22,7 @@ add_subdirectory(depends/runtime) | |||||
| add_subdirectory(depends/hccl) | add_subdirectory(depends/hccl) | ||||
| add_subdirectory(depends/profiler) | add_subdirectory(depends/profiler) | ||||
| add_subdirectory(depends/error_manager) | add_subdirectory(depends/error_manager) | ||||
| add_subdirectory(depends/opt_info) | |||||
| if (ENABLE_GE_COV OR ENABLE_GE_UT) | if (ENABLE_GE_COV OR ENABLE_GE_UT) | ||||
| add_subdirectory(ut) | add_subdirectory(ut) | ||||
| @@ -60,6 +60,7 @@ set(SRCS | |||||
| "${GE_CODE_DIR}/metadef/graph/detail/attributes_holder.cc" | "${GE_CODE_DIR}/metadef/graph/detail/attributes_holder.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/utils/anchor_utils.cc" | "${GE_CODE_DIR}/metadef/graph/utils/anchor_utils.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/utils/graph_utils.cc" | "${GE_CODE_DIR}/metadef/graph/utils/graph_utils.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/utils/dumper/ge_graph_dumper.cc" | |||||
| "${GE_CODE_DIR}/metadef/graph/utils/node_utils.cc" | "${GE_CODE_DIR}/metadef/graph/utils/node_utils.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/utils/op_desc_utils.cc" | "${GE_CODE_DIR}/metadef/graph/utils/op_desc_utils.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/utils/type_utils.cc" | "${GE_CODE_DIR}/metadef/graph/utils/type_utils.cc" | ||||
| @@ -345,6 +345,10 @@ INT32 mmIsDir(const CHAR *fileName) | |||||
| INT32 mmGetEnv(const CHAR *name, CHAR *value, UINT32 len) | INT32 mmGetEnv(const CHAR *name, CHAR *value, UINT32 len) | ||||
| { | { | ||||
| const char *env = getenv(name); | |||||
| if ((env != nullptr) && (value != nullptr) && (len > 0U)) { |||||
| (void)strncpy(value, env, len - 1U); |||||
| value[len - 1U] = '\0'; |||||
| } |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -0,0 +1,37 @@ | |||||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| #cmake_minimum_required(VERSION 2.8) | |||||
| project(opt_feature_stub) | |||||
| file(GLOB_RECURSE SRCS RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
| "src/opt_info_stub.cc" | |||||
| ) | |||||
| include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/opt_info) | |||||
| add_library(opt_feature_stub SHARED ${SRCS}) | |||||
| target_compile_options(opt_feature_stub PRIVATE | |||||
| -g | |||||
| ) | |||||
| target_link_libraries(opt_feature_stub PRIVATE | |||||
| $<BUILD_INTERFACE:intf_pub> | |||||
| c_sec | |||||
| ) | |||||
| target_include_directories(opt_feature_stub INTERFACE ${CMAKE_CURRENT_LIST_DIR}/src) | |||||
| @@ -0,0 +1,46 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "opt_info.h" | |||||
| #include <string> | |||||
| #include <map> | |||||
| #include <vector> | |||||
| #include <algorithm> | |||||
| namespace gelc { | |||||
| namespace { | |||||
| const std::vector<std::string> kSocVersions = {"Ascend910"}; | |||||
| } | |||||
| void SetAllOptInfo(std::map<std::string, std::string> &opt_infos) { | |||||
| opt_infos.emplace("opt_module.fe", "all"); | |||||
| opt_infos.emplace("opt_module.pass", "all"); | |||||
| opt_infos.emplace("opt_module.op_tune", "all"); | |||||
| opt_infos.emplace("opt_module.rl_tune", "all"); | |||||
| opt_infos.emplace("opt_module.aoe", "all"); | |||||
| } | |||||
| Status GetOptInfo(WorkMode mode, const std::string &soc_ver, | |||||
| std::map<std::string, std::string> &opt_infos) { | |||||
| if (std::find(kSocVersions.begin(), kSocVersions.end(), soc_ver) == kSocVersions.end()) { |||||
| SetAllOptInfo(opt_infos); | |||||
| return SUCCESS; | |||||
| } | |||||
| opt_infos.emplace("opt_module.fe", "all"); | |||||
| opt_infos.emplace("opt_module.pass", "all"); | |||||
| opt_infos.emplace("opt_module.op_tune", "all"); | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace gelc | |||||
| @@ -23,13 +23,46 @@ | |||||
| void dav_log(int module_id, const char *fmt, ...) {} | void dav_log(int module_id, const char *fmt, ...) {} | ||||
| void DlogErrorInner(int module_id, const char *fmt, ...) { dav_log(module_id, fmt); } | |||||
| static int log_level = DLOG_ERROR; | |||||
| #define __DO_PRINT() \ | |||||
| do { \ | |||||
| const int FMT_BUFF_SIZE = 1024; \ | |||||
| char fmt_buff[FMT_BUFF_SIZE] = {0}; \ | |||||
| va_list valist; \ | |||||
| va_start(valist, fmt); \ | |||||
| vsnprintf(fmt_buff, FMT_BUFF_SIZE, fmt, valist); \ | |||||
| va_end(valist); \ | |||||
| printf("%s \n", fmt_buff); \ | |||||
| } while (0) | |||||
| void DlogErrorInner(int module_id, const char *fmt, ...) { | |||||
| if (log_level > DLOG_ERROR) { | |||||
| return; | |||||
| } | |||||
| __DO_PRINT(); | |||||
| } | |||||
| void DlogWarnInner(int module_id, const char *fmt, ...) { dav_log(module_id, fmt); } | |||||
| void DlogWarnInner(int module_id, const char *fmt, ...) { | |||||
| if (log_level > DLOG_WARN) { | |||||
| return; | |||||
| } | |||||
| __DO_PRINT(); | |||||
| } | |||||
| void DlogInfoInner(int module_id, const char *fmt, ...) { dav_log(module_id, fmt); } | |||||
| void DlogInfoInner(int module_id, const char *fmt, ...) { | |||||
| if (log_level > DLOG_INFO) { | |||||
| return; | |||||
| } | |||||
| __DO_PRINT(); | |||||
| } | |||||
| void DlogDebugInner(int module_id, const char *fmt, ...) { dav_log(module_id, fmt); } | |||||
| void DlogDebugInner(int module_id, const char *fmt, ...) { | |||||
| if (log_level > DLOG_DEBUG) { | |||||
| return; | |||||
| } | |||||
| __DO_PRINT(); | |||||
| } | |||||
| void DlogEventInner(int module_id, const char *fmt, ...) { dav_log(module_id, fmt); } | void DlogEventInner(int module_id, const char *fmt, ...) { dav_log(module_id, fmt); } | ||||
| @@ -39,30 +72,25 @@ void DlogWithKVInner(int module_id, int level, KeyValue *pst_kv_array, int kv_nu | |||||
| dav_log(module_id, fmt); | dav_log(module_id, fmt); | ||||
| } | } | ||||
| int dlog_setlevel(int module_id, int level, int enable_event) { return DLOG_DEBUG; } | |||||
| int dlog_setlevel(int module_id, int level, int enable_event) { | |||||
| log_level = level; | |||||
| return log_level; | |||||
| } | |||||
| int dlog_getlevel(int module_id, int *enable_event) { return DLOG_DEBUG; } | |||||
| int dlog_getlevel(int module_id, int *enable_event) { return log_level; } | |||||
| int CheckLogLevel(int moduleId, int logLevel) | |||||
| { | |||||
| return 1; | |||||
| } | |||||
| int CheckLogLevel(int moduleId, int log_level_check) { return log_level >= log_level_check; } | |||||
| /** | /** | ||||
| * @ingroup plog | * @ingroup plog | ||||
| * @brief DlogReportInitialize: init log in service process before all device setting. | * @brief DlogReportInitialize: init log in service process before all device setting. | ||||
| * @return: 0: SUCCEED, others: FAILED | * @return: 0: SUCCEED, others: FAILED | ||||
| */ | */ | ||||
| int DlogReportInitialize() { | |||||
| return 0; | |||||
| } | |||||
| int DlogReportInitialize() { return 0; } | |||||
| /** | /** | ||||
| * @ingroup plog | * @ingroup plog | ||||
| * @brief DlogReportFinalize: release log resource in service process after all device reset. | * @brief DlogReportFinalize: release log resource in service process after all device reset. | ||||
| * @return: 0: SUCCEED, others: FAILED | * @return: 0: SUCCEED, others: FAILED | ||||
| */ | */ | ||||
| int DlogReportFinalize() { | |||||
| return 0; | |||||
| } | |||||
| int DlogReportFinalize() { return 0; } | |||||
| @@ -15,18 +15,5 @@ | |||||
| include(cmake/graphengine.cmake) | include(cmake/graphengine.cmake) | ||||
| add_subdirectory(easy_graph) | add_subdirectory(easy_graph) | ||||
| add_subdirectory(stub_engine) | |||||
| add_subdirectory(ge_graph_dsl) | add_subdirectory(ge_graph_dsl) | ||||
| file(GLOB_RECURSE UTILS_SRC CONFIGURE_DEPENDS | |||||
| "utils/*.cc" | |||||
| ) | |||||
| add_library(framework STATIC ${UTILS_SRC}) | |||||
| target_include_directories(framework | |||||
| PUBLIC utils/ | |||||
| ) | |||||
| set_target_properties(framework PROPERTIES CXX_STANDARD 11) | |||||
| target_link_libraries(framework PUBLIC ge_graph_dsl graphengine fe) | |||||
| add_subdirectory(ge_running_env) | |||||
| @@ -103,6 +103,7 @@ list(APPEND INCLUDE_DIRECTORIES | |||||
| "${GE_CODE_DIR}/third_party/fwkacllib/inc/cce" | "${GE_CODE_DIR}/third_party/fwkacllib/inc/cce" | ||||
| "${GE_CODE_DIR}/third_party/fwkacllib/inc/ops" | "${GE_CODE_DIR}/third_party/fwkacllib/inc/ops" | ||||
| "${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain" | "${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain" | ||||
| "${GE_CODE_DIR}/third_party/fwkacllib/inc/opt_info" | |||||
| "${GE_CODE_DIR}/tests/ut/ge" | "${GE_CODE_DIR}/tests/ut/ge" | ||||
| "${GE_CODE_DIR}/tests/ut/common" | "${GE_CODE_DIR}/tests/ut/common" | ||||
| "${CMAKE_BINARY_DIR}" | "${CMAKE_BINARY_DIR}" | ||||
| @@ -117,6 +118,7 @@ list(APPEND STUB_LIBS | |||||
| runtime_stub | runtime_stub | ||||
| profiler_stub | profiler_stub | ||||
| hccl_stub | hccl_stub | ||||
| opt_feature_stub | |||||
| error_manager_stub | error_manager_stub | ||||
| ascend_protobuf | ascend_protobuf | ||||
| json | json | ||||
| @@ -150,7 +152,7 @@ set_target_properties(metadef_graph PROPERTIES CXX_STANDARD 11) | |||||
| # ---- Target : Local engine ---- | # ---- Target : Local engine ---- | ||||
| add_library(ge_local_engine SHARED ${LOCAL_ENGINE_SRC} ${METADEF_REGISTER_SRCS}) | |||||
| add_library(ge_local_engine SHARED ${LOCAL_ENGINE_SRC}) | |||||
| target_include_directories(ge_local_engine | target_include_directories(ge_local_engine | ||||
| PUBLIC | PUBLIC | ||||
| @@ -169,38 +171,11 @@ target_compile_options(ge_local_engine PRIVATE | |||||
| target_link_libraries(ge_local_engine PUBLIC | target_link_libraries(ge_local_engine PUBLIC | ||||
| $<BUILD_INTERFACE:intf_pub> ${STUB_LIBS} | $<BUILD_INTERFACE:intf_pub> ${STUB_LIBS} | ||||
| metadef_graph | |||||
| -lrt -ldl -lpthread -lgcov | -lrt -ldl -lpthread -lgcov | ||||
| ) | ) | ||||
| set_target_properties(ge_local_engine PROPERTIES CXX_STANDARD 11) | set_target_properties(ge_local_engine PROPERTIES CXX_STANDARD 11) | ||||
| # ---- Target : Host engine ---- | |||||
| add_library(host_cpu_engine SHARED ${HOST_ENGINE_SRC}) | |||||
| target_include_directories(host_cpu_engine | |||||
| PUBLIC | |||||
| "${INCLUDE_DIRECTORIES}" | |||||
| "${GE_CODE_DIR}/ge/host_cpu_engine" | |||||
| ) | |||||
| target_compile_definitions(host_cpu_engine PRIVATE | |||||
| google=ascend_private | |||||
| FMK_SUPPORT_DUMP | |||||
| ) | |||||
| target_compile_options(host_cpu_engine PRIVATE | |||||
| -g --coverage -fprofile-arcs -ftest-coverage | |||||
| -Werror=format | |||||
| ) | |||||
| target_link_libraries(host_cpu_engine PUBLIC | |||||
| $<BUILD_INTERFACE:intf_pub> ${STUB_LIBS} metadef_graph -lrt -ldl -lpthread -lgcov | |||||
| ) | |||||
| set_target_properties(host_cpu_engine PROPERTIES CXX_STANDARD 11) | |||||
| # ---- Target : engine plugin---- | # ---- Target : engine plugin---- | ||||
| # | # | ||||
| @@ -273,4 +248,4 @@ target_link_libraries(graphengine PUBLIC | |||||
| ) | ) | ||||
| set_target_properties(graphengine PROPERTIES CXX_STANDARD 11) | set_target_properties(graphengine PROPERTIES CXX_STANDARD 11) | ||||
| add_dependencies(graphengine host_cpu_engine ge_local_engine nnengine engine_conf.json optimizer_priority.pbtxt) | |||||
| add_dependencies(graphengine ge_local_engine nnengine engine_conf.json optimizer_priority.pbtxt) | |||||
| @@ -26,16 +26,32 @@ EG_NS_BEGIN | |||||
| //////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////// | ||||
| namespace detail { | namespace detail { | ||||
| template<typename GRAPH_BUILDER> | |||||
| template <typename GRAPH_BUILDER> | |||||
| Graph BuildGraph(const char *name, GRAPH_BUILDER builderInDSL) { | Graph BuildGraph(const char *name, GRAPH_BUILDER builderInDSL) { | ||||
| GraphBuilder builder(name); | GraphBuilder builder(name); | ||||
| builderInDSL(builder); | builderInDSL(builder); | ||||
| return std::move(*builder); | return std::move(*builder); | ||||
| } | } | ||||
| struct GraphDefiner { | |||||
| GraphDefiner(const char *defaultName, const char *specifiedName = nullptr) { | |||||
| name = specifiedName ? specifiedName : defaultName; | |||||
| } | |||||
| template <typename USER_BUILDER> | |||||
| auto operator|(USER_BUILDER &&userBuilder) { | |||||
| GraphBuilder graphBuilder{name}; | |||||
| std::forward<USER_BUILDER>(userBuilder)(graphBuilder); | |||||
| return *graphBuilder; | |||||
| } | |||||
| private: | |||||
| const char *name; | |||||
| }; | |||||
| } // namespace detail | } // namespace detail | ||||
| #define HAS_NAME(...) NOT_EMPTY_SELECT(__VA_ARGS__) | |||||
| #define DEF_GRAPH(G, ...) ::EG_NS::Graph G = ::EG_NS::detail::BuildGraph(HAS_NAME(__VA_ARGS__)(__VA_ARGS__, #G), [&](::EG_NS::GraphBuilder& BUILDER) | |||||
| #define DEF_GRAPH(G, ...) ::EG_NS::Graph G = ::EG_NS::detail::GraphDefiner(#G, ##__VA_ARGS__) | [&](auto &&BUILDER) | |||||
| #define DATA_CHAIN(...) ::EG_NS::ChainBuilder(BUILDER, ::EG_NS::EdgeType::DATA)->__VA_ARGS__ | #define DATA_CHAIN(...) ::EG_NS::ChainBuilder(BUILDER, ::EG_NS::EdgeType::DATA)->__VA_ARGS__ | ||||
| #define CTRL_CHAIN(...) ::EG_NS::ChainBuilder(BUILDER, ::EG_NS::EdgeType::CTRL)->__VA_ARGS__ | #define CTRL_CHAIN(...) ::EG_NS::ChainBuilder(BUILDER, ::EG_NS::EdgeType::CTRL)->__VA_ARGS__ | ||||
| #define CHAIN(...) DATA_CHAIN(__VA_ARGS__) | #define CHAIN(...) DATA_CHAIN(__VA_ARGS__) | ||||
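The DSL change above replaces the BuildGraph helper expansion with a GraphDefiner whose operator| pipes the user's lambda in, so DEF_GRAPH(g1) { ... }; still reads as a block while the builder type is deduced. A standalone sketch of that pipe-into-lambda shape, with toy types standing in for the real EG_NS classes:

    #include <iostream>
    #include <string>
    #include <utility>

    struct Builder { std::string name, nodes; };  // toy stand-in for GraphBuilder/Graph

    struct Definer {
      explicit Definer(const char *n) : name(n) {}
      template <typename F>
      Builder operator|(F &&f) {  // pipe the user's lambda in, return the built result
        Builder b{name, ""};
        std::forward<F>(f)(b);
        return b;
      }
      std::string name;
    };

    #define DEF_TOY_GRAPH(G) Builder G = Definer(#G) | [&](auto &&BUILDER)

    int main() {
      DEF_TOY_GRAPH(g1) { BUILDER.nodes = "data -> add"; };  // reads like DEF_GRAPH
      std::cout << g1.name << ": " << g1.nodes << "\n";
      return 0;
    }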
| @@ -16,10 +16,15 @@ | |||||
| #include "easy_graph/layout/graph_layout.h" | #include "easy_graph/layout/graph_layout.h" | ||||
| #include "easy_graph/layout/layout_executor.h" | #include "easy_graph/layout/layout_executor.h" | ||||
| #include "easy_graph/layout/engines/graph_easy/graph_easy_executor.h" | |||||
| #include "easy_graph/graph/graph.h" | #include "easy_graph/graph/graph.h" | ||||
| EG_NS_BEGIN | EG_NS_BEGIN | ||||
| namespace { | |||||
| GraphEasyExecutor default_executor; | |||||
| } | |||||
| void GraphLayout::Config(LayoutExecutor &executor, const LayoutOption *opts) { | void GraphLayout::Config(LayoutExecutor &executor, const LayoutOption *opts) { | ||||
| this->executor_ = &executor; | this->executor_ = &executor; | ||||
| options_ = opts; | options_ = opts; | ||||
| @@ -27,8 +32,7 @@ void GraphLayout::Config(LayoutExecutor &executor, const LayoutOption *opts) { | |||||
| Status GraphLayout::Layout(const Graph &graph, const LayoutOption *opts) { | Status GraphLayout::Layout(const Graph &graph, const LayoutOption *opts) { | ||||
| const LayoutOption *options = opts ? opts : this->options_; | const LayoutOption *options = opts ? opts : this->options_; | ||||
| if (!executor_) | |||||
| return EG_UNIMPLEMENTED; | |||||
| if (!executor_) return static_cast<LayoutExecutor &>(default_executor).Layout(graph, options); | |||||
| return executor_->Layout(graph, options); | return executor_->Layout(graph, options); | ||||
| } | } | ||||
| @@ -0,0 +1,37 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef D52AA06185E34BBFB714FFBCDAB0D53A | |||||
| #define D52AA06185E34BBFB714FFBCDAB0D53A | |||||
| #include "ge_graph_dsl/ge.h" | |||||
| #include <exception> | |||||
| #include <string> | |||||
| GE_NS_BEGIN | |||||
| struct AssertError : std::exception { | |||||
| AssertError(const char *file, int line, const std::string &info); | |||||
| private: | |||||
| const char *what() const noexcept override; | |||||
| private: | |||||
| std::string info; | |||||
| }; | |||||
| GE_NS_END | |||||
| #endif | |||||
| @@ -0,0 +1,32 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_31309AA0A4E44C009C22AD9351BF3410 | |||||
| #define INC_31309AA0A4E44C009C22AD9351BF3410 | |||||
| #include "ge_graph_dsl/ge.h" | |||||
| #include "graph/compute_graph.h" | |||||
| GE_NS_BEGIN | |||||
| using GraphCheckFun = std::function<void(const ::GE_NS::ComputeGraphPtr &)>; | |||||
| struct CheckUtils { | |||||
| static bool CheckGraph(const std::string &phase_id, const GraphCheckFun &fun); | |||||
| static void init(); | |||||
| }; | |||||
| GE_NS_END | |||||
| #endif | |||||
| @@ -0,0 +1,32 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef C8B32320BD4943D588594B82FFBF2685 | |||||
| #define C8B32320BD4943D588594B82FFBF2685 | |||||
| #include <vector> | |||||
| #include <string> | |||||
| #include "ge_graph_dsl/ge.h" | |||||
| GE_NS_BEGIN | |||||
| struct FilterScopeGuard { | |||||
| FilterScopeGuard(const std::vector<std::string> &); | |||||
| ~FilterScopeGuard(); | |||||
| }; | |||||
| GE_NS_END | |||||
| #endif | |||||
| @@ -0,0 +1,59 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef AD954C4ADF5B44F5B1CC8BCD72EE9ED6 | |||||
| #define AD954C4ADF5B44F5B1CC8BCD72EE9ED6 | |||||
| #include "ge_graph_dsl/ge.h" | |||||
| #include "ge_graph_dsl/assert/check_utils.h" | |||||
| #include "ge_graph_dsl/assert/assert_error.h" | |||||
| #include "ge_graph_dsl/assert/filter_scope_guard.h" | |||||
| GE_NS_BEGIN | |||||
| #ifdef GTEST_MESSAGE_AT_ | |||||
| #define GRAPH_CHECK_MESSAGE(file, line, message) \ | |||||
| GTEST_MESSAGE_AT_(file, line, message, ::testing::TestPartResult::kFatalFailure) | |||||
| #else | |||||
| #define GRAPH_CHECK_MESSAGE(file, line, message) throw AssertError(file, line, message) | |||||
| #endif | |||||
| namespace detail { | |||||
| struct GraphAssert { | |||||
| GraphAssert(const char *file, unsigned int line, const std::string &phase_id) | |||||
| : file_(file), line_(line), phase_id_(phase_id) {} | |||||
| void operator|(const ::GE_NS::GraphCheckFun &check_fun) { | |||||
| bool ret = ::GE_NS::CheckUtils::CheckGraph(phase_id_, check_fun); | |||||
| if (!ret) { | |||||
| auto message = "expected a dumped graph for phase: [" + phase_id_ + "], but no dumped graph was found!"; | |||||
| GRAPH_CHECK_MESSAGE(file_, line_, message.c_str()); | |||||
| } | |||||
| } | |||||
| private: | |||||
| const char *file_; | |||||
| unsigned int line_; | |||||
| const std::string phase_id_; | |||||
| }; | |||||
| } // namespace detail | |||||
| #define DUMP_GRAPH_WHEN(...) ::GE_NS::FilterScopeGuard guard__COUNTER__({__VA_ARGS__}); | |||||
| #define CHECK_GRAPH(phase_id) \ | |||||
| ::GE_NS::detail::GraphAssert(__FILE__, __LINE__, #phase_id) | [&](const ::GE_NS::ComputeGraphPtr &graph) | |||||
| GE_NS_END | |||||
| #endif | |||||
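The header above gives tests two macros: DUMP_GRAPH_WHEN installs an RAII FilterScopeGuard that restricts dumping to the named phases, and CHECK_GRAPH pipes an assertion lambda into GraphAssert, which reports through googletest when GTEST_MESSAGE_AT_ is available and throws AssertError otherwise. A hedged usage sketch (test-body fragment only; the phase name and node count are illustrative, not taken from the diff):

    DUMP_GRAPH_WHEN("PreRunAfterBuild")          // guard: dump only this phase
    // ... run the build / compile step under test ...
    CHECK_GRAPH(PreRunAfterBuild) {              // expands to GraphAssert(...) | lambda
      ASSERT_EQ(graph->GetAllNodesSize(), 3u);   // `graph` is the dumped ComputeGraphPtr
    };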
| @@ -33,14 +33,12 @@ struct OpDescCfg { | |||||
| std::vector<int64_t> shape_; | std::vector<int64_t> shape_; | ||||
| }; | }; | ||||
| OpDescCfg(const OpType &type, int in_cnt = 0, int out_cnt = 0, Format format = FORMAT_NCHW, | |||||
| OpDescCfg(const OpType &type, int in_cnt = 1, int out_cnt = 1, Format format = FORMAT_NCHW, | |||||
| DataType data_type = DT_FLOAT, std::vector<int64_t> shape = {1, 1, 224, 224}) | DataType data_type = DT_FLOAT, std::vector<int64_t> shape = {1, 1, 224, 224}) | ||||
| : type_(type), in_cnt_(in_cnt), out_cnt_(out_cnt), default_tensor_(format, data_type, shape) {} | : type_(type), in_cnt_(in_cnt), out_cnt_(out_cnt), default_tensor_(format, data_type, shape) {} | ||||
| protected: | protected: | ||||
| OpType GetType() const { | |||||
| return type_; | |||||
| } | |||||
| OpType GetType() const { return type_; } | |||||
| OpType type_; | OpType type_; | ||||
| int in_cnt_; | int in_cnt_; | ||||
| int out_cnt_; | int out_cnt_; | ||||
| @@ -21,6 +21,7 @@ | |||||
| #include "ge_graph_dsl/ge.h" | #include "ge_graph_dsl/ge.h" | ||||
| #include "ge_graph_dsl/op_desc/op_box.h" | #include "ge_graph_dsl/op_desc/op_box.h" | ||||
| #include "ge_graph_dsl/op_desc/op_desc_cfg.h" | #include "ge_graph_dsl/op_desc/op_desc_cfg.h" | ||||
| #include "graph/ge_attr_value.h" | |||||
| #include "graph/op_desc.h" | #include "graph/op_desc.h" | ||||
| GE_NS_BEGIN | GE_NS_BEGIN | ||||
| @@ -29,19 +30,32 @@ struct OpDescCfgBox : OpBox, private OpDescCfg { | |||||
| OpDescCfgBox(const OpType &opType); | OpDescCfgBox(const OpType &opType); | ||||
| OpDescCfgBox &InCnt(int in_cnt); | OpDescCfgBox &InCnt(int in_cnt); | ||||
| OpDescCfgBox &OutCnt(int out_cnt); | OpDescCfgBox &OutCnt(int out_cnt); | ||||
| OpDescCfgBox &ParentNodeIndex(int node_index); | |||||
| OpDescCfgBox &TensorDesc(Format format = FORMAT_NCHW, DataType data_type = DT_FLOAT, | OpDescCfgBox &TensorDesc(Format format = FORMAT_NCHW, DataType data_type = DT_FLOAT, | ||||
| std::vector<int64_t> shape = {1, 1, 224, 224}); | |||||
| template<typename Type> | |||||
| OpDescCfgBox& Attr(const std::string &name, Type value) { | |||||
| auto attrvalue = ge::GeAttrValue::CreateFrom<Type>(value); | |||||
| attrs_.emplace(std::make_pair(name, attrvalue)); | |||||
| return *this; | |||||
| } | |||||
| std::vector<int64_t> shape = {1, 1, 224, 224}); | |||||
| OpDescCfgBox &Weight(GeTensorPtr &); | |||||
| private: | |||||
| template <typename Type> | |||||
| OpDescCfgBox &Attr(const std::string &name, Type &&value) { | |||||
| auto attrvalue = ge::GeAttrValue::CreateFrom<Type>(std::forward<Type>(value)); | |||||
| attrs_.emplace(std::make_pair(name, attrvalue)); | |||||
| return *this; | |||||
| } | |||||
| template <typename Type> | |||||
| OpDescCfgBox &Attr(const std::string &name, Type &value) { | |||||
| auto attrvalue = ge::GeAttrValue::CreateFrom<Type>(value); | |||||
| attrs_.emplace(std::make_pair(name, attrvalue)); | |||||
| return *this; | |||||
| } | |||||
| OpDescCfgBox &Attr(const std::string &name, int value); | |||||
| OpDescCfgBox &Attr(const std::string &name, const char *value); | |||||
| OpDescPtr Build(const ::EG_NS::NodeId &id) const override; | OpDescPtr Build(const ::EG_NS::NodeId &id) const override; | ||||
| void UpdateAttrs(OpDescPtr&) const; | |||||
| std::map<std::string, GeAttrValue> attrs_; | |||||
| private: | |||||
| void UpdateAttrs(OpDescPtr &) const; | |||||
| std::map<std::string, GeAttrValue> attrs_; | |||||
| }; | }; | ||||
| #define OP_CFG(optype) ::GE_NS::OpDescCfgBox(optype) | #define OP_CFG(optype) ::GE_NS::OpDescCfgBox(optype) | ||||
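With the additions above, OpDescCfgBox configures an op fluently: the new ParentNodeIndex, Weight and Attr members all return the box itself, and OP_CFG seeds the chain. A hedged usage sketch (op type, attribute name and shape are illustrative, and it assumes the Attr overloads remain publicly reachable):

    auto data_op = OP_CFG("Data")
                       .InCnt(1)
                       .OutCnt(1)
                       .ParentNodeIndex(0)
                       .Attr("index", 0)
                       .TensorDesc(FORMAT_NCHW, DT_FLOAT, {1, 3, 224, 224});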
| @@ -0,0 +1,26 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "ge_graph_dsl/assert/assert_error.h" | |||||
| GE_NS_BEGIN | |||||
| AssertError::AssertError(const char *file, int line, const std::string &info) { | |||||
| this->info = std::string(file) + ":" + std::to_string(line) + "\n" + info; | |||||
| } | |||||
| const char *AssertError::what() const noexcept { return info.c_str(); } | |||||
| GE_NS_END | |||||
| @@ -0,0 +1,34 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "ge_graph_dsl/assert/check_utils.h" | |||||
| #include "graph/utils/dumper/ge_graph_dumper.h" | |||||
| #include "ge_graph_default_checker.h" | |||||
| #include "ge_graph_check_dumper.h" | |||||
| GE_NS_BEGIN | |||||
| bool CheckUtils::CheckGraph(const std::string &phase_id, const GraphCheckFun &fun) { | |||||
| auto &dumper = dynamic_cast<GeGraphCheckDumper &>(GraphDumperRegistry::GetDumper()); | |||||
| return dumper.CheckFor(GeGraphDefaultChecker(phase_id, fun)); | |||||
| } | |||||
| void CheckUtils::init() { | |||||
| static GeGraphCheckDumper checkDumper; | |||||
| GraphDumperRegistry::Register(checkDumper); | |||||
| } | |||||
| GE_NS_END | |||||
| @@ -0,0 +1,31 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "ge_graph_dsl/assert/filter_scope_guard.h" | |||||
| #include "graph/utils/dumper/ge_graph_dumper.h" | |||||
| #include "ge_dump_filter.h" | |||||
| GE_NS_BEGIN | |||||
| namespace { | |||||
| GeDumpFilter &GetDumpFilter() { return dynamic_cast<GeDumpFilter &>(GraphDumperRegistry::GetDumper()); } | |||||
| } // namespace | |||||
| FilterScopeGuard::FilterScopeGuard(const std::vector<std::string> &filter) { GetDumpFilter().Update(filter); } | |||||
| FilterScopeGuard::~FilterScopeGuard() { GetDumpFilter().Reset(); } | |||||
| GE_NS_END | |||||