| @@ -50,9 +50,8 @@ CommentPragmas: '^ IWYU pragma:' | |||
| CompactNamespaces: false | |||
| ConstructorInitializerAllOnOneLineOrOnePerLine: true | |||
| ConstructorInitializerIndentWidth: 4 | |||
| ContinuationIndentWidth: 2 | |||
| ContinuationIndentWidth: 4 | |||
| Cpp11BracedListStyle: true | |||
| DerivePointerAlignment: true | |||
| DisableFormat: false | |||
| ExperimentalAutoDetectBinPacking: false | |||
| FixNamespaceComments: true | |||
| @@ -94,7 +93,7 @@ PenaltyBreakString: 1000 | |||
| PenaltyBreakTemplateDeclaration: 10 | |||
| PenaltyExcessCharacter: 1000000 | |||
| PenaltyReturnTypeOnItsOwnLine: 200 | |||
| PointerAlignment: Left | |||
| PointerAlignment: Right | |||
| RawStringFormats: | |||
| - Language: Cpp | |||
| Delimiters: | |||
| @@ -95,6 +95,7 @@ else () | |||
| #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH}) | |||
| else() | |||
| find_module(slog libalog.so ${ASCEND_ATC_DIR}) | |||
| find_module(opt_feature libopt_feature.so ${ASCEND_ATC_DIR}) | |||
| find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR}) | |||
| if(PLATFORM STREQUAL "train") | |||
| find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) | |||
| @@ -144,7 +144,6 @@ build_graphengine() | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_GE_UT=ON" | |||
| fi | |||
| if [[ "X$ENABLE_GE_ST" = "Xon" ]]; then | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_GE_ST=ON" | |||
| fi | |||
| @@ -176,7 +175,7 @@ build_graphengine() | |||
| TARGET="ge_compiler atc_atc.bin ge_executor_shared ${TARGET}" | |||
| elif [ "X$ENABLE_GE_ST" = "Xon" ] | |||
| then | |||
| TARGET="ge_graph_dsl_test graph_engine_test" | |||
| TARGET="ge_graph_dsl_test ge_running_env_test graph_engine_test" | |||
| elif [ "X$ENABLE_GE_UT" = "Xon" ] | |||
| then | |||
| TARGET="ut_libgraph ut_libge_multiparts_utest ut_libge_others_utest ut_libge_kernel_utest ut_libge_distinct_load_utest" | |||
| @@ -244,13 +243,13 @@ if [[ "X$ENABLE_GE_ST" = "Xon" ]]; then | |||
| mkdir -p ${OUTPUT_PATH}/plugin/opskernel | |||
| cp ${BUILD_PATH}/tests/framework/libnnengine.so ${OUTPUT_PATH}/plugin/nnengine | |||
| cp ${BUILD_PATH}/engine_conf.json ${OUTPUT_PATH}/plugin/nnengine/ge_config | |||
| cp ${BUILD_PATH}/tests/framework/libhost_cpu_engine.so ${OUTPUT_PATH}/plugin/opskernel | |||
| cp ${BUILD_PATH}/tests/framework/libge_local_engine.so ${OUTPUT_PATH}/plugin/opskernel | |||
| cp ${BUILD_PATH}/tests/framework/stub_engine/libfe.so ${OUTPUT_PATH}/plugin/opskernel | |||
| #prepare st execution bin | |||
| cp ${BUILD_PATH}/tests/st/testcase/graph_engine_test ${OUTPUT_PATH} | |||
| cp ${BUILD_PATH}/tests/framework/ge_running_env/tests/ge_running_env_test ${OUTPUT_PATH} | |||
| cp ${BUILD_PATH}/tests/framework/ge_graph_dsl/tests/ge_graph_dsl_test ${OUTPUT_PATH} | |||
| #execute st testcase | |||
| RUN_TEST_CASE=${OUTPUT_PATH}/ge_running_env_test && ${RUN_TEST_CASE} | |||
| RUN_TEST_CASE=${OUTPUT_PATH}/graph_engine_test && ${RUN_TEST_CASE} | |||
| RUN_TEST_CASE=${OUTPUT_PATH}/ge_graph_dsl_test && ${RUN_TEST_CASE} | |||
| if [[ "$?" -ne 0 ]]; then | |||
| @@ -298,7 +298,9 @@ set(TRAIN_SRC_LIST | |||
| "graph/passes/hccl_continuous_memcpy_pass.cc" | |||
| "graph/passes/identity_pass.cc" | |||
| "graph/passes/ref_identity_delete_op_pass.cc" | |||
| "graph/passes/infer_base_pass.cc" | |||
| "graph/passes/infershape_pass.cc" | |||
| "graph/passes/infer_value_range_pass.cc" | |||
| "graph/passes/iterator_op_pass.cc" | |||
| "graph/passes/link_gen_mask_nodes_pass.cc" | |||
| "graph/passes/merge_pass.cc" | |||
| @@ -434,6 +436,7 @@ set(TRAIN_SRC_LIST | |||
| "graph/build/memory/max_block_mem_assigner.cc" | |||
| "graph/build/memory/var_mem_assign_util.cc" | |||
| "graph/build/memory/buffer_pool_mem_assigner.cc" | |||
| "ge_opt_info/ge_opt_info.cc" | |||
| ) | |||
| set(INFER_SRC_LIST | |||
| @@ -547,7 +550,9 @@ set(INFER_SRC_LIST | |||
| "graph/passes/shape_operate_op_remove_pass.cc" | |||
| "graph/passes/assert_pass.cc" | |||
| "graph/passes/dropout_pass.cc" | |||
| "graph/passes/infer_base_pass.cc" | |||
| "graph/passes/infershape_pass.cc" | |||
| "graph/passes/infer_value_range_pass.cc" | |||
| "graph/passes/unused_const_pass.cc" | |||
| "graph/passes/permute_pass.cc" | |||
| "graph/passes/ctrl_edge_transfer_pass.cc" | |||
| @@ -711,6 +716,7 @@ set(INFER_SRC_LIST | |||
| "graph/build/memory/max_block_mem_assigner.cc" | |||
| "graph/build/memory/var_mem_assign_util.cc" | |||
| "graph/build/memory/buffer_pool_mem_assigner.cc" | |||
| "ge_opt_info/ge_opt_info.cc" | |||
| ) | |||
| if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) | |||
| @@ -765,11 +771,13 @@ target_include_directories(ge_runner SYSTEM PRIVATE | |||
| ${GE_CODE_DIR}/../inc | |||
| ${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external | |||
| ${GE_CODE_DIR}/../abl/adump/external | |||
| ${GE_CODE_DIR}/../abl/licctrl | |||
| #### blue zone | |||
| ${ASCEND_DIR}/driver/include | |||
| ${ASCEND_DIR}/fwkacllib/include | |||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | |||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | |||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc/opt_info | |||
| ) | |||
| target_link_options(ge_runner PRIVATE | |||
| @@ -792,6 +800,7 @@ target_link_libraries(ge_runner PRIVATE | |||
| runtime | |||
| error_manager | |||
| ascend_hal_stub | |||
| opt_feature | |||
| -Wl,--as-needed | |||
| json | |||
| -lrt | |||
| @@ -839,11 +848,13 @@ target_include_directories(ge_compiler SYSTEM PRIVATE | |||
| ${GE_CODE_DIR}/../inc | |||
| ${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external | |||
| ${GE_CODE_DIR}/../abl/adump/external | |||
| ${GE_CODE_DIR}/../abl/licctrl | |||
| #### blue zone #### | |||
| ${ASCEND_DIR}/driver/include | |||
| ${ASCEND_DIR}/fwkacllib/include | |||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | |||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | |||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc/opt_info | |||
| ) | |||
| target_link_options(ge_compiler PRIVATE | |||
| @@ -863,6 +874,7 @@ target_link_libraries(ge_compiler PRIVATE | |||
| error_manager | |||
| slog | |||
| runtime_compile | |||
| opt_feature | |||
| -Wl,--as-needed | |||
| json | |||
| -lrt | |||
| @@ -95,6 +95,7 @@ target_link_libraries(ge_common PRIVATE | |||
| $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:slog_headers>> | |||
| $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:msprof_headers>> | |||
| $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:mmpa_headers>> | |||
| $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:runtime_headers>> | |||
| static_mmpa | |||
| -Wl,--no-as-needed | |||
| graph | |||
| @@ -155,6 +156,7 @@ target_link_libraries(ge_common_static PRIVATE | |||
| $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:slog_headers>> | |||
| $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:msprof_headers>> | |||
| $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:mmpa_headers>> | |||
| $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:runtime_headers>> | |||
| ascend_protobuf_static | |||
| json | |||
| c_sec | |||
| @@ -18,6 +18,7 @@ | |||
| #include <cstdio> | |||
| #include <string> | |||
| #include <regex> | |||
| #include "common/ge/ge_util.h" | |||
| #include "framework/common/util.h" | |||
| @@ -37,6 +38,159 @@ const uint32_t kAtomicOverflow = (0x1 << 1); | |||
| const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow); | |||
| } // namespace | |||
| namespace ge { | |||
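| // Split string s on the delimiter characters in delchar and append the tokens to result (result is cleared first). | |||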
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::Split(const std::string &s, | |||
| std::vector<std::string> &result, | |||
| const char *delchar) { | |||
| if (s.empty()) { | |||
| return; | |||
| } | |||
| result.clear(); | |||
| char *buffer = new (std::nothrow)char[s.size() + 1]; | |||
| if (buffer == nullptr) { | |||
| GELOGE(FAILED, "[Split][string] failed while malloc memory, string value is:%s", s.c_str()); | |||
| REPORT_CALL_ERROR("E19999", "Memory malloc may fail when split string, get fatal exception, " | |||
| "string value is:%s", s.c_str()); | |||
| return; | |||
| } | |||
| buffer[s.size()] = '\0'; | |||
| errno_t e = strcpy_s(buffer, s.size() + 1, s.c_str()); | |||
| if (e != EOK) { | |||
| delete[] buffer; | |||
| return; | |||
| } | |||
| char *context = nullptr; | |||
| char *p = strtok_s(buffer, delchar, &context); | |||
| while (p != nullptr) { | |||
| result.emplace_back(p); | |||
| p = strtok_s(nullptr, delchar, &context); | |||
| } | |||
| delete[] buffer; | |||
| } | |||
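| // Validate ge.exec.dumpStep: "|"-separated steps or ranges such as "0|5|10-20", at most 100 sets, and each range start must be less than its end. | |||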
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpProperties::CheckDumpStep(const std::string &dump_step) { | |||
| std::string modified_dump_step = dump_step + "|"; | |||
| std::smatch result; | |||
| std::vector<string> match_vecs; | |||
| std::regex pattern(R"((\d{1,}-\d{1,}\||\d{1,}\|)+)"); | |||
| if (regex_match(modified_dump_step, result, pattern)) { | |||
| Split(result.str(), match_vecs, "|"); | |||
| if (match_vecs.empty()) { | |||
| REPORT_CALL_ERROR("E19999", "Split may get fatal exception, dump_step:%s.", dump_step.c_str()); | |||
| GELOGE(FAILED, "[Check][Param] failed. Split may get fatal exception, ge.exec.dumpStep:%s.", dump_step.c_str()); | |||
| return FAILED; | |||
| } | |||
| // 100 is the max sets of dump steps. | |||
| if (match_vecs.size() > 100) { | |||
| REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}), | |||
| std::vector<std::string>({ | |||
| "ge.exec.dumpStep", | |||
| dump_step.c_str(), | |||
| " is not supported, only support dump <= 100 sets of data"})); | |||
| GELOGE(PARAM_INVALID, "[Check][Param] get dump_step value:%s, " | |||
| "dump_step only support dump <= 100 sets of data.", dump_step.c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| for (const auto &match_vec : match_vecs) { | |||
| std::vector<string> vec_after_split; | |||
| Split(match_vec, vec_after_split, "-"); | |||
| if (vec_after_split.empty()) { | |||
| REPORT_CALL_ERROR("E19999", "Split may get fatal exception."); | |||
| GELOGE(FAILED, "[Check][Param] failed, split may get fatal exception."); | |||
| return FAILED; | |||
| } | |||
| if (vec_after_split.size() > 1) { | |||
| if (std::atoi(vec_after_split[0].c_str()) >= std::atoi(vec_after_split[1].c_str())) { | |||
| REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}), | |||
| std::vector<std::string>({ | |||
| "ge.exec.dumpStep", | |||
| dump_step.c_str(), | |||
| " is not supported." | |||
| "in range steps, the first step is >= second step, correct example:'0|5|10-20"})); | |||
| GELOGE(PARAM_INVALID, "[Check][Param] get dump_step value:%s, " | |||
| "in range steps, the first step is >= second step, correct example:'0|5|10-20'", dump_step.c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| } | |||
| } | |||
| } else { | |||
| REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}), | |||
| std::vector<std::string>({ | |||
| "ge.exec.dumpStep", | |||
| dump_step.c_str(), | |||
| " is not supported, correct example:'0|5|10|50-100."})); | |||
| GELOGE(PARAM_INVALID, "[Check][Param] get dump_step value:%s, " | |||
| "dump_step string style is error, correct example:'0|5|10|50-100.'", dump_step.c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
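| // Validate ge.exec.dumpMode: must be one of "input", "output" or "all". | |||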
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpProperties::CheckDumpMode(const std::string &dump_mode) { | |||
| const std::set<string> dump_mode_list = {"input", "output", "all"}; | |||
| std::set<string>::iterator iter; | |||
| if ((iter = dump_mode_list.find(dump_mode)) == dump_mode_list.end()) { | |||
| REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}), | |||
| std::vector<std::string>({ | |||
| "ge.exec.dumpMode", | |||
| dump_mode.c_str(), | |||
| " is not supported, should be one of the following:[input, output, all]"})); | |||
| GELOGE(PARAM_INVALID, "[Check][Param] the dump_debug_mode:%s, is is not supported," | |||
| "should be one of the following:[input, output, all].", dump_mode.c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
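| // Validate ge.exec.dumpPath: must be an existing directory with read and write permissions. | |||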
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpProperties::CheckDumpPath(const std::string &input) { | |||
| if (mmIsDir(input.c_str()) != EN_OK) { | |||
| REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}), | |||
| std::vector<std::string>({ | |||
| "ge.exec.dumpPath", | |||
| input.c_str(), | |||
| " is not a directory."})); | |||
| GELOGE(PARAM_INVALID, "[Check][Param] the path:%s, is not directory.", input.c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| char trusted_path[MMPA_MAX_PATH] = { "\0" }; | |||
| if (mmRealPath(input.c_str(), trusted_path, MMPA_MAX_PATH) != EN_OK) { | |||
| REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}), | |||
| std::vector<std::string>({ | |||
| "ge.exec.dumpPath", | |||
| input.c_str(), | |||
| " dumpPath invalid."})); | |||
| GELOGE(PARAM_INVALID, "[Check][Param] the dumpPath:%s, is invalid.", input.c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| if (mmAccess2(trusted_path, M_R_OK | M_W_OK) != EN_OK) { | |||
| REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}), | |||
| std::vector<std::string>({ | |||
| "ge.exec.dumpPath", | |||
| input.c_str(), | |||
| " does't have read, write permissions."})); | |||
| GELOGE(PARAM_INVALID, "[Check][Param] the path:%s, does't have read, write permissions.", input.c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
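| // Validate ge.exec.enableDump / ge.exec.enableDumpDebug: only "1" and "0" are accepted. | |||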
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpProperties::CheckEnableDump(const std::string &input) { | |||
| std::set<string> enable_dump_option_list = {"1", "0"}; | |||
| auto it = enable_dump_option_list.find(input); | |||
| if (it == enable_dump_option_list.end()) { | |||
| REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}), | |||
| std::vector<std::string>({ | |||
| "ge.exec.enableDump", | |||
| input.c_str(), | |||
| " only support 1 or 0."})); | |||
| GELOGE(PARAM_INVALID, "[Check][Param] Not support ge.exec.enableDump or ge.exec.enableDumpDebug format:%s, " | |||
| "only support 1 or 0.", input.c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties::DumpProperties(const DumpProperties &other) { | |||
| CopyFrom(other); | |||
| } | |||
| @@ -47,7 +201,26 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &DumpProperties: | |||
| return *this; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOptions() { | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpProperties::SetDumpOptions() { | |||
| if (enable_dump_ == kEnableFlag) { | |||
| std::string dump_step; | |||
| if (GetContext().GetOption(OPTION_EXEC_DUMP_STEP, dump_step) == GRAPH_SUCCESS) { | |||
| GE_CHK_STATUS_RET(CheckDumpStep(dump_step), "[Check][dump_step] failed."); | |||
| GELOGI("Get dump step %s successfully", dump_step.c_str()); | |||
| SetDumpStep(dump_step); | |||
| } | |||
| string dump_mode = "output"; | |||
| if (GetContext().GetOption(OPTION_EXEC_DUMP_MODE, dump_mode) == GRAPH_SUCCESS) { | |||
| GELOGI("Get dump mode %s successfully", dump_mode.c_str()); | |||
| GE_CHK_STATUS_RET(CheckDumpMode(dump_mode), "[Check][dump_mode] failed."); | |||
| SetDumpMode(dump_mode); | |||
| } | |||
| AddPropertyValue(DUMP_ALL_MODEL, {}); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
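| // Initialize dump properties from the graph options; returns FAILED or PARAM_INVALID when options conflict or fail validation. | |||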
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpProperties::InitByOptions() { | |||
| enable_dump_.clear(); | |||
| enable_dump_debug_.clear(); | |||
| dump_path_.clear(); | |||
| @@ -57,17 +230,32 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti | |||
| is_infer_op_debug_ = false; | |||
| op_debug_mode_ = 0; | |||
| std::string enable_dump; | |||
| std::string enable_dump = std::to_string(false); | |||
| (void)GetContext().GetOption(OPTION_EXEC_ENABLE_DUMP, enable_dump); | |||
| enable_dump_ = enable_dump; | |||
| if (!enable_dump_.empty()) { | |||
| GE_CHK_STATUS_RET(CheckEnableDump(enable_dump_), "[Check][enable_dump] failed."); | |||
| } | |||
| std::string enable_dump_debug; | |||
| std::string enable_dump_debug = std::to_string(false); | |||
| (void)GetContext().GetOption(OPTION_EXEC_ENABLE_DUMP_DEBUG, enable_dump_debug); | |||
| enable_dump_debug_ = enable_dump_debug; | |||
| if (!enable_dump_debug_.empty()) { | |||
| GE_CHK_STATUS_RET(CheckEnableDump(enable_dump_debug_), "[Check][enable_dump_debug] failed."); | |||
| } | |||
| if ((enable_dump_ == kEnableFlag) && (enable_dump_debug_ == kEnableFlag)) { | |||
| REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}), | |||
| std::vector<std::string>({ | |||
| "ge.exec.enableDump and ge.exec.enableDumpDebug", | |||
| enable_dump_ + ", " + enable_dump_debug, | |||
| "ge.exec.enableDump and ge.exec.enableDumpDebug cannot be set to 1 at the same time."})); | |||
| GELOGE(FAILED, "ge.exec.enableDump and ge.exec.enableDumpDebug cannot be both set to 1 at the same time."); | |||
| return FAILED; | |||
| } | |||
| if ((enable_dump_ == kEnableFlag) || (enable_dump_debug_ == kEnableFlag)) { | |||
| std::string dump_path; | |||
| if (GetContext().GetOption(OPTION_EXEC_DUMP_PATH, dump_path) == GRAPH_SUCCESS) { | |||
| GE_CHK_STATUS_RET(CheckDumpPath(dump_path), "Check dump path failed."); | |||
| if (!dump_path.empty() && dump_path[dump_path.size() - 1] != '/') { | |||
| dump_path = dump_path + "/"; | |||
| } | |||
| @@ -75,25 +263,21 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti | |||
| GELOGI("Get dump path %s successfully", dump_path.c_str()); | |||
| SetDumpPath(dump_path); | |||
| } else { | |||
| GELOGW("Dump path is not set"); | |||
| REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}), | |||
| std::vector<std::string>({ | |||
| "ge.exec.dumpPath", | |||
| dump_path, | |||
| "ge.exec.dumpPath is not set."})); | |||
| GELOGE(FAILED, "[Check][dump_path] failed. Dump path is not set."); | |||
| return FAILED; | |||
| } | |||
| } | |||
| if (enable_dump_ == kEnableFlag) { | |||
| std::string dump_step; | |||
| if (GetContext().GetOption(OPTION_EXEC_DUMP_STEP, dump_step) == GRAPH_SUCCESS) { | |||
| GELOGI("Get dump step %s successfully", dump_step.c_str()); | |||
| SetDumpStep(dump_step); | |||
| } | |||
| string dump_mode; | |||
| if (GetContext().GetOption(OPTION_EXEC_DUMP_MODE, dump_mode) == GRAPH_SUCCESS) { | |||
| GELOGI("Get dump mode %s successfully", dump_mode.c_str()); | |||
| SetDumpMode(dump_mode); | |||
| } | |||
| AddPropertyValue(DUMP_ALL_MODEL, {}); | |||
| } | |||
| GE_CHK_STATUS_RET(SetDumpOptions(), "SetDumpOptions failed."); | |||
| GE_CHK_STATUS_RET(SetDumpDebugOptions(), "SetDumpDebugOptions failed."); | |||
| SetDumpDebugOptions(); | |||
| return SUCCESS; | |||
| } | |||
| // The following is the new dump scenario of the fusion operator | |||
| @@ -253,14 +437,20 @@ void DumpProperties::CopyFrom(const DumpProperties &other) { | |||
| } | |||
| } | |||
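| // Parse ge.exec.dumpDebugMode and set the overflow-debug flags; a missing or invalid value now returns PARAM_INVALID instead of only logging a warning. | |||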
| void DumpProperties::SetDumpDebugOptions() { | |||
| Status DumpProperties::SetDumpDebugOptions() { | |||
| if (enable_dump_debug_ == kEnableFlag) { | |||
| std::string dump_debug_mode; | |||
| if (GetContext().GetOption(OPTION_EXEC_DUMP_DEBUG_MODE, dump_debug_mode) == GRAPH_SUCCESS) { | |||
| GELOGD("Get dump debug mode %s successfully", dump_debug_mode.c_str()); | |||
| } else { | |||
| GELOGW("Dump debug mode is not set."); | |||
| return; | |||
| REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}), | |||
| std::vector<std::string>({ | |||
| "ge.exec.dumpDebugMode", | |||
| dump_debug_mode, | |||
| "ge.exec.dumpDebugMode is not set."})); | |||
| GELOGE(PARAM_INVALID, "[Check][dump_debug_mode] failed. Dump debug mode is not set."); | |||
| return PARAM_INVALID; | |||
| } | |||
| if (dump_debug_mode == OP_DEBUG_AICORE) { | |||
| @@ -276,10 +466,17 @@ void DumpProperties::SetDumpDebugOptions() { | |||
| is_train_op_debug_ = true; | |||
| op_debug_mode_ = kAllOverflow; | |||
| } else { | |||
| GELOGW("ge.exec.dumpDebugMode is invalid."); | |||
| REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}), | |||
| std::vector<std::string>({ | |||
| "ge.exec.dumpDebugMode", | |||
| dump_debug_mode, | |||
| "ge.exec.dumpDebugMode is invalid."})); | |||
| GELOGE(PARAM_INVALID, "[Set][DumpDebugOptions] failed, ge.exec.dumpDebugMode is invalid."); | |||
| return PARAM_INVALID; | |||
| } | |||
| } else { | |||
| GELOGI("ge.exec.enableDumpDebug is false or is not set."); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -23,6 +23,7 @@ | |||
| #include <vector> | |||
| namespace ge { | |||
| using Status = uint32_t; | |||
| class DumpProperties { | |||
| public: | |||
| DumpProperties() = default; | |||
| @@ -33,7 +34,7 @@ class DumpProperties { | |||
| DumpProperties &operator=(const DumpProperties &dump); | |||
| void InitByOptions(); | |||
| Status InitByOptions(); | |||
| void AddPropertyValue(const std::string &model, const std::set<std::string> &layers); | |||
| @@ -95,7 +96,20 @@ class DumpProperties { | |||
| private: | |||
| void CopyFrom(const DumpProperties &other); | |||
| void SetDumpDebugOptions(); | |||
| Status SetDumpDebugOptions(); | |||
| Status SetDumpOptions(); | |||
| void Split(const std::string &s, std::vector<std::string> &result, const char *delchar); | |||
| Status CheckDumpStep(const std::string &dump_step); | |||
| Status CheckDumpMode(const std::string &dump_mode); | |||
| Status CheckDumpPath(const std::string &input); | |||
| Status CheckEnableDump(const std::string &input); | |||
| std::string enable_dump_; | |||
| std::string enable_dump_debug_; | |||
| @@ -161,6 +161,7 @@ Status ExceptionDumper::DumpExceptionInfo(const std::vector<rtExceptionInfo> &ex | |||
| uint64_t proto_size = dump_data.ByteSizeLong(); | |||
| std::unique_ptr<char[]> proto_msg(new (std::nothrow) char[proto_size]); | |||
| GE_CHECK_NOTNULL(proto_msg); | |||
| bool ret = dump_data.SerializeToArray(proto_msg.get(), proto_size); | |||
| if (!ret || proto_size == 0) { | |||
| REPORT_INNER_ERROR("E19999", "Serialize proto to string fail"); | |||
| @@ -49,6 +49,25 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY std::string ShapeToString(const s | |||
| return JoinToString(shape); | |||
| } | |||
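| // Format shape ranges as "[{min,max},{min,max},...]" for logging. | |||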
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY | |||
| std::string RangeToString(const std::vector<std::pair<int64_t, int64_t>> &ranges) { | |||
| bool first = true; | |||
| std::stringstream ss; | |||
| ss << "["; | |||
| for (const auto &range : ranges) { | |||
| if (first) { | |||
| first = false; | |||
| } else { | |||
| ss << ","; | |||
| } | |||
| ss << "{"; | |||
| ss << range.first << "," << range.second; | |||
| ss << "}"; | |||
| } | |||
| ss << "]"; | |||
| return ss.str(); | |||
| } | |||
| int64_t GetItemNumByShape(const std::vector<int64_t> &shape) { | |||
| int64_t num = 1; | |||
| for (auto dim : shape) { | |||
| @@ -54,6 +54,8 @@ std::string ShapeToString(const GeShape &shape); | |||
| std::string ShapeToString(const std::vector<int64_t> &shape); | |||
| std::string RangeToString(const std::vector<std::pair<int64_t, int64_t>> &ranges); | |||
| int64_t GetItemNumByShape(const std::vector<int64_t> &shape); | |||
| bool CheckShapeValid(const std::vector<int64_t> &shape, const int64_t expect_dims); | |||
| @@ -186,6 +186,8 @@ target_include_directories(ge_executor SYSTEM PRIVATE | |||
| ${CMAKE_BINARY_DIR}/proto/graphengine_protos | |||
| #### yellow zone #### | |||
| $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:${GE_DEPEND_DIR}/inc> | |||
| $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<TARGET_PROPERTY:runtime_headers,INTERFACE_INCLUDE_DIRECTORIES>> | |||
| $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<TARGET_PROPERTY:cce_headers,INTERFACE_INCLUDE_DIRECTORIES>> | |||
| #### blue zone #### | |||
| $<$<BOOL:${ENABLE_OPEN_SRC}>:${GE_CODE_DIR}/third_party/fwkacllib/inc> | |||
| $<$<BOOL:${ENABLE_OPEN_SRC}>:${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain> | |||
| @@ -251,6 +253,8 @@ target_link_libraries(ge_executor_shared PRIVATE | |||
| $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:slog_headers>> | |||
| $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:msprof_headers>> | |||
| $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:mmpa_headers>> | |||
| $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:runtime_headers>> | |||
| $<$<NOT:$<BOOL:${ENABLE_OPEN_SRC}>>:$<BUILD_INTERFACE:cce_headers>> | |||
| -Wl,--no-as-needed | |||
| ge_common | |||
| runtime | |||
| @@ -125,34 +125,41 @@ void SetDynamicInputDataFlag(const ge::RunModelData &input_data, const std::vect | |||
| bool IsDynamicBatchSizeMatchModel(uint64_t batch_size, const vector<std::vector<int64_t>> &batch_info) { | |||
| if (batch_info.empty()) { | |||
| GELOGE(ge::FAILED, "Dynamic batch info is empty."); | |||
| REPORT_INNER_ERROR("E19999", "param Dynamic batch info is empty, check invalid."); | |||
| GELOGE(ge::FAILED, "[Check][Param] Dynamic batch info is empty."); | |||
| return false; | |||
| } | |||
| for (auto batch : batch_info) { | |||
| if (batch.size() != kDynamicBatchSizeVecSize) { | |||
| GELOGE(ge::FAILED, "Dynamic batch param num is %zu, current batch size is %zu.", kDynamicBatchSizeVecSize, | |||
| batch.size()); | |||
| REPORT_INNER_ERROR("E19999", "Dynamic batch param num is %zu, current batch size is %zu.", | |||
| kDynamicBatchSizeVecSize, batch.size()); | |||
| GELOGE(ge::FAILED, "[Check][Param] Dynamic batch param num is %zu, current batch size is %zu.", | |||
| kDynamicBatchSizeVecSize, batch.size()); | |||
| return false; | |||
| } | |||
| if (batch[0] == static_cast<int64_t>(batch_size)) { | |||
| return true; | |||
| } | |||
| } | |||
| GELOGE(ge::FAILED, "Dynamic batch %lu can not match the gear of model.", batch_size); | |||
| REPORT_INNER_ERROR("E19999", "Dynamic batch %lu can not match the gear of model.", batch_size); | |||
| GELOGE(ge::FAILED, "[Check][Param] Dynamic batch %lu can not match the gear of model.", batch_size); | |||
| return false; | |||
| } | |||
| bool IsDynamicImageSizeMatchModel(uint64_t image_height, uint64_t image_width, | |||
| const vector<std::vector<int64_t>> &batch_info) { | |||
| if (batch_info.empty()) { | |||
| GELOGE(ge::FAILED, "Dynamic batch info is empty."); | |||
| REPORT_INNER_ERROR("E19999", "ParamDynamic batch info is empty. check invalid"); | |||
| GELOGE(ge::FAILED, "[Check][Param] Dynamic batch info is empty."); | |||
| return false; | |||
| } | |||
| for (auto resolution : batch_info) { | |||
| if (resolution.size() != kDynamicImageSizeVecSize) { | |||
| GELOGE(ge::FAILED, "Dynamic resolution param num is %zu, current resolution size is %zu.", | |||
| REPORT_INNER_ERROR("E19999", "Dynamic resolution param num is %zu, current resolution size is %zu.", | |||
| kDynamicImageSizeVecSize, resolution.size()); | |||
| GELOGE(ge::FAILED, "[Check][Param] Dynamic resolution param num is %zu, current resolution size is %zu.", | |||
| kDynamicImageSizeVecSize, resolution.size()); | |||
| return false; | |||
| } | |||
| @@ -160,22 +167,28 @@ bool IsDynamicImageSizeMatchModel(uint64_t image_height, uint64_t image_width, | |||
| return true; | |||
| } | |||
| } | |||
| GELOGE(ge::FAILED, "Dynamic resolution (%lu,%lu) can not match the gear of model.", image_height, image_width); | |||
| REPORT_INNER_ERROR("E19999", "Dynamic resolution (%lu,%lu) can not match the gear of model.", | |||
| image_height, image_width); | |||
| GELOGE(ge::FAILED, "[Check][Param]Dynamic resolution (%lu,%lu) can not match the gear of model.", | |||
| image_height, image_width); | |||
| return false; | |||
| } | |||
| bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims, | |||
| const vector<vector<int64_t>> &batch_info) { | |||
| if (batch_info.empty()) { | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Dynamic batch info is empty."); | |||
| REPORT_INNER_ERROR("E19999", "param batch_info is empty, check invalid"); | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Param] Dynamic batch info is empty."); | |||
| return false; | |||
| } | |||
| bool find_match = false; | |||
| for (auto resolution : batch_info) { | |||
| if (cur_dynamic_dims.size() != resolution.size()) { | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Cur dynamic dims param num is %zu, current resolution size is %zu.", | |||
| REPORT_INNER_ERROR("E19999", "Cur dynamic dims param num is %zu, current resolution size is %zu.", | |||
| cur_dynamic_dims.size(), resolution.size()); | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, | |||
| "[Check][Param] Cur dynamic dims param num is %zu, current resolution size is %zu.", | |||
| cur_dynamic_dims.size(), resolution.size()); | |||
| return false; | |||
| } | |||
| @@ -192,7 +205,7 @@ bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims, | |||
| } | |||
| } | |||
| if (!find_match) { | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "choose dynamic dims can not match the gear of model."); | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Param] choose dynamic dims can not match the gear of model."); | |||
| } | |||
| return find_match; | |||
| } | |||
| @@ -241,7 +254,7 @@ Status GeExecutor::Initialize() { | |||
| Status init_hostcpu_engine_status = HostCpuEngine::GetInstance().Initialize(); | |||
| if (init_hostcpu_engine_status != SUCCESS) { | |||
| GELOGE(init_hostcpu_engine_status, "Failed to initialize HostCpuEngine"); | |||
| GELOGE(init_hostcpu_engine_status, "[initialize][HostCpuEngine] failed"); | |||
| return init_hostcpu_engine_status; | |||
| } | |||
| @@ -251,12 +264,12 @@ Status GeExecutor::Initialize() { | |||
| mem_type.push_back(RT_MEMORY_P2P_DDR); | |||
| auto ret = MemManager::Instance().Initialize(mem_type); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Memory Manager init failed."); | |||
| GELOGE(ret, "[Initialize][MemManager] failed."); | |||
| return ret; | |||
| } | |||
| GE_CHK_STATUS_RET(OpsKernelBuilderManager::Instance().Initialize({}, false), | |||
| "Failed to initialize OpsKernelBuilders."); | |||
| "[Initialize][OpsKernelBuilderManager] failed."); | |||
| // Start profiling | |||
| Options profiling_options; | |||
| @@ -292,13 +305,18 @@ Status GeExecutor::Finalize() { | |||
| Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | |||
| uint64_t batch_size) { | |||
| if (dynamic_input_addr == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, "Dynamic input addr is nullptr!"); | |||
| REPORT_INNER_ERROR("E19999", "param dynamic_input_addr is nullptr, check invalid, model id:%u", model_id); | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, | |||
| "[Check][Param] Dynamic input addr is nullptr, model id:%u", model_id); | |||
| return ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID; | |||
| } | |||
| uint64_t size = sizeof(uint32_t); | |||
| if (length < size) { | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Dynamic input size [%lu] is less than [%lu]!", length, size); | |||
| REPORT_INNER_ERROR("E19999", "Dynamic input size [%lu] is less than [%lu], check invalid, model id:%u", | |||
| length, size, model_id); | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, | |||
| "[Check][Param] Dynamic input size [%lu] is less than [%lu], model id:%u", length, size, model_id); | |||
| return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; | |||
| } | |||
| if (length >= sizeof(uint64_t)) { | |||
| @@ -311,24 +329,28 @@ Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_ad | |||
| int32_t dynamic_type = static_cast<int32_t>(FIXED); | |||
| Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Get dynamic input info failed."); | |||
| REPORT_CALL_ERROR("E19999", "get dynamic batch info failed, model id:%u", model_id); | |||
| GELOGE(ret, "[Get][DynamicBatchInfo] failed, model id:%u.", model_id); | |||
| return ret; | |||
| } | |||
| if (!IsDynamicBatchSizeMatchModel(batch_size, batch_info)) { | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID, "The current dynamic input does not match the gear of the model."); | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID, | |||
| "[Check][Param] The current dynamic input does not match the gear of the model(id:%u).", model_id); | |||
| return ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID; | |||
| } | |||
| ret = GraphExecutor::SetDynamicSize(model_id, batch_num, static_cast<int32_t>(DYNAMIC_BATCH)); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Set dynamic size failed"); | |||
| REPORT_CALL_ERROR("E19999", "set dynamic size failed, model id:%u, dynamic_type:1", model_id); | |||
| GELOGE(ret, "[Set][DynamicSize] failed, model id:%u, dynamic_type:1", model_id); | |||
| return ret; | |||
| } | |||
| // memcpy dynamic_batch_size from host to device | |||
| rtError_t rt_ret = rtMemcpy(dynamic_input_addr, length, &batch_size, size, RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "memcpy dynamic batch input data failed! ret: 0x%X", rt_ret); | |||
| REPORT_CALL_ERROR("E19999", "Call rtMemcpy, size:%lu ret:0x%X", length, rt_ret); | |||
| GELOGE(rt_ret, "[Call][RtMemcpy] memcpy dynamic batch input data failed! size:%lu ret:0x%X", length, rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| return SUCCESS; | |||
| @@ -337,14 +359,19 @@ Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_ad | |||
| Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | |||
| uint64_t image_height, uint64_t image_width) { | |||
| if (dynamic_input_addr == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, "Dynamic input addr is nullptr!"); | |||
| REPORT_INNER_ERROR("E19999", "param dynamic_input_addr is nullptr, check invalid, model id:%u", model_id); | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, | |||
| "[Check][Param] Dynamic input addr is nullptr, model id:%u", model_id); | |||
| return ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID; | |||
| } | |||
| uint64_t dynamic_input_size = kDynamicImageSizeInputSize * sizeof(uint32_t); | |||
| if (length < dynamic_input_size) { | |||
| REPORT_INNER_ERROR("E19999", "Dynamic input size [%lu] is less than [%lu], check invalid, model id:%u", | |||
| length, dynamic_input_size, model_id); | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, | |||
| "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size); | |||
| "[Check][Param] Dynamic input size [%lu] is less than [%lu], model id:%u", | |||
| length, dynamic_input_size, model_id); | |||
| return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; | |||
| } | |||
| uint64_t size = sizeof(uint32_t); | |||
| @@ -357,18 +384,22 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad | |||
| int32_t dynamic_type = static_cast<int32_t>(FIXED); | |||
| Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Get dynamic input info failed."); | |||
| REPORT_CALL_ERROR("E19999", "Get dynamic input info failed, model id:%u.", model_id); | |||
| GELOGE(ret, "[Get][DynamicBatchInfo] failed, model id:%u.", model_id); | |||
| return ret; | |||
| } | |||
| if (!IsDynamicImageSizeMatchModel(image_height, image_width, batch_info)) { | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID, "The current dynamic input does not match the gear of the model."); | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID, | |||
| "[Check][Param] The current dynamic input does not match the gear of the model, " | |||
| "image_height:%lu, image_width:%lu.", image_height, image_width); | |||
| return ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID; | |||
| } | |||
| ret = GraphExecutor::SetDynamicSize(model_id, batch_num, static_cast<int32_t>(DYNAMIC_IMAGE)); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Set dynamic size failed"); | |||
| REPORT_CALL_ERROR("E19999", "Set dynamic size failed, model id:%u,", model_id); | |||
| GELOGE(ret, "[Set][DynamicSize] failed, model id:%u", model_id); | |||
| return ret; | |||
| } | |||
| @@ -376,7 +407,9 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad | |||
| rtError_t rt_ret = | |||
| rtMemcpy(dynamic_input_addr, size, &image_height, size, RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "memcpy dynamic resolution input data failed! ret: 0x%X", rt_ret); | |||
| REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed! size:%lu, ret:0x%X, model id:%u", size, rt_ret, model_id); | |||
| GELOGE(rt_ret, "[Call][RtMemcpy] memcpy dynamic resolution input data failed! size:%lu, ret:0x%X, model id:%u", | |||
| size, rt_ret, model_id); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| @@ -385,7 +418,10 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad | |||
| rt_ret = rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(dynamic_input_addr) + size), | |||
| remain_size, &image_width, size, RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "memcpy dynamic resolution input data failed!"); | |||
| REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed! size:%lu, ret:0x%X, model id:%u", | |||
| remain_size, rt_ret, model_id); | |||
| GELOGE(rt_ret, "[Call][RtMemcpy] memcpy dynamic resolution input data failed! size:%lu, ret:0x%X, model id:%u", | |||
| remain_size, rt_ret, model_id); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| return SUCCESS; | |||
| @@ -394,40 +430,48 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad | |||
| Status GeExecutor::SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | |||
| const vector<uint64_t> &dynamic_dims) { | |||
| if (dynamic_input_addr == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, "Dynamic input addr is nullptr!"); | |||
| REPORT_INNER_ERROR("E19999", "Param dynamic_input_addr is nullptr, check invalid, model id:%u", model_id); | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, | |||
| "[Check][Param] Dynamic input addr is nullptr, model id:%u", model_id); | |||
| return ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID; | |||
| } | |||
| vector<uint64_t> cur_dynamic_dims; | |||
| Status ret = GetCurDynamicDims(model_id, dynamic_dims, cur_dynamic_dims); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Set cur gear dynamic dims failed"); | |||
| GELOGE(ret, "[Get][CurDynamicDims] failed, model id:%u", model_id); | |||
| return ret; | |||
| } | |||
| std::vector<std::vector<int64_t>> batch_info; | |||
| int32_t dynamic_type = static_cast<int32_t>(FIXED); | |||
| ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Get dynamic input info failed."); | |||
| REPORT_CALL_ERROR("E19999", "Get dynamic input info failed, model id:%u.", model_id); | |||
| GELOGE(ret, "[Get][DynamicBatchInfo] failed, model id:%u.", model_id); | |||
| return ret; | |||
| } | |||
| if (!IsDynmaicDimsSizeMatchModel(cur_dynamic_dims, batch_info)) { | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID, "The current dynamic input does not match the gear of the model."); | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID, | |||
| "[Check][Param] The current dynamic input does not match the gear of the model, id:%u.", model_id); | |||
| return ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID; | |||
| } | |||
| ret = GraphExecutor::SetDynamicSize(model_id, cur_dynamic_dims, static_cast<int32_t>(DYNAMIC_DIMS)); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Set dynamic size failed"); | |||
| REPORT_CALL_ERROR("E19999", "Set dynamic size failed, model id:%u", model_id); | |||
| GELOGE(ret, "[Set][DynamicSize] failed, model id:%u", model_id); | |||
| return ret; | |||
| } | |||
| size_t dynamic_dim_num = cur_dynamic_dims.size(); | |||
| uint64_t dynamic_input_size = static_cast<uint64_t>(dynamic_dim_num * sizeof(uint32_t)); | |||
| if (length < dynamic_input_size) { | |||
| REPORT_INNER_ERROR("E19999", "input dynamic size [%lu] is less than [%lu], model id:%u", | |||
| length, dynamic_input_size, model_id); | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, | |||
| "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size); | |||
| "[Check][Param] Dynamic input size [%lu] is less than [%lu], model id:%u", | |||
| length, dynamic_input_size, model_id); | |||
| return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; | |||
| } | |||
| uint64_t size = sizeof(uint32_t); | |||
| @@ -440,7 +484,9 @@ Status GeExecutor::SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, u | |||
| rt_ret = rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(dynamic_input_addr) + size * i), | |||
| length - size * i, &cur_dynamic_dims[i], size, RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "memcpy dynamic resolution input data failed!"); | |||
| REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", (length - size * i), rt_ret); | |||
| GELOGE(rt_ret, "[Call][RtMemcpy] memcpy dynamic resolution input data failed! size:%lu, ret:0x%X", | |||
| length - size * i, rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| } | |||
| @@ -454,14 +500,14 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> & | |||
| vector<ge::TensorDesc> output_desc; | |||
| auto ret = GetModelDescInfo(model_id, input_desc, output_desc); | |||
| if (ret != ge::SUCCESS) { | |||
| GELOGE(ret, "GetModelDescInfo failed."); | |||
| GELOGE(ret, "[Get][ModelDescInfo] failed, model id:%u.", model_id); | |||
| return ret; | |||
| } | |||
| vector<string> user_designate_shape_order; | |||
| vector<int64_t> all_data_dims; | |||
| ret = GetUserDesignateShapeOrder(model_id, user_designate_shape_order); | |||
| if (ret != ge::SUCCESS) { | |||
| GELOGE(ret, "GetUserDesignateShapeOrder failed."); | |||
| GELOGE(ret, "[Call][GetUserDesignateShapeOrder] failed, model id:%u.", model_id); | |||
| return ret; | |||
| } | |||
| for (auto &data_name : user_designate_shape_order) { | |||
| @@ -475,8 +521,10 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> & | |||
| } | |||
| } | |||
| if (dynamic_dims.size() != all_data_dims.size()) { | |||
| REPORT_INNER_ERROR("E19999", "Dynamic input size [%lu] is not equal with all data dims size [%lu]!", | |||
| dynamic_dims.size(), all_data_dims.size()); | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, | |||
| "Dynamic input size [%lu] is not equal with all data dims size [%lu]!", | |||
| "[Check][Param] Dynamic input size [%lu] is not equal with all data dims size [%lu]!", | |||
| dynamic_dims.size(), all_data_dims.size()); | |||
| return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; | |||
| } | |||
| @@ -484,8 +532,10 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> & | |||
| if (all_data_dims[i] < 0) { | |||
| cur_dynamic_dims.push_back(dynamic_dims[i]); | |||
| } else if (static_cast<uint64_t>(all_data_dims[i]) != dynamic_dims[i]) { | |||
| REPORT_INNER_ERROR("E19999", "Static dims should be same, index:%zu value:%lu should be %ld", | |||
| i, dynamic_dims[i], all_data_dims[i]); | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, | |||
| "Static dims should be same, index: %zu value: %lu should be %ld", | |||
| "[Check][Param] Static dims should be same, index:%zu value:%lu should be %ld", | |||
| i, dynamic_dims[i], all_data_dims[i]); | |||
| return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; | |||
| } | |||
| @@ -496,12 +546,14 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> & | |||
| Status GeExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | |||
| GELOGI("Begin to get current shape"); | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized, model id:%u", model_id); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized, model id:%u", model_id); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| Status ret = GraphExecutor::GetCurShape(model_id, batch_info, dynamic_type); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Get current shape failed"); | |||
| REPORT_CALL_ERROR("E19999", "Get Cur Shape failed, model id:%u", model_id); | |||
| GELOGE(ret, "[Get][CurShape] failed, model id:%u", model_id); | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| @@ -512,11 +564,14 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add | |||
| const kAippDynamicPara &aippParms) { | |||
| GELOGI("Enter to SetDynamicAippData."); | |||
| if (dynamic_input_addr == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, "Dynamic aipp input addr is nullptr!"); | |||
| REPORT_INNER_ERROR("E19999", "Param dynamic_input_addr is nullptr, check invalid, model id:%u", model_id); | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, | |||
| "[Check][Param] Dynamic aipp input addr is nullptr, model id:%u", model_id); | |||
| return ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID; | |||
| } | |||
| if (aippBatchPara.empty()) { | |||
| GELOGE(ACL_ERROR_GE_AIPP_BATCH_EMPTY, "aippBatchPara is empty."); | |||
| REPORT_INNER_ERROR("E19999", "Param aippBatchPara is empty, check invalid, model id:%u", model_id); | |||
| GELOGE(ACL_ERROR_GE_AIPP_BATCH_EMPTY, "[Check][Param] aippBatchPara is empty, model id:%u", model_id); | |||
| return ACL_ERROR_GE_AIPP_BATCH_EMPTY; | |||
| } | |||
| uint64_t batch_num = aippBatchPara.size(); | |||
| @@ -527,14 +582,18 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add | |||
| "batch num is %lu, struct_len is %lu", | |||
| model_id, length, batch_num, struct_len); | |||
| if (struct_len > length) { | |||
| REPORT_INNER_ERROR("E19999", "input dynamic aipp param len:%lu is larger than aipp_data size:%lu", | |||
| struct_len, length); | |||
| GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, | |||
| "input dynamic aipp param len [%lu] is larger than aipp_data size [%lu]", struct_len, length); | |||
| "[Check][Param] input dynamic aipp param len [%lu] is larger than aipp_data size [%lu]", | |||
| struct_len, length); | |||
| return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; | |||
| } | |||
| // Memcpy real kAippDynamicBatchPara from host to device | |||
| rtError_t rt_ret = rtMemcpy(dynamic_input_addr, length, &aippParms, real_aippParms_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "memcpy real_aippParms_size failed! ret: 0x%X", rt_ret); | |||
| REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", length, rt_ret); | |||
| GELOGE(rt_ret, "[Call][RtMemcpy] memcpy aippParms failed! size:%lu, ret:0x%X", length, rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| uint64_t remain_len = length - real_aippParms_size; | |||
| @@ -545,7 +604,8 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add | |||
| (remain_len - i * sizeof(kAippDynamicBatchPara)), &(aippBatchPara[i]), | |||
| sizeof(kAippDynamicBatchPara), RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "memcpy kAippDynamicBatchPara input data failed! ret: 0x%X", rt_ret); | |||
| REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, ret:0x%X", rt_ret); | |||
| GELOGE(rt_ret, "[Call][RtMemcpy] memcpy kAippDynamicBatchPara input data failed! ret:0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| } | |||
| @@ -555,12 +615,14 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add | |||
| Status GeExecutor::UnloadModel(uint32_t model_id) { | |||
| GELOGD("unload model %u begin.", model_id); | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized"); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| Status ret = GraphLoader::DestroyAicpuSessionForInfer(model_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id); | |||
| REPORT_CALL_ERROR("E19999", "Destroy Aicpu Session For Infer failed, model id:%u", model_id); | |||
| GELOGE(ret, "[Destroy][AicpuSession] For Infer failed. model id:%u", model_id); | |||
| return ret; | |||
| } | |||
| @@ -578,7 +640,8 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { | |||
| } | |||
| ret = GraphLoader::UnloadModel(model_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id); | |||
| REPORT_CALL_ERROR("E19999", "unload model failed, model id:%u", model_id); | |||
| GELOGE(ret, "[Unload][Model] failed. model id:%u", model_id); | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| @@ -588,7 +651,8 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { | |||
| Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | |||
| std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) { | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized, model id:%u", model_id); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized, model id:%u", model_id); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| @@ -600,20 +664,26 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes | |||
| Status ret = GraphExecutor::GetInputOutputDescInfo(model_id, input_desc_infos, output_desc_infos, input_formats, | |||
| output_formats, new_model_desc); | |||
| if (ret != domi::SUCCESS) { | |||
| GELOGE(ret, "GetInputOutputDescInfo failed. ret = %u", ret); | |||
| REPORT_CALL_ERROR("E19999", "get input output desc info failed, ret = %u, model id:%u", ret, model_id); | |||
| GELOGE(ret, "[Get][InputOutputDescInfo] failed. ret = %u, model id:%u", ret, model_id); | |||
| return ACL_ERROR_GE_GET_TENSOR_INFO; | |||
| } | |||
| if (input_formats.size() != input_desc_infos.size()) { | |||
| REPORT_INNER_ERROR("E19999", "input_formats size %zu is not equal to input_desc_infos size %zu, model id:%u.", | |||
| input_formats.size(), input_desc_infos.size(), model_id); | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, | |||
| "input_formats size %zu is not equal to input_desc_infos size %zu.", | |||
| input_formats.size(), input_desc_infos.size()); | |||
| "[Check][Param] input_formats size %zu is not equal to input_desc_infos size %zu, model id:%u.", | |||
| input_formats.size(), input_desc_infos.size(), model_id); | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| if (output_formats.size() != output_desc_infos.size()) { | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "output_formats size %zu is not equal to output_desc_infos size %zu.", | |||
| output_formats.size(), output_desc_infos.size()); | |||
| REPORT_INNER_ERROR("E19999", "output_formats size %zu is not equal to output_desc_infos size %zu, model id:%u.", | |||
| output_formats.size(), output_desc_infos.size(), model_id); | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, | |||
| "[Check][Param] output_formats size %zu is not equal to output_desc_infos size %zu, model id:%u.", | |||
| output_formats.size(), output_desc_infos.size(), model_id); | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| @@ -635,13 +705,15 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes | |||
| Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | |||
| int32_t &dynamic_type) { | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "GetDynamicBatchInfo failed."); | |||
| REPORT_CALL_ERROR("E19999", "Get Dynamic BatchInfo failed, model id:%u.", model_id); | |||
| GELOGE(ret, "[Get][DynamicBatchInfo] failed, model id:%u.", model_id); | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| @@ -657,13 +729,15 @@ Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vecto | |||
| Status GeExecutor::GetCombinedDynamicDims(uint32_t model_id, vector<vector<int64_t>> &batch_info) { | |||
| GELOGI("Begin to get combined dynamic dims info."); | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| Status ret = GraphExecutor::GetCombinedDynamicDims(model_id, batch_info); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "GetCombinedDynamicDims failed."); | |||
| REPORT_CALL_ERROR("E19999", "Get Combined DynamicDims failed, model id:%u.", model_id); | |||
| GELOGE(ret, "[Get][CombinedDynamicDims] failed, model id:%u.", model_id); | |||
| return ret; | |||
| } | |||
| @@ -680,13 +754,15 @@ Status GeExecutor::GetCombinedDynamicDims(uint32_t model_id, vector<vector<int64 | |||
| /// | |||
| Status GeExecutor::GetUserDesignateShapeOrder(uint32_t model_id, vector<string> &user_designate_shape_order) { | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| Status ret = GraphExecutor::GetUserDesignateShapeOrder(model_id, user_designate_shape_order); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "GetUserDesignateShapeOrder failed."); | |||
| REPORT_CALL_ERROR("E19999", "GetUserDesignateShapeOrder failed, model id:%u.", model_id); | |||
| GELOGE(ret, "[Call][GetUserDesignateShapeOrder] failed, model id:%u.", model_id); | |||
| return ret; | |||
| } | |||
| @@ -704,7 +780,8 @@ Status GeExecutor::GetUserDesignateShapeOrder(uint32_t model_id, vector<string> | |||
| Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { | |||
| GELOGI("Begin to GetAIPPInfo."); | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | |||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized yet!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| Status ret = GraphExecutor::GetAippInfo(model_id, index, aipp_info); | |||
| @@ -719,7 +796,8 @@ Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo | |||
| Status GeExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { | |||
| GELOGI("Begin to get aipp type."); | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | |||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized yet!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| Status ret = GraphExecutor::GetAippType(model_id, index, type, aipp_index); | |||
| @@ -741,8 +819,10 @@ Status GeExecutor::GetOpAttr(uint32_t model_id, const std::string &op_name, cons | |||
| } | |||
| Status ret = GraphExecutor::GetOpAttr(model_id, op_name, attr_name, attr_value); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "[Get][OpAttr]Get op:%s attr:%s failed.", op_name.c_str(), attr_name.c_str()); | |||
| REPORT_CALL_ERROR("E19999", "Get op:%s attr:%s failed.", op_name.c_str(), attr_name.c_str()); | |||
| GELOGE(ret, "[Get][OpAttr]Get op:%s attr:%s failed, model id:%u.", | |||
| op_name.c_str(), attr_name.c_str(), model_id); | |||
| REPORT_CALL_ERROR("E19999", "Get op:%s attr:%s failed, model id:%u", | |||
| op_name.c_str(), attr_name.c_str(), model_id); | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| @@ -750,12 +830,14 @@ Status GeExecutor::GetOpAttr(uint32_t model_id, const std::string &op_name, cons | |||
| Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info) { | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | |||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not inited yet!"); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not inited yet!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| Status ret = GraphExecutor::GetModelAttr(model_id, dynamic_output_shape_info); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Get dynamic batch output shape info failed."); | |||
| REPORT_CALL_ERROR("E19999", "Get Model Attr failed, model id:%u.", model_id); | |||
| GELOGE(ret, "[Get][ModelAttr] failed, model id:%u.", model_id); | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| @@ -764,7 +846,8 @@ Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dyn | |||
| Status GeExecutor::CommandHandle(const Command &command) { | |||
| Status ret = GraphLoader::CommandHandle(command); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ACL_ERROR_GE_COMMAND_HANDLE, "CommandHandle: Command Handle failed."); | |||
| REPORT_CALL_ERROR("E19999", "call CommandHandle failed, ret:%u", ret); | |||
| GELOGE(ACL_ERROR_GE_COMMAND_HANDLE, "[Call][CommandHandle] failed, ret:%u", ret); | |||
| return ACL_ERROR_GE_COMMAND_HANDLE; | |||
| } | |||
| return SUCCESS; | |||
| @@ -773,7 +856,8 @@ Status GeExecutor::CommandHandle(const Command &command) { | |||
| Status GeExecutor::GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size) { | |||
| GELOGI("Get max used memory begin."); | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| @@ -793,14 +877,15 @@ Status GeExecutor::GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size) { | |||
| Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_data) { | |||
| GELOGI("Load data from file begin."); | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| string filePath = RealPath(path.c_str()); | |||
| if (filePath.empty()) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, | |||
| "File path is invalid. please check your text file '%s'.", path.c_str()); | |||
| "[Call][RealPath] File path is invalid. please check your text file '%s'.", path.c_str()); | |||
| return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; | |||
| } | |||
| GELOGI("load modelData from file: %s.", path.c_str()); | |||
| @@ -829,7 +914,8 @@ Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_da | |||
| Status GeExecutor::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, size_t mem_size, | |||
| void *weight_ptr, size_t weight_size) { | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | |||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not inited yet!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| @@ -850,7 +936,8 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat | |||
| const std::vector<uint32_t> &output_queue_ids) { | |||
| GELOGI("Load model with queue begin."); | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| return GraphLoader::LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids); | |||
| @@ -889,7 +976,8 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel | |||
| const std::vector<GeTensorDesc> &input_desc, ge::RunModelData &run_output_data, | |||
| std::vector<GeTensorDesc> &output_desc, bool async_mode) { | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| @@ -904,7 +992,8 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel | |||
| int32_t dynamic_type = static_cast<int32_t>(FIXED); | |||
| Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Get dynamic input info failed."); | |||
| REPORT_CALL_ERROR("E19999", "get dynamic batch info failed, model id:%u.", model_id); | |||
| GELOGE(ret, "[Get][DynamicBatchInfo] failed, model id:%u.", model_id); | |||
| return ret; | |||
| } | |||
| if (!batch_info.empty()) { | |||
| @@ -926,14 +1015,16 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel | |||
| Status GeExecutor::GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size) { | |||
| GELOGI("Get memory and weight size from file begin."); | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| ModelData model; | |||
| Status ret = ge::GraphLoader::LoadDataFromFile(path, 0, model); | |||
| if ((ret != SUCCESS) || (model.model_data == nullptr)) { | |||
| GELOGE(ret, "Load data from file failed. ret = %d", ret); | |||
| REPORT_CALL_ERROR("E19999", "load data from file failed, ret = %d", ret); | |||
| GELOGE(ret, "[Load][Data] from file failed. ret = %d", ret); | |||
| return ret; | |||
| } | |||
| @@ -958,12 +1049,14 @@ Status GeExecutor::GetMemAndWeightSize(const void *model_data, size_t model_size | |||
| size_t &weight_size) { | |||
| GELOGI("Get memory and weight size from data begin."); | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| if (model_data == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID, "invalid model data!"); | |||
| REPORT_INNER_ERROR("E19999", "param model_data is nullptr, check invalid!"); | |||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID, "[Check][Param] invalid model data!"); | |||
| return ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID; | |||
| } | |||
| @@ -997,7 +1090,8 @@ Status GeExecutor::LoadDynamicSingleOpV2(const std::string &model_name, const ge | |||
| Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | |||
| std::vector<DataBuffer> &outputs) { | |||
| if (executor == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "param is NULL"); | |||
| REPORT_INNER_ERROR("E19999", "Param executor is nullptr, check invalid"); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] param executor is nullptr"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| @@ -1021,7 +1115,8 @@ Status GeExecutor::GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id) | |||
| GE_CHECK_NOTNULL(model_manager); | |||
| auto davinci_model = model_manager->GetModel(model_id); | |||
| if (davinci_model == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "Model id: %d is invaild or model is not loaded.", model_id); | |||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | |||
| "[Get][Model] failed, Model id:%u is invaild or model is not loaded.", model_id); | |||
| return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID; | |||
| } | |||
| @@ -1034,7 +1129,7 @@ Status GeExecutor::GetBatchInfoSize(uint32_t model_id, size_t &shape_count) { | |||
| int32_t dynamic_type = static_cast<int32_t>(FIXED); | |||
| Status ret = GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Calc batch info size failed. ret = %d", ret); | |||
| GELOGE(ret, "[Get][DynamicBatchInfo] failed. ret = %d, model id:%u", ret, model_id); | |||
| return ret; | |||
| } | |||
| if (batch_info.empty()) { | |||
| @@ -1048,13 +1143,15 @@ Status GeExecutor::GetBatchInfoSize(uint32_t model_id, size_t &shape_count) { | |||
| Status GeExecutor::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) { | |||
| GELOGI("Begin to GetOrigInputInfo."); | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | |||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| Status ret = GraphExecutor::GetOrigInputInfo(model_id, index, orig_input_info); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "GetOrigInputInfo failed."); | |||
| REPORT_CALL_ERROR("E19999", "Get Orig Input Info failed, model id:%u.", model_id); | |||
| GELOGE(ret, "[Get][OrigInputInfo] failed, model id:%u.", model_id); | |||
| return ret; | |||
| } | |||
| @@ -1067,13 +1164,15 @@ Status GeExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, | |||
| std::vector<InputOutputDims> &output_dims) { | |||
| GELOGI("Begin to GetAllAippInputOutputDims."); | |||
| if (!isInit_) { | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | |||
| REPORT_INNER_ERROR("E19999", "GeExecutor has not been initialized!"); | |||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "[Check][Param] GeExecutor has not been initialized!"); | |||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
| } | |||
| Status ret = GraphExecutor::GetAllAippInputOutputDims(model_id, index, input_dims, output_dims); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "GetAllAippInputOutputDims failed."); | |||
| REPORT_CALL_ERROR("E19999", "Get All Aipp Input Output Dims failed, model id:%u.", model_id); | |||
| GELOGE(ret, "[Get][AllAippInputOutputDims] failed, model id:%u.", model_id); | |||
| return ret; | |||
| } | |||
| @@ -1085,7 +1184,10 @@ Status GeExecutor::GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_ | |||
| GELOGI("Begin to GetOpDescInfo."); | |||
| Status ret = GraphExecutor::GetOpDescInfo(device_id, stream_id, task_id, op_desc_info); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "GetOpDescInfo failed."); | |||
| REPORT_CALL_ERROR("E19999", "get opdesc info failed, device_id:%u, stream_id:%u, task_id:%u.", | |||
| device_id, stream_id, task_id); | |||
| GELOGE(ret, "[Get][OpDescInfo] failed, device_id:%u, stream_id:%u, task_id:%u.", | |||
| device_id, stream_id, task_id); | |||
| return ret; | |||
| } | |||
| GELOGI("GetOpDescInfo succ."); | |||
| @@ -1096,7 +1198,7 @@ Status GeExecutor::SetDump(const DumpConfig &dump_config) { | |||
| GELOGI("Start to set dump config"); | |||
| auto ret = DumpManager::GetInstance().SetDumpConf(dump_config); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Set dump conf failed"); | |||
| GELOGE(ret, "[Set][DumpConf] failed, ret:%d", ret); | |||
| return ret; | |||
| } | |||
| GELOGI("Set dump config successfully"); | |||
| @@ -0,0 +1,58 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "ge_opt_info/ge_opt_info.h" | |||
| #include <string> | |||
| #include <map> | |||
| #include "graph/ge_local_context.h" | |||
| #include "ge/ge_api_types.h" | |||
| #include "common/debug/ge_log.h" | |||
| #include "opt_info.h" | |||
| namespace ge { | |||
| Status GeOptInfo::SetOptInfo() { | |||
| std::string soc_ver; | |||
| graphStatus ret = GetThreadLocalContext().GetOption(SOC_VERSION, soc_ver); | |||
| if (ret != GRAPH_SUCCESS) { | |||
| REPORT_CALL_ERROR("E19999", "Get soc version failed."); | |||
| GELOGE(FAILED, "[Get][SocVersion]Get soc version failed."); | |||
| return FAILED; | |||
| } | |||
| GELOGD("Soc version:%s.", soc_ver.c_str()); | |||
| std::map<std::string, std::string> opt_info; | |||
| // the first arg does not work at present. | |||
| if (gelc::GetOptInfo(gelc::kOffline, soc_ver, opt_info) != gelc::SUCCESS) { | |||
| REPORT_CALL_ERROR("E19999", "Get optional information failed, is_offline:%d, soc version:%s", | |||
| gelc::kOffline, soc_ver.c_str()); | |||
| GELOGE(FAILED, "[Get][OptInfo]Get optional information failed, is_offline:%d, soc version:%s", | |||
| gelc::kOffline, soc_ver.c_str()); | |||
| return FAILED; | |||
| } | |||
| // Do nothing if the returned information is empty. | |||
| if (opt_info.empty()) { | |||
| GELOGI("Optional information is empty."); | |||
| return SUCCESS; | |||
| } | |||
| std::map<std::string, std::string> graph_options = GetThreadLocalContext().GetAllGraphOptions(); | |||
| for (const auto &itr : opt_info) { | |||
| graph_options.emplace(itr.first, itr.second); | |||
| GELOGI("Get optional information success, key:%s, value:%s.", itr.first.c_str(), itr.second.c_str()); | |||
| } | |||
| GetThreadLocalContext().SetGraphOption(graph_options); | |||
| return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
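For orientation, a minimal usage sketch of the new helper. GeOptInfo::SetOptInfo, GetThreadLocalContext and SOC_VERSION are taken from the hunk above; the option key "opt_module.feature" is a hypothetical placeholder for whatever gelc::GetOptInfo delivers.

    #include <string>
    #include "ge_opt_info/ge_opt_info.h"
    #include "graph/ge_local_context.h"

    // Sketch only: merge the optional information for the configured SOC
    // version into the thread-local graph options, then read one key back.
    ge::Status PrepareOptInfo() {
      ge::Status ret = ge::GeOptInfo::SetOptInfo();
      if (ret != ge::SUCCESS) {
        return ret;  // SOC_VERSION missing or gelc::GetOptInfo failed
      }
      std::string value;
      // "opt_module.feature" is a hypothetical key; real keys come from gelc.
      (void)ge::GetThreadLocalContext().GetOption("opt_module.feature", value);
      return ge::SUCCESS;
    }

Because the merge uses emplace, graph options the user already set are kept and not overwritten by the optional information.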
| @@ -14,23 +14,18 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_HOST_OP_H_ | |||
| #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_HOST_OP_H_ | |||
| #ifndef GE_OPT_INFO_GE_OPT_INFO_H_ | |||
| #define GE_OPT_INFO_GE_OPT_INFO_H_ | |||
| #include "stub_engine/ops_kernel_store/op/op.h" | |||
| #include "ge/ge_api_error_codes.h" | |||
| #include "register/register_types.h" | |||
| namespace ge { | |||
| namespace st { | |||
| class GE_FUNC_VISIBILITY HostOp : public Op { | |||
| class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY GeOptInfo { | |||
| public: | |||
| HostOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} | |||
| ~HostOp() override = default; | |||
| HostOp &operator=(const HostOp &op) = delete; | |||
| HostOp(const HostOp &op) = delete; | |||
| Status Run() override; | |||
| GeOptInfo() = default; | |||
| static Status SetOptInfo(); | |||
| }; | |||
| } // namespace st | |||
| } // namespace ge | |||
| #endif // GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_HOST_OP_H_ | |||
| #endif // GE_OPT_INFO_GE_OPT_INFO_H_ | |||
| @@ -16,6 +16,7 @@ | |||
| #include "ge_runtime/task/hccl_task.h" | |||
| #include <algorithm> | |||
| #include "framework/common/util.h" | |||
| #include "ge_runtime/task/task_factory.h" | |||
| #include "common/opskernel/ops_kernel_info_store.h" | |||
| #include "common/opskernel/ge_task_info.h" | |||
| @@ -72,7 +72,7 @@ bool LabelGotoTask::Distribute() { | |||
| return false; | |||
| } | |||
| rt_ret = rtLabelListCpy((void**)label_list.data(), label_list.size(), label_info_, label_info_size); | |||
| rt_ret = rtLabelListCpy(reinterpret_cast<void**>(label_list.data()), label_list.size(), label_info_, label_info_size); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
| return false; | |||
| @@ -80,8 +80,7 @@ bool LabelAllocator::CollectFunctionalNode(ComputeGraphPtr &graph, std::set<Node | |||
| NodePtr func_node = graph->GetParentNode(); | |||
| if (func_node == nullptr) { | |||
| REPORT_INNER_ERROR("E19999", "Parent node not set in node:%s(%s), graph:%s", | |||
| func_node->GetName().c_str(), func_node->GetType().c_str(), graph->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "Parent node not set, graph:%s", graph->GetName().c_str()); | |||
| GELOGE(INTERNAL_ERROR, "[Get][Node] Parent functional node not set: %s.", graph->GetName().c_str()); | |||
| return false; | |||
| } | |||
| @@ -32,7 +32,6 @@ | |||
| #include "graph/ge_attr_value.h" | |||
| #include "graph/ge_context.h" | |||
| #include "external/graph/ge_error_codes.h" | |||
| #include "graph/manager/graph_mem_allocator.h" | |||
| #include "graph/manager/graph_var_manager.h" | |||
| #include "graph/optimize/common/params.h" | |||
| #include "external/graph/types.h" | |||
| @@ -707,7 +706,7 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { | |||
| if (!kernel_name.empty() && (kernel_buffer.GetSize() > 0)) { | |||
| GE_CHECK_NOTNULL(kernel_buffer.GetData()); | |||
| std::vector<char> data(kernel_buffer.GetData(), kernel_buffer.GetData() + kernel_buffer.GetSize()); | |||
| tbe_kernel = std::make_shared<OpKernelBin>(kernel_name, std::move(data)); | |||
| tbe_kernel = MakeShared<OpKernelBin>(kernel_name, std::move(data)); | |||
| GE_CHECK_NOTNULL(tbe_kernel); | |||
| GELOGI("Node [%s][%s] start recovery extra attr %s from %s", node_op_desc->GetName().c_str(), | |||
| node_op_desc->GetType().c_str(), ge::OP_EXTATTR_NAME_TBE_KERNEL, ATTR_NAME_TBE_KERNEL_NAME.c_str()); | |||
| @@ -793,7 +793,6 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||
| GELOGI("Start AutoFindBpOpIndex"); | |||
| NodePtr bp_node = nullptr; | |||
| uint32_t current_idx = 0; | |||
| uint32_t netoutput_idx = 0; | |||
| for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||
| OpDescPtr op_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| @@ -811,7 +810,6 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||
| if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { | |||
| if (bp_node == nullptr) { | |||
| bp_node = node; | |||
| netoutput_idx = current_idx - 1; | |||
| } | |||
| } | |||
| if (graph->GetNeedIteration()) { | |||
| @@ -836,34 +834,30 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||
| if (bp_node == nullptr) { | |||
| GELOGW("not find bp_node."); | |||
| return SUCCESS; | |||
| } else if (bp_node->GetName() == NODE_NAME_NET_OUTPUT) { | |||
| profiling_point.bp_index = netoutput_idx; | |||
| GELOGI("First bp name %s, idx %u", bp_node->GetName().c_str(), netoutput_idx); | |||
| } else { | |||
| profiling_point.bp_index = FindLastBpFromBpNode(graph, bp_node); | |||
| } | |||
| return SUCCESS; | |||
| return FindLastBpFromBpNode(graph, bp_node, profiling_point.bp_index); | |||
| } | |||
| uint32_t TaskGenerator::FindLastBpFromBpNode(const ComputeGraphPtr &graph, const NodePtr &bp_node) const { | |||
| uint32_t last_bp = 0; | |||
| Status TaskGenerator::FindLastBpFromBpNode(const ComputeGraphPtr &graph, const NodePtr &target_node, | |||
| uint32_t &bp_index) const { | |||
| bp_index = 0; | |||
| auto target_desc = target_node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(target_desc); | |||
| OpDescPtr bp_op_desc = nullptr; | |||
| for (auto &in_anchor : bp_node->GetAllInDataAnchors()) { | |||
| auto out_anchor = in_anchor->GetPeerOutAnchor(); | |||
| if (out_anchor == nullptr || out_anchor->GetOwnerNode() == nullptr) { | |||
| continue; | |||
| } | |||
| auto out_node_desc = out_anchor->GetOwnerNode()->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(out_node_desc); | |||
| if (bp_op_desc == nullptr || ((out_node_desc->GetId()) > (bp_op_desc->GetId()))) { | |||
| bp_op_desc = out_node_desc; | |||
| for (auto &in_node : target_node->GetInAllNodes()) { | |||
| GE_CHECK_NOTNULL(in_node); | |||
| auto in_node_desc = in_node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(in_node_desc); | |||
| if ((bp_op_desc == nullptr || (in_node_desc->GetId() > bp_op_desc->GetId())) && | |||
| (in_node_desc->GetStreamId() == target_desc->GetStreamId())) { | |||
| bp_op_desc = in_node_desc; | |||
| } | |||
| GELOGI("bp_op_desc is %s, id is %ld", bp_op_desc->GetName().c_str(), bp_op_desc->GetId()); | |||
| } | |||
| if (bp_op_desc == nullptr) { | |||
| return last_bp; | |||
| GELOGI("Did not find bp node."); | |||
| return SUCCESS; | |||
| } | |||
| uint32_t current_idx = 0; | |||
| for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||
| @@ -871,12 +865,14 @@ uint32_t TaskGenerator::FindLastBpFromBpNode(const ComputeGraphPtr &graph, const | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| current_idx++; | |||
| if (op_desc->GetName() == bp_op_desc->GetName()) { | |||
| last_bp = current_idx; | |||
| GELOGI("First bp name %s, idx %u", op_desc->GetName().c_str(), last_bp); | |||
| bp_index = current_idx; | |||
| GELOGI("Find bp name %s, idx %u", op_desc->GetName().c_str(), bp_index); | |||
| break; | |||
| } | |||
| } | |||
| return last_bp; | |||
| GELOGI("Last bp node[%s], type[%s], index[%u], stream id[%ld]", bp_op_desc->GetName().c_str(), | |||
| bp_op_desc->GetType().c_str(), bp_index, bp_op_desc->GetStreamId()); | |||
| return SUCCESS; | |||
| } | |||
| Status TaskGenerator::FindFpOfEnv(const ComputeGraphPtr &graph, const std::string &fp_point_str, | |||
| @@ -116,7 +116,7 @@ class TaskGenerator { | |||
| Status AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point) const; | |||
| Status AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, | |||
| vector<uint32_t> &all_reduce_nodes) const; | |||
| uint32_t FindLastBpFromBpNode(const ComputeGraphPtr &graph, const NodePtr &bp_node) const; | |||
| Status FindLastBpFromBpNode(const ComputeGraphPtr &graph, const NodePtr &bp_node, uint32_t &bp_index) const; | |||
| Status FindFpOfEnv(const ComputeGraphPtr &graph, const std::string &fp_point_str, | |||
| ProfilingPoint &profiling_point) const; | |||
| @@ -645,6 +645,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]); | |||
| GE_CHECK_NOTNULL(args_addr); | |||
| errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | |||
| if (sec_ret != EOK) { | |||
| REPORT_CALL_ERROR("E19999", "Call memcpy_s fail, size:%u, ret:0x%X", args_size_, sec_ret); | |||
| @@ -1000,6 +1001,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||
| // copy args to new host memory | |||
| args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]); | |||
| GE_CHECK_NOTNULL(args_addr); | |||
| GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_) | |||
| errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | |||
| if (sec_ret != EOK) { | |||
| @@ -20,7 +20,6 @@ | |||
| #include <string> | |||
| #include <utility> | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/manager/graph_mem_manager.h" | |||
| namespace ge { | |||
| @@ -94,7 +93,8 @@ void IncreaseCount(std::map<size_t, size_t> &count, size_t size) { | |||
| } | |||
| } | |||
| CachingAllocator::CachingAllocator(rtMemType_t memory_type) : memory_type_(memory_type), memory_allocator_(nullptr) { | |||
| CachingAllocator::CachingAllocator(rtMemType_t memory_type) | |||
| : memory_type_(memory_type), memory_allocator_(nullptr), called_malloc_counts_(0), called_free_counts_(0) { | |||
| for (uint32_t i = 0; i < kNumBins; i++) { | |||
| free_block_bins_[i] = nullptr; | |||
| } | |||
| @@ -121,6 +121,8 @@ Status CachingAllocator::Initialize(uint32_t device_id) { | |||
| if (memory_allocator_ == nullptr) { | |||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||
| } | |||
| called_malloc_counts_ = 0; | |||
| called_free_counts_ = 0; | |||
| return ge::SUCCESS; | |||
| } | |||
| @@ -133,6 +135,7 @@ void CachingAllocator::Finalize(uint32_t device_id) { | |||
| uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) { | |||
| GELOGI("Start malloc pool memory, size = %zu, device id = %u", size, device_id); | |||
| called_malloc_counts_++; | |||
| size = GetBlockSize(size); | |||
| uint8_t *ptr = nullptr; | |||
| Block *block = FindFreeBlock(size, org_ptr, device_id); | |||
| @@ -156,6 +159,7 @@ uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device | |||
| Status CachingAllocator::Free(uint8_t *ptr, uint32_t device_id) { | |||
| GELOGI("Free device id = %u", device_id); | |||
| called_free_counts_++; | |||
| if (ptr == nullptr) { | |||
| REPORT_INNER_ERROR("E19999", "Param ptr is nullptr, device_id:%u, check invalid", device_id); | |||
| GELOGE(PARAM_INVALID, "[Check][Param] Invalid memory pointer, device_id:%u", device_id); | |||
| @@ -283,6 +287,7 @@ Status CachingAllocator::TryExtendCache(size_t size, uint32_t device_id) { | |||
| if (memory_addr == nullptr) { | |||
| GELOGE(ge::FAILED, "[Malloc][Memory] failed, no enough memory for size = %zu, device_id = %u", memory_size, | |||
| device_id); | |||
| PrintStatics(DLOG_ERROR); | |||
| return ge::FAILED; | |||
| } | |||
| GELOGT(TRACE_RUNNING, "Try to free cached memory size:%zu and malloc memory size:%zu success.", | |||
| @@ -385,14 +390,14 @@ void CachingAllocator::FreeBlockBins() { | |||
| } | |||
| void PrintCount(std::map<size_t, size_t> &count, const std::string &name, size_t total_size, size_t total_count) { | |||
| GELOGI("%6s total[size:%10zu count:%10zu].", name.c_str(), total_size, total_count); | |||
| GEEVENT("%6s total[size:%11zu count:%11zu].", name.c_str(), total_size, total_count); | |||
| for (auto &it : count) { | |||
| GELOGI(" |- block[size:%10zu count:%10zu].", it.first, it.second); | |||
| GEEVENT(" |- block[size:%11zu count:%11zu].", it.first, it.second); | |||
| } | |||
| } | |||
| void CachingAllocator::PrintStatics() { | |||
| if (!IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) { | |||
| void CachingAllocator::PrintStatics(int32_t level) { | |||
| if (!IsLogEnable(GE_MODULE_NAME, level)) { | |||
| return; | |||
| } | |||
| size_t total_using_size = 0; | |||
| @@ -435,6 +440,7 @@ void CachingAllocator::PrintStatics() { | |||
| } | |||
| } while (0); | |||
| GEEVENT("Called counts[malloc:%11zu free:%11zu].", called_malloc_counts_.load(), called_free_counts_.load()); | |||
| PrintCount(malloc_block_stat, "Malloc", total_malloc_size, total_malloc_count); | |||
| PrintCount(using_block_stat, "Using", total_using_size, total_using_count); | |||
| PrintCount(free_block_stat, "Free", total_free_size, total_free_count); | |||
| @@ -27,6 +27,7 @@ | |||
| #include <unordered_map> | |||
| #include <unordered_set> | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/ge_inner_error_codes.h" | |||
| #include "graph/node.h" | |||
| #include "graph/manager/block_memory.h" | |||
| @@ -192,9 +193,10 @@ class CachingAllocator { | |||
| /// | |||
| /// @ingroup ge_graph | |||
| /// @brief print the memory info in pool | |||
| /// @param [in] log level | |||
| /// @return void | |||
| /// | |||
| void PrintStatics(); | |||
| void PrintStatics(int32_t level = DLOG_INFO); | |||
| private: | |||
| rtMemType_t memory_type_; | |||
| @@ -213,6 +215,12 @@ class CachingAllocator { | |||
| // malloced memorys from device | |||
| std::map<size_t, size_t> malloced_memory_; | |||
| // total number of user calls to Malloc | |||
| std::atomic<size_t> called_malloc_counts_; | |||
| // total number of user calls to Free | |||
| std::atomic<size_t> called_free_counts_; | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_MANAGER_GRAPH_CACHING_ALLOCATOR_H_ | |||
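A short sketch of the new bookkeeping, assuming a device-0 allocator inside some helper function; the two counters are std::atomic<size_t>, so concurrent Malloc/Free callers can bump them without extra locking, and Initialize resets them as shown in the .cc hunk above.

    ge::CachingAllocator allocator(RT_MEMORY_HBM);
    if (allocator.Initialize(0) != ge::SUCCESS) {     // counters reset to 0 here
      return;
    }
    uint8_t *buf = allocator.Malloc(1024, nullptr, 0);  // called_malloc_counts_ -> 1
    if (buf != nullptr) {
      (void)allocator.Free(buf, 0);                     // called_free_counts_ -> 1
    }
    // When TryExtendCache cannot obtain more device memory, the allocator now
    // calls PrintStatics(DLOG_ERROR), which logs
    // "Called counts[malloc:... free:...]" together with the block statistics.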
| @@ -27,6 +27,7 @@ | |||
| #include "common/math/math_util.h" | |||
| #include "common/thread_pool.h" | |||
| #include "common/dump/dump_manager.h" | |||
| #include "ge_opt_info/ge_opt_info.h" | |||
| #include "analyzer/analyzer.h" | |||
| #include "graph/common/ge_call_wrapper.h" | |||
| #include "graph/common/local_context.h" | |||
| @@ -1002,6 +1003,12 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||
| return ret; | |||
| } | |||
| ret = GeOptInfo::SetOptInfo(); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "[Set][OptInfo] Set optional information failed."); | |||
| return ret; | |||
| } | |||
| /// 1. BUILD_MODE_TUNING with BUILD_STEP_AFTER_UB_MATCH no need PreRunOptimizeOriginalGraph; | |||
| /// 2. BUILD_MODE_TUNING with BUILD_STEP_AFTER_MERGE no need PreRunOptimizeOriginalGraph. | |||
| /// 3. BUILD_MODE_TUNING with BUILD_STEP_AFTER_BUILDER_SUB no need PreRunOptimizeOriginalGraph. | |||
| @@ -194,35 +194,6 @@ ge::Status VarResource::GetBroadCastInfo(uint32_t graph_id, const string &var_na | |||
| return SUCCESS; | |||
| } | |||
| ge::Status VarResource::SyncVarData2BroadCast(uint32_t graph_id, const std::string &var_name, | |||
| const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr) { | |||
| GE_CHECK_NOTNULL(base_ptr); | |||
| GELOGI("SyncVarData2BroadCast graph_id: %u, var_name: %s.", graph_id, var_name.c_str()); | |||
| VarBroadCastInfo var_broadcast_info = var_broad_cast_info_[graph_id][var_name]; | |||
| uint8_t *dst_addr = base_ptr + var_broadcast_info.input_offset; | |||
| return ge::TransVarDataUtils::SyncVarData2BroadCast(var_name, var_tensor_desc, dst_addr, | |||
| var_broadcast_info.input_size, session_id_); | |||
| } | |||
| ge::Status VarResource::SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, | |||
| const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr) { | |||
| GELOGI("SyncBroadCastData2Var var_name: %s", var_name.c_str()); | |||
| VarBroadCastInfo var_broadcast_info = var_broad_cast_info_[graph_id][var_name]; | |||
| // subgraph base_ptr could be nullptr, task it as base 0 | |||
| uint8_t *dst_addr = base_ptr + var_broadcast_info.output_offset; | |||
| return ge::TransVarDataUtils::SyncBroadCastData2Var(dst_addr, var_broadcast_info.output_size, var_name, | |||
| var_tensor_desc, session_id_); | |||
| } | |||
| ge::Status VarResource::SyncVarData(uint32_t graph_id, const std::string &var_name, | |||
| const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr) { | |||
| return SyncVarData2BroadCast(graph_id, var_name, var_tensor_desc, base_ptr); | |||
| } | |||
| bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_map_.count(offset) > 0; } | |||
| rtMemType_t VarResource::GetVarMemType(const int64_t &offset) { | |||
| @@ -638,16 +609,6 @@ bool VarManager::IsVarExist(const std::string &var_name) { | |||
| return var_resource_->IsVarExist(var_name); | |||
| } | |||
| ge::Status VarManager::SyncVarData(uint32_t graph_id, const std::string &var_name, const GeTensorDesc &var_tensor_desc, | |||
| uint8_t *base_ptr) { | |||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
| if (var_resource_ == nullptr) { | |||
| GELOGW("VarManager has not been init."); | |||
| return ge::INTERNAL_ERROR; | |||
| } | |||
| return var_resource_->SyncVarData(graph_id, var_name, var_tensor_desc, base_ptr); | |||
| } | |||
| ge::Status VarManager::GetCurVarDesc(const std::string &var_name, ge::GeTensorDesc &tensor_desc) { | |||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
| GELOGI("VarManager::GetCurVarDesc var_name = %s.", var_name.c_str()); | |||
| @@ -701,16 +662,6 @@ ge::Status VarManager::RenewCurVarDesc(const std::string &var_name, ge::OpDescPt | |||
| return var_resource_->RenewCurVarDesc(var_name, std::move(op_desc)); | |||
| } | |||
| ge::Status VarManager::SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, | |||
| const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr) { | |||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
| if (var_resource_ == nullptr) { | |||
| GELOGW("VarManager has not been init."); | |||
| return ge::INTERNAL_ERROR; | |||
| } | |||
| return var_resource_->SyncBroadCastData2Var(graph_id, var_name, var_tensor_desc, base_ptr); | |||
| } | |||
| bool VarManager::IsVarAddr(const int64_t &offset) { | |||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
| if (var_resource_ == nullptr) { | |||
| @@ -118,15 +118,6 @@ class VarResource { | |||
| ge::Status GetBroadCastInfo(uint32_t graph_id, const string &var_name, VarBroadCastInfo &broad_cast_info); | |||
| ge::Status SyncVarData2BroadCast(uint32_t graph_id, const std::string &var_name, | |||
| const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr); | |||
| ge::Status SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, | |||
| const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr); | |||
| ge::Status SyncVarData(uint32_t graph_id, const std::string &var_name, const GeTensorDesc &var_tensor_desc, | |||
| uint8_t *base_ptr); | |||
| Status SetTransRoad(const std::string &var_name, const VarTransRoad &trans_road) { | |||
| if (var_to_trans_road_.find(var_name) != var_to_trans_road_.end()) { | |||
| GELOGW("Var name: %s has already set.", var_name.c_str()); | |||
| @@ -234,16 +225,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager { | |||
| ge::Status GetVarAddr(const std::string &var_name, const ge::GeTensorDesc &tensor_desc, uint8_t **dev_ptr); | |||
| ge::Status SyncVarData(uint32_t graph_id, const std::string &var_name, const GeTensorDesc &var_tensor_desc, | |||
| uint8_t *base_ptr); | |||
| ge::Status SaveBroadCastInfo(uint32_t graph_id, const VarBroadCastInfo &broad_cast_info); | |||
| ge::Status GetBroadCastInfo(uint32_t graph_id, const string &var_name, VarBroadCastInfo &broad_cast_info); | |||
| ge::Status SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, const GeTensorDesc &var_tensor_desc, | |||
| uint8_t *base_ptr); | |||
| ge::Status GetCurVarDesc(const std::string &var_name, ge::GeTensorDesc &tensor_desc); | |||
| ge::Status RenewCurVarDesc(const std::string &var_name, ge::OpDescPtr op_desc); | |||
| @@ -415,72 +415,6 @@ Status CopyTensorFromSrcVarNode(const NodePtr &var_src, | |||
| return SUCCESS; | |||
| } | |||
| } // namespace | |||
| Status TransVarDataUtils::SyncVarData2BroadCast(const string &var_name, const ge::GeTensorDesc &src_tensor_desc, | |||
| uint8_t *dst_addr, int64_t dst_addr_size, uint64_t session_id) { | |||
| GE_CHK_BOOL_RET_STATUS(dst_addr != nullptr, FAILED, "[Check][Param] dst addr is nullptr."); | |||
| uint8_t *src_host_addr = nullptr; | |||
| int64_t src_addr_size = 0; | |||
| GE_MAKE_GUARD_RTMEM(src_host_addr); | |||
| GE_CHK_STATUS_RET(SyncTensorToHost(var_name, src_tensor_desc, &src_host_addr, src_addr_size, session_id)); | |||
| GELOGI("src_addr_size: %ld, dst_addr_size: %ld", src_addr_size, dst_addr_size); | |||
| GE_CHK_BOOL_RET_STATUS(src_addr_size == dst_addr_size, FAILED, | |||
| "[Check][Param] src_addr_size:%ld not equal to dst_addr_size:%ld", | |||
| src_addr_size, dst_addr_size); | |||
| GE_CHK_RT_RET(rtMemcpy(dst_addr, dst_addr_size, src_host_addr, src_addr_size, RT_MEMCPY_HOST_TO_DEVICE)); | |||
| return SUCCESS; | |||
| } | |||
| Status TransVarDataUtils::SyncBroadCastData2Var(uint8_t *src_addr, int64_t src_addr_size, const string &var_name, | |||
| const ge::GeTensorDesc &dst_tensor_desc, uint64_t session_id) { | |||
| GE_CHK_BOOL_RET_STATUS(src_addr != nullptr, FAILED, "[Check][Param] src addr is nullptr. "); | |||
| uint8_t *host_addr = nullptr; | |||
| GE_MAKE_GUARD_RTMEM(host_addr); | |||
| GE_CHK_RT_RET(rtMallocHost(reinterpret_cast<void **>(&host_addr), src_addr_size)); | |||
| GE_CHK_RT_RET(rtMemcpy(host_addr, src_addr_size, src_addr, src_addr_size, RT_MEMCPY_DEVICE_TO_HOST)); | |||
| GE_CHK_STATUS_RET( | |||
| SyncTensorToDevice(var_name, reinterpret_cast<uint8_t *>(host_addr), src_addr_size, dst_tensor_desc, session_id)); | |||
| return SUCCESS; | |||
| } | |||
| Status TransVarDataUtils::SyncTensorToHost(const string &var_name, const ge::GeTensorDesc &src_tensor_desc, | |||
| uint8_t **host_addr, int64_t &src_tensor_size, uint64_t session_id) { | |||
| GE_CHK_STATUS_RET(ge::TensorUtils::GetSize(src_tensor_desc, src_tensor_size), "[Get][Size] from TensorDesc failed"); | |||
| uint8_t *src_addr = nullptr; | |||
| GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, src_tensor_desc, &src_addr)); | |||
| uint8_t *mem_addr = | |||
| src_addr - | |||
| static_cast<int64_t>(static_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) + | |||
| static_cast<int64_t>( | |||
| reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); | |||
| GE_CHK_RT_RET(rtMallocHost(reinterpret_cast<void **>(host_addr), src_tensor_size)); | |||
| GE_CHK_RT_RET(rtMemcpy(*host_addr, src_tensor_size, mem_addr, src_tensor_size, RT_MEMCPY_DEVICE_TO_HOST)); | |||
| GELOGI("SyncTensorToHost var_name %s, src_tensor_size %ld", var_name.c_str(), src_tensor_size); | |||
| return SUCCESS; | |||
| } | |||
| Status TransVarDataUtils::SyncTensorToDevice(const string &var_name, const uint8_t *host_addr, uint32_t addr_size, | |||
| const ge::GeTensorDesc &dst_tensor_desc, uint64_t session_id) { | |||
| uint8_t *dst_addr = nullptr; | |||
| GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, dst_tensor_desc, &dst_addr)); | |||
| uint8_t *mem_addr = | |||
| dst_addr - | |||
| static_cast<int64_t>(static_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) + | |||
| static_cast<int64_t>( | |||
| reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); | |||
| GE_CHK_RT_RET(rtMemcpy(mem_addr, addr_size, host_addr, addr_size, RT_MEMCPY_HOST_TO_DEVICE)); | |||
| GELOGI("SyncTensorToDevice var_name %s, addr_size %u", var_name.c_str(), addr_size); | |||
| return SUCCESS; | |||
| } | |||
| Status TransVarDataUtils::TransAllVarData(const vector<NodePtr> &variable_nodes, | |||
| uint64_t session_id, | |||
| rtContext_t context, | |||
| @@ -29,11 +29,6 @@ | |||
| namespace ge { | |||
| class TransVarDataUtils { | |||
| public: | |||
| static ge::Status SyncVarData2BroadCast(const string &var_name, const ge::GeTensorDesc &src_tensor_desc, | |||
| uint8_t *dst_addr, int64_t dst_addr_size, uint64_t session_id_); | |||
| static ge::Status SyncBroadCastData2Var(uint8_t *src_addr, int64_t src_addr_size, const string &var_name, | |||
| const ge::GeTensorDesc &dst_tensor_desc, uint64_t session_id_); | |||
| static ge::Status TransAllVarData(const std::vector<NodePtr> &variable_nodes, | |||
| uint64_t session_id, | |||
| rtContext_t context, | |||
| @@ -41,12 +36,6 @@ class TransVarDataUtils { | |||
| uint32_t thread_num = 16); | |||
| static ge::Status CopyVarData(const ComputeGraphPtr &compute_graph, uint64_t session_id, uint32_t device_id); | |||
| private: | |||
| static ge::Status SyncTensorToHost(const string &var_name, const ge::GeTensorDesc &src_tensor_desc, | |||
| uint8_t **host_addr, int64_t &addr_size, uint64_t session_id_); | |||
| static ge::Status SyncTensorToDevice(const string &var_name, const uint8_t *host_addr, uint32_t addr_size, | |||
| const ge::GeTensorDesc &dst_tensor_desc, uint64_t session_id_); | |||
| }; | |||
| } // namespace ge | |||
| @@ -20,17 +20,23 @@ | |||
| #include "external/graph/operator_factory.h" | |||
| #include "graph/utils/node_utils.h" | |||
| #include "graph/utils/type_utils.h" | |||
| #include "ge_local_engine/engine/host_cpu_engine.h" | |||
| #include "init/gelib.h" | |||
| namespace ge { | |||
| const int64_t kStartCallNum = 1; | |||
| const std::string kKernelLibName = "aicpu_tf_kernel"; | |||
| // tf_kernel.json opsFlag config | |||
| const std::string kOpsFlagClose = "0"; | |||
| Status RunOpKernelWithCheck(NodePtr &node, | |||
| const vector<ConstGeTensorPtr> &inputs, | |||
| std::vector<GeTensorPtr> &outputs) { | |||
| const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { | |||
| return statistic_of_ge_constant_folding_; | |||
| } | |||
| const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { | |||
| return statistic_of_op_constant_folding_; | |||
| } | |||
| Status ConstantFoldingPass::RunOpKernelWithCheck(NodePtr &node, const vector<ConstGeTensorPtr> &inputs, | |||
| std::vector<GeTensorPtr> &outputs) { | |||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Check][Param] GE is not initialized or is finalized."); | |||
| @@ -47,15 +53,13 @@ Status RunOpKernelWithCheck(NodePtr &node, | |||
| if (ops_flag == kOpsFlagClose) { | |||
| return UNSUPPORTED; | |||
| } | |||
| return FoldingPass::RunOpKernel(node, inputs, outputs); | |||
| return RunOpKernel(node, inputs, outputs); | |||
| } | |||
| const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { | |||
| return statistic_of_ge_constant_folding_; | |||
| } | |||
| const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { | |||
| return statistic_of_op_constant_folding_; | |||
| Status ConstantFoldingPass::RunOpKernel(NodePtr &node, | |||
| const vector<ConstGeTensorPtr> &inputs, | |||
| std::vector<GeTensorPtr> &outputs) { | |||
| return HostCpuEngine::GetInstance().Run(node, inputs, outputs); | |||
| } | |||
| Status ConstantFoldingPass::Run(ge::NodePtr &node) { | |||
| @@ -28,6 +28,11 @@ class ConstantFoldingPass : public FoldingPass { | |||
| Status Run(ge::NodePtr &node) override; | |||
| const std::map<std::string, std::pair<std::uint64_t, uint64_t>> &GetGeConstantFoldingPerfStatistic() const; | |||
| const std::map<std::string, std::pair<std::uint64_t, uint64_t>> &GetOpConstantFoldingPerfStatistic() const; | |||
| static Status RunOpKernel(NodePtr &node, const vector<ConstGeTensorPtr> &inputs, vector<GeTensorPtr> &outputs); | |||
| static Status RunOpKernelWithCheck(NodePtr &node, const vector<ConstGeTensorPtr> &inputs, | |||
| std::vector<GeTensorPtr> &outputs); | |||
| private: | |||
| std::map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_op_constant_folding_; | |||
| std::map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_ge_constant_folding_; | |||
| @@ -28,8 +28,6 @@ | |||
| #include "inc/kernel.h" | |||
| #include "inc/kernel_factory.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "ge_local_engine/engine/host_cpu_engine.h" | |||
| namespace ge { | |||
| namespace folding_pass { | |||
| @@ -123,12 +121,6 @@ NodePtr AddIdentityNodeToGraph(const std::string &name, const GeTensorDesc &tens | |||
| } | |||
| } // namespace | |||
| Status FoldingPass::RunOpKernel(NodePtr &node, | |||
| const vector<ConstGeTensorPtr> &inputs, | |||
| std::vector<GeTensorPtr> &outputs) { | |||
| return HostCpuEngine::GetInstance().Run(node, inputs, outputs); | |||
| } | |||
| Status FoldingPass::Folding(NodePtr &node, vector<GeTensorPtr> &outputs) { | |||
| GE_CHECK_NOTNULL(node); | |||
| GELOGD("begin folding node:%s", node->GetName().c_str()); | |||
| @@ -34,8 +34,6 @@ bool IsNoNeedConstantFolding(const NodePtr &node); | |||
| using IndexsToAnchors = std::map<int, std::vector<InDataAnchorPtr>>; | |||
| class FoldingPass : public BaseNodePass { | |||
| public: | |||
| static Status RunOpKernel(NodePtr &node, const vector<ConstGeTensorPtr> &inputs, vector<GeTensorPtr> &outputs); | |||
| protected: | |||
| Status Folding(NodePtr &node, vector<GeTensorPtr> &outputs); | |||
| private: | |||
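With this move, the host-CPU kernel execution and the aicpu_tf_kernel opsFlag guard both live on ConstantFoldingPass, and FoldingPass no longer exposes RunOpKernel. A hedged caller sketch; node and inputs are assumed to be prepared elsewhere, and the fragment is assumed to sit inside namespace ge like the pass itself.

    // Sketch only: run a node's kernel with the opsFlag check in front.
    std::vector<GeTensorPtr> outputs;
    Status ret = ConstantFoldingPass::RunOpKernelWithCheck(node, inputs, outputs);
    if (ret == UNSUPPORTED) {
      // opsFlag in tf_kernel.json is "0" for this op type: skip folding.
    } else if (ret != SUCCESS) {
      GELOGE(ret, "Run op kernel for node %s failed.", node->GetName().c_str());
    }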
| @@ -0,0 +1,385 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "infer_base_pass.h" | |||
| #include "common/ge/ge_util.h" | |||
| #include "common/util/error_manager/error_manager.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/util.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/utils/graph_utils.h" | |||
| #include "graph/utils/node_utils.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| #include "graph/utils/type_utils.h" | |||
| namespace ge { | |||
| namespace { | |||
| graphStatus FindValidSubgraphNetoutput(const ConstNodePtr &node, const ComputeGraphPtr &sub_graph, NodePtr &netoutput) { | |||
| auto sub_nodes = sub_graph->GetDirectNode(); | |||
| for (size_t i = sub_nodes.size(); i > 0; --i) { | |||
| auto sub_node = sub_nodes.at(i - 1); | |||
| if (sub_node == nullptr) { | |||
| REPORT_INNER_ERROR("E19999", "Node is null in subgraph %s, parent node %s.", | |||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||
| GELOGE(GRAPH_FAILED, "[Check][Param] Node is null on sub graph %s, parent node %s", | |||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||
| return GRAPH_FAILED; | |||
| } | |||
| if (sub_node->GetType() == NETOUTPUT) { | |||
| auto sub_node_opdesc = sub_node->GetOpDesc(); | |||
| if (sub_node_opdesc == nullptr) { | |||
| REPORT_INNER_ERROR("E19999", "Invalid NetOutput node in subgraph %s, parent node %s, no OpDesc on it", | |||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||
| GELOGE(GRAPH_FAILED, "[Check][Param] Invalid NetOutput node on sub graph %s, parent node %s, no OpDesc on it", | |||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||
| return GRAPH_FAILED; | |||
| } | |||
| netoutput = sub_node; | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| } | |||
| REPORT_INNER_ERROR("E19999", "Can not find the NetOutput node in subgraph %s, parent node %s", | |||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||
| GELOGE(GRAPH_FAILED, "[Check][Param] Can not find the NetOutput node in subgraph %s, parent node %s", | |||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||
| return GRAPH_FAILED; | |||
| } | |||
| } // namespace | |||
| Status InferBasePass::Run(NodePtr &node) { | |||
| GE_CHECK_NOTNULL(node); | |||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | |||
| bool need_infer = NeedInfer(node); | |||
| if (!need_infer) { | |||
| GELOGD("Node %s does not need to infer.", node->GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| std::set<NodePtr> changed_nodes; | |||
| auto ret = InferAndUpdate(node, !OptionExists(kOptimizeAfterSubGraph), changed_nodes); | |||
| if (ret != GRAPH_SUCCESS) { | |||
| GELOGE(ret, "Infer and update for node %s failed! ret: %u", node->GetName().c_str(), ret); | |||
| return GRAPH_FAILED; | |||
| } | |||
| AddChangedNodesImmediateRepass(changed_nodes); | |||
| return SUCCESS; | |||
| } | |||
| bool InferBasePass::NeedInfer(const NodePtr &node) const { return true; } | |||
| void InferBasePass::AddChangedNodesImmediateRepass(const std::set<NodePtr> &changed_nodes) { | |||
| // A passed_nodes set is needed to solve the problem that multi-input operators are re-passed in advance. | |||
| // Once the passed_nodes set exists, we should call AddImmediateRePassNode for every node in changed_nodes. | |||
| } | |||
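InferBasePass is a template-method base: Run drives NeedInfer, Infer, the subgraph updates and the peer-input propagation, while concrete passes such as the infershape and value-range passes supply the hooks. The header for this file is not part of the hunk, so the override signatures below are inferred from the call sites in this .cc and should be read as assumptions; the subgraph-merge hooks (UpdateOutputFromSubgraphs and its multi-dims variant) are omitted for brevity.

    // Sketch only: a minimal derived pass. Hook signatures are inferred from
    // the call sites above (Infer(node), UpdateTensorDesc(src, dst, changed)).
    class MyInferPass : public InferBasePass {
     protected:  // hypothetical access level
      graphStatus Infer(NodePtr &node) override {
        // Recompute the output TensorDescs of `node` here; return
        // GRAPH_NODE_NEED_REPASS to put the node back on the repass list.
        return GRAPH_SUCCESS;
      }
      graphStatus UpdateTensorDesc(const GeTensorDescPtr &src, GeTensorDescPtr &dst, bool &changed) override {
        changed = dst->GetShape().GetDims() != src->GetShape().GetDims();
        dst->SetShape(src->GetShape());
        dst->SetDataType(src->GetDataType());
        return GRAPH_SUCCESS;
      }
    };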
| graphStatus InferBasePass::InferAndUpdate(NodePtr &node, bool before_subgraph, std::set<NodePtr> &changed_nodes) { | |||
| graphStatus ret; | |||
| if (ContainsSubgraph(node)) { | |||
| if (before_subgraph) { | |||
| ret = UpdateTensorDescToSubgraphData(node); | |||
| } else { | |||
| ret = UpdateTensorDescToParentNodeOutput(node); | |||
| } | |||
| if (ret != GRAPH_SUCCESS) { | |||
| GELOGE(ret, "Update tensor desc failed between parent node %s and subgraphs. ret: %u", node->GetName().c_str(), | |||
| ret); | |||
| return ret; | |||
| } | |||
| } | |||
| PrintInOutTensors(node, "before_infer"); | |||
| ret = Infer(node); | |||
| PrintInOutTensors(node, "after_infer"); | |||
| if (ret == GRAPH_NODE_NEED_REPASS) { | |||
| // If a node needs a re-pass, it is not necessary to update the peer node inputs. | |||
| changed_nodes.insert(node); | |||
| return GRAPH_SUCCESS; | |||
| } else if (ret != GRAPH_SUCCESS && ret != GRAPH_NOT_CHANGED) { | |||
| GELOGE(ret, "Infer failed for node %s, ret: %u", node->GetName().c_str(), ret); | |||
| return ret; | |||
| } | |||
| ret = UpdateTensorDescToPeerInputs(node, changed_nodes); | |||
| if (ret != GRAPH_SUCCESS) { | |||
| GELOGE(ret, "Node %s updates tensor desc to peer input nodes failed! ret: %u", node->GetName().c_str(), ret); | |||
| } | |||
| GELOGD("Node %s infer and update succeeded .", node->GetName().c_str()); | |||
| return ret; | |||
| } | |||
| bool InferBasePass::ContainsSubgraph(const NodePtr &node) { | |||
| auto sub_graph_names = node->GetOpDesc()->GetSubgraphInstanceNames(); | |||
| return !sub_graph_names.empty(); | |||
| } | |||
| graphStatus InferBasePass::UpdateTensorDescToPeerInputs(NodePtr &node, std::set<NodePtr> &changed_nodes) { | |||
| auto op_desc = node->GetOpDesc(); | |||
| for (const auto &out_anchor : node->GetAllOutDataAnchors()) { | |||
| auto output_tensor = op_desc->MutableOutputDesc(out_anchor->GetIdx()); | |||
| for (const auto &peer_anchor : out_anchor->GetPeerInDataAnchors()) { | |||
| auto peer_anchor_opdesc = peer_anchor->GetOwnerNode()->GetOpDesc(); | |||
| if (peer_anchor_opdesc == nullptr) { | |||
| continue; | |||
| } | |||
| auto peer_input_desc = peer_anchor_opdesc->MutableInputDesc(peer_anchor->GetIdx()); | |||
| if (peer_input_desc == nullptr) { | |||
| continue; | |||
| } | |||
| bool changed = false; | |||
| auto ret = UpdateTensorDesc(output_tensor, peer_input_desc, changed); | |||
| if (ret != GRAPH_SUCCESS) { | |||
| REPORT_CALL_ERROR("E19999", "Update peer input desc failed, node %s.", node->GetName().c_str()); | |||
| GELOGE(ret, "Update peer input desc failed, node %s.", node->GetName().c_str()); | |||
| return ret; | |||
| } | |||
| if (changed) { | |||
| changed_nodes.insert(peer_anchor->GetOwnerNode()); | |||
| GELOGD("Node %s update peer node succeeded, peer node %s is changed.", node->GetName().c_str(), | |||
| peer_anchor->GetOwnerNode()->GetName().c_str()); | |||
| } | |||
| } | |||
| } | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| std::vector<ComputeGraphPtr> InferBasePass::GetCurNodeSubgraphs(const NodePtr &node) { | |||
| std::vector<ComputeGraphPtr> cur_node_subgraph; | |||
| auto op_desc = node->GetOpDesc(); | |||
| auto sub_graph_names = op_desc->GetSubgraphInstanceNames(); | |||
| if (sub_graph_names.empty()) { | |||
| return cur_node_subgraph; | |||
| } | |||
| auto root_graph = GraphUtils::FindRootGraph(node->GetOwnerComputeGraph()); | |||
| for (const auto &name : sub_graph_names) { | |||
| if (name.empty()) { | |||
| GELOGW("The node %s contains empty subgraph instance name", node->GetName().c_str()); | |||
| continue; | |||
| } | |||
| auto sub_graph = root_graph->GetSubgraph(name); | |||
| if (sub_graph == nullptr) { | |||
| GELOGW("The subgrpah %s for node %s is null.", name.c_str(), node->GetName().c_str()); | |||
| continue; | |||
| } | |||
| cur_node_subgraph.emplace_back(sub_graph); | |||
| } | |||
| return cur_node_subgraph; | |||
| } | |||
| graphStatus InferBasePass::UpdateTensorDescToSubgraphData(NodePtr &node) { | |||
| auto op_desc = node->GetOpDesc(); | |||
| for (const auto &sub_graph : GetCurNodeSubgraphs(node)) { | |||
| for (const auto &node_sub : sub_graph->GetDirectNode()) { | |||
| if (node_sub->GetType() != DATA) { | |||
| continue; | |||
| } | |||
| auto data_opdesc = node_sub->GetOpDesc(); | |||
| if (data_opdesc == nullptr) { | |||
| REPORT_INNER_ERROR("E19999", "Invalid data node on the sub graph %s parent node %s, no OpDesc", | |||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||
| GELOGE(GRAPH_FAILED, "[Get][OpDesc] Invalid data node on the sub graph %s parent node %s, no OpDesc", | |||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||
| return GRAPH_FAILED; | |||
| } | |||
| int ref_i; | |||
| if (!AttrUtils::GetInt(data_opdesc, ATTR_NAME_PARENT_NODE_INDEX, ref_i)) { | |||
| REPORT_INNER_ERROR("E19999", "Invalid data node on the sub graph %s parent node %s, no ref-index attribute", | |||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||
| GELOGE(GRAPH_FAILED, "[Get][Int] Invalid data node on the sub graph %s parent node %s, no ref-index attribute", | |||
| sub_graph->GetName().c_str(), node->GetName().c_str()); | |||
| return GRAPH_FAILED; | |||
| } | |||
| GELOGD("Subgraph Data node ref_index is %d, parent node is %s.", ref_i, node->GetName().c_str()); | |||
| // In multi-batch, data shape of subgraph is different, no need to refresh. | |||
| if (data_opdesc->HasAttr(ATTR_MBATCH_ORIGIN_INPUT_DIMS)) { | |||
| GELOGD("While updating subgraph data node, ignore node %s which is created by multi-dims", | |||
| data_opdesc->GetName().c_str()); | |||
| continue; | |||
| } | |||
| auto input_desc = op_desc->MutableInputDesc(ref_i); | |||
| if (input_desc == nullptr) { | |||
| REPORT_INNER_ERROR("E19999", | |||
| "The ref index(%d) on the data %s on the sub graph %s " | |||
| "parent node %s are incompatible, inputs num %u", | |||
| ref_i, node_sub->GetName().c_str(), sub_graph->GetName().c_str(), node->GetName().c_str(), | |||
| node->GetAllInDataAnchorsSize()); | |||
| GELOGE(GRAPH_FAILED, | |||
| "[Call][MutableInputDesc] The ref index(%d) on the data %s on the sub graph %s " | |||
| "parent node %s are incompatible, inputs num %u", | |||
| ref_i, node_sub->GetName().c_str(), sub_graph->GetName().c_str(), node->GetName().c_str(), | |||
| node->GetAllInDataAnchorsSize()); | |||
| return GRAPH_FAILED; | |||
| } | |||
| GELOGI("Ref index is %d, input_desc dtype is %d, node name is %s", ref_i, input_desc->GetDataType(), | |||
| node->GetName().c_str()); | |||
| bool has_tensor_desc_changed = false; | |||
| auto data_input_td = data_opdesc->MutableInputDesc(0); | |||
| auto ret = UpdateTensorDesc(input_desc, data_input_td, has_tensor_desc_changed); | |||
| if (ret != GRAPH_SUCCESS) { | |||
| REPORT_CALL_ERROR("E19999", "Failed to update input desc of data %s on the sub graph %s parent node %s", | |||
| node_sub->GetName().c_str(), sub_graph->GetName().c_str(), node->GetName().c_str()); | |||
| GELOGE(GRAPH_FAILED, "[Update][InputDesc] of data %s on the sub graph %s parent node %s failed", | |||
| node_sub->GetName().c_str(), sub_graph->GetName().c_str(), node->GetName().c_str()); | |||
| return ret; | |||
| } | |||
| auto data_output_td = data_opdesc->MutableOutputDesc(0); | |||
| ret = UpdateTensorDesc(input_desc, data_output_td, has_tensor_desc_changed); | |||
| if (ret != GRAPH_SUCCESS) { | |||
| REPORT_CALL_ERROR("E19999", "Failed to update output desc of data %s on the sub graph %s parent node %s", | |||
| node_sub->GetName().c_str(), sub_graph->GetName().c_str(), node->GetName().c_str()); | |||
| GELOGE(GRAPH_FAILED, "[Update][OutputDesc] of data %s on the sub graph %s parent node %s failed", | |||
| node_sub->GetName().c_str(), sub_graph->GetName().c_str(), node->GetName().c_str()); | |||
| return ret; | |||
| } | |||
| GELOGD("Parent node %s update subgraph data %s input and output succeed.", node->GetName().c_str(), | |||
| data_opdesc->GetName().c_str()); | |||
| } | |||
| } | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| graphStatus InferBasePass::UpdateTensorDescToParentNodeOutput(NodePtr &node) { | |||
| std::vector<std::vector<GeTensorDescPtr>> ref_out_tensors(node->GetAllOutDataAnchorsSize()); | |||
| for (const auto &sub_graph : GetCurNodeSubgraphs(node)) { | |||
| NodePtr netoutput; | |||
| auto ret = FindValidSubgraphNetoutput(node, sub_graph, netoutput); | |||
| if (ret != GRAPH_SUCCESS) { | |||
| return ret; | |||
| } | |||
| auto netoutput_opdesc = netoutput->GetOpDesc(); | |||
| for (auto &netoutput_in_anchor : netoutput->GetAllInDataAnchors()) { | |||
| auto netoutput_in_desc = netoutput_opdesc->MutableInputDesc(netoutput_in_anchor->GetIdx()); | |||
| if (netoutput_in_desc == nullptr) { | |||
| REPORT_INNER_ERROR("E19999", | |||
| "Invalid NetOutput node on sub graph %s, parent node %s, can not find input tensor %d", | |||
| sub_graph->GetName().c_str(), node->GetName().c_str(), netoutput_in_anchor->GetIdx()); | |||
| GELOGE(GRAPH_FAILED, | |||
| "[Get][Tensor] Invalid NetOutput node on sub graph %s, parent node %s, can not find input tensor %d", | |||
| sub_graph->GetName().c_str(), node->GetName().c_str(), netoutput_in_anchor->GetIdx()); | |||
| return GRAPH_FAILED; | |||
| } | |||
| GELOGI("Netoutput in anchor index is %d, input tensor dim is %zu", netoutput_in_anchor->GetIdx(), | |||
| netoutput_in_desc->GetShape().GetDimNum()); | |||
| int ref_i; | |||
| if (!AttrUtils::GetInt(netoutput_in_desc, ATTR_NAME_PARENT_NODE_INDEX, ref_i)) { | |||
| // if there is no ref index on the TensorDesc, it means the output data will be ignored by the outer graph. | |||
| continue; | |||
| } | |||
| GELOGI("Parent node index of edge desc is %d", ref_i); | |||
| if (ref_i < 0 || static_cast<uint32_t>(ref_i) >= node->GetAllOutDataAnchorsSize()) { | |||
| REPORT_INNER_ERROR("E19999", | |||
| "Invalid ref_index %d of parent node %s, ref_index should less than %u.", ref_i, | |||
| node->GetName().c_str(), node->GetAllOutDataAnchorsSize()); | |||
| GELOGE(GRAPH_FAILED, | |||
| "[Get][Ref_index] Invalid ref_index %d of parent node %s, ref_index should less than %u.", ref_i, | |||
| node->GetName().c_str(), node->GetAllOutDataAnchorsSize()); | |||
| return GRAPH_FAILED; | |||
| } | |||
| ref_out_tensors[ref_i].emplace_back(netoutput_in_desc); | |||
| } | |||
| } | |||
| return UpdateParentNodeContainsSubgraphs(node, ref_out_tensors); | |||
| } | |||
| graphStatus InferBasePass::UpdateParentNodeContainsSubgraphs( | |||
| NodePtr &node, const std::vector<std::vector<GeTensorDescPtr>> &ref_out_tensors) { | |||
| for (size_t i = 0; i < ref_out_tensors.size(); i++) { | |||
| if (ref_out_tensors[i].empty()) { | |||
| REPORT_CALL_ERROR("E19999", "Parent node %s ref_index %zu subgraph output tensor list is empty.", | |||
| node->GetName().c_str(), i); | |||
| GELOGE(GRAPH_FAILED, "[Param][check] Parent node %s ref_index %zu subgraph output tensor list is empty.", | |||
| node->GetName().c_str(), i); | |||
| return GRAPH_FAILED; | |||
| } | |||
| auto node_op_desc = node->GetOpDesc(); | |||
| auto node_output_td = node_op_desc->MutableOutputDesc(i); | |||
| if (node_output_td == nullptr) { | |||
| REPORT_CALL_ERROR("E19999", "Node %s output %zu tensor desc is null.", node->GetName().c_str(), i); | |||
| GELOGE(GRAPH_FAILED, "[Param][check] Node %s output %zu tensor desc is null.", node->GetName().c_str(), i); | |||
| return GRAPH_FAILED; | |||
| } | |||
| graphStatus ret; | |||
| if (node_op_desc->HasAttr(ATTR_NAME_BATCH_NUM)) { | |||
| ret = UpdateOutputFromSubgraphsForMultiDims(ref_out_tensors[i], node_output_td); | |||
| } else { | |||
| ret = UpdateOutputFromSubgraphs(ref_out_tensors[i], node_output_td); | |||
| } | |||
| if (ret != GRAPH_SUCCESS) { | |||
| REPORT_CALL_ERROR("E19999", "Node %s update output %zu tensor desc failed. ret: %u", node->GetName().c_str(), i, | |||
| ret); | |||
| GELOGE(GRAPH_FAILED, "[Param][check] Node %s update output %zu tensor desc failed. ret: %u", | |||
| node->GetName().c_str(), i, ret); | |||
| return ret; | |||
| } | |||
| GELOGD("Parent node %s successfully updated the output tensors from subgraphs.", node->GetName().c_str()); | |||
| } | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| void InferBasePass::PrintInOutTensors(const NodePtr &node, const std::string &phase) { | |||
| if (!IsLogEnable(GE, DLOG_DEBUG)) { | |||
| return; | |||
| } | |||
| if (node == nullptr) { | |||
| REPORT_INNER_ERROR("E19999", "Param node is nullptr, check invalid"); | |||
| GELOGE(GRAPH_FAILED, "[Check][Param] node is null"); | |||
| return; | |||
| } | |||
| ge::OpDescPtr op_desc = node->GetOpDesc(); | |||
| GE_IF_BOOL_EXEC(op_desc == nullptr, REPORT_INNER_ERROR("E19999", "Node has no opdesc, check invalid"); | |||
| GELOGE(GRAPH_FAILED, "[Get][OpDesc] op_desc is null."); return ); | |||
| std::stringstream ss; | |||
| ss << "{"; | |||
| int32_t in_idx = 0; | |||
| for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) { | |||
| if (input_desc == nullptr) { | |||
| in_idx++; | |||
| continue; | |||
| } | |||
| if (in_idx > 0) { | |||
| ss << " "; | |||
| } | |||
| ss << "input_" << in_idx << " tensor: "; | |||
| ss << SerialTensorInfo(input_desc); | |||
| in_idx++; | |||
| } | |||
| int32_t out_idx = 0; | |||
| for (const auto &output_desc : op_desc->GetAllOutputsDescPtr()) { | |||
| if (output_desc == nullptr) { | |||
| out_idx++; | |||
| continue; | |||
| } | |||
| ss << " "; | |||
| ss << "output_" << out_idx << " tensor: "; | |||
| ss << SerialTensorInfo(output_desc); | |||
| out_idx++; | |||
| } | |||
| ss << "}"; | |||
| GELOGD("Infer tensor dump [%s], Node name: [%s]. %s", phase.c_str(), node->GetName().c_str(), ss.str().c_str()); | |||
| } | |||
| } // namespace ge | |||
| @@ -0,0 +1,65 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_GRAPH_PASSES_INFER_BASE_PASS_H_ | |||
| #define GE_GRAPH_PASSES_INFER_BASE_PASS_H_ | |||
| #include "graph/passes/base_pass.h" | |||
| namespace ge { | |||
| class InferBasePass : public BaseNodePass { | |||
| public: | |||
| Status Run(NodePtr &node) override; | |||
| graphStatus InferAndUpdate(NodePtr &node, bool before_subgraph, std::set<NodePtr> &changed_nodes); | |||
| void PrintInOutTensors(const NodePtr &node, const std::string &phase); | |||
| protected: | |||
| virtual std::string SerialTensorInfo(const GeTensorDescPtr &tensor_desc) const = 0; | |||
| virtual bool NeedInfer(const NodePtr &node) const; | |||
| virtual graphStatus Infer(NodePtr &node) = 0; | |||
| /** | |||
| * Update the dst TensorDesc from the src TensorDesc. This is called when propagating tensor descs, e.g. when updating a peer node's input desc. | |||
| * @param src, input TensorDesc | |||
| * @param dst, output TensorDesc to be updated | |||
| * @return | |||
| */ | |||
| virtual graphStatus UpdateTensorDesc(const GeTensorDescPtr &src, GeTensorDescPtr &dst, bool &changed) = 0; | |||
| /** | |||
| * Update the output TensorDesc for nodes which contain subgraphs. | |||
| * In dynamic multi-dims/batch/image-size scenes, the update process may be different, | |||
| * in which case `InferBasePass` will call `UpdateOutputFromSubgraphsForMultiDims` instead. | |||
| * @param src, input TensorDesc from NetOutput nodes in all subgraphs | |||
| * @param dst, output TensorDesc to be updated | |||
| * @return | |||
| */ | |||
| virtual graphStatus UpdateOutputFromSubgraphs(const std::vector<GeTensorDescPtr> &src, | |||
| GeTensorDescPtr &dst) = 0; | |||
| virtual graphStatus UpdateOutputFromSubgraphsForMultiDims(const std::vector<GeTensorDescPtr> &src, | |||
| GeTensorDescPtr &dst) = 0; | |||
| private: | |||
| void AddChangedNodesImmediateRepass(const std::set<NodePtr> &changed_nodes); | |||
| bool ContainsSubgraph(const NodePtr &node); | |||
| std::vector<ComputeGraphPtr> GetCurNodeSubgraphs(const NodePtr &node); | |||
| graphStatus UpdateTensorDescToSubgraphData(NodePtr &node); | |||
| graphStatus UpdateTensorDescToParentNodeOutput(NodePtr &node); | |||
| graphStatus UpdateParentNodeContainsSubgraphs(NodePtr &node, | |||
| const std::vector<std::vector<GeTensorDescPtr>> &ref_out_tensors); | |||
| graphStatus UpdateTensorDescToPeerInputs(NodePtr &node, std::set<NodePtr> &changed_nodes); | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_PASSES_INFER_BASE_PASS_H_ | |||
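| A minimal sketch of a concrete pass wiring into the hooks above; the class name and the trivial bodies are hypothetical and shown for illustration only (the real implementation added by this change is InferValueRangePass, below). | |||
| // illustration_only_pass.h -- hypothetical, not part of this change | |||
| #include "graph/passes/infer_base_pass.h" | |||
| namespace ge { | |||
| class IllustrationOnlyPass : public InferBasePass { | |||
| protected: | |||
| std::string SerialTensorInfo(const GeTensorDescPtr &tensor_desc) const override { | |||
| return tensor_desc == nullptr ? "null" : tensor_desc->MutableShape().ToString(); | |||
| } | |||
| graphStatus Infer(NodePtr &node) override { | |||
| // Compute whatever this pass infers on node's output descs here. | |||
| (void)node; | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| graphStatus UpdateTensorDesc(const GeTensorDescPtr &src, GeTensorDescPtr &dst, bool &changed) override { | |||
| // Copy the inferred information from src to dst and report whether dst changed. | |||
| (void)src; (void)dst; | |||
| changed = false; | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| graphStatus UpdateOutputFromSubgraphs(const std::vector<GeTensorDescPtr> &src, GeTensorDescPtr &dst) override { | |||
| // Merge the subgraph NetOutput descs in src into the parent output desc dst. | |||
| (void)src; (void)dst; | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| graphStatus UpdateOutputFromSubgraphsForMultiDims(const std::vector<GeTensorDescPtr> &src, | |||
| GeTensorDescPtr &dst) override { | |||
| (void)src; (void)dst; | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| }; | |||
| }  // namespace ge | |||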
| @@ -0,0 +1,523 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph/passes/infer_value_range_pass.h" | |||
| #include "common/formats/utils/formats_trans_utils.h" | |||
| #include "common/util/error_manager/error_manager.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/operator_factory_impl.h" | |||
| #include "graph/passes/constant_folding_pass.h" | |||
| #include "graph/utils/type_utils.h" | |||
| #include "common/ge/ge_util.h" | |||
| using std::unique_ptr; | |||
| namespace ge { | |||
| namespace { | |||
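| // GET_DATA_BY_DTYPE expands to a switch case that reads the lower/upper boundary output tensors as TYPE | |||
| // and pairs them element-wise into output_tensor_value_range. | |||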
| #define GET_DATA_BY_DTYPE(DTYPE, TYPE) \ | |||
| case (DTYPE): \ | |||
| ConstructValueRange<TYPE>(lower_boundary_tensor, upper_boundary_tensor, output_tensor_value_range); \ | |||
| break; | |||
| void SerialShapeRange(const GeTensorDescPtr &desc, std::string &desc_str) { | |||
| std::vector<std::pair<int64_t, int64_t>> shape_range; | |||
| (void)desc->GetShapeRange(shape_range); | |||
| desc_str += formats::RangeToString(shape_range); | |||
| shape_range.clear(); | |||
| (void)desc->GetOriginShapeRange(shape_range); | |||
| desc_str += ","; | |||
| desc_str += formats::RangeToString(shape_range); | |||
| shape_range.clear(); | |||
| } | |||
| Status RunCpuKernelForValueRange(NodePtr &node, const vector<ConstGeTensorPtr> &inputs, | |||
| std::vector<GeTensorPtr> &outputs) { | |||
| // Try the constant folding kernel first; if it fails, fall back to the kernel registered for this node type. | |||
| auto ret = ConstantFoldingPass::RunOpKernel(node, inputs, outputs); | |||
| if (ret != SUCCESS) { | |||
| auto op_kernel = folding_pass::GetKernelByType(node); | |||
| if (op_kernel == nullptr) { | |||
| GELOGW("Calculate value range failed, no op kernel for node %s type %s", node->GetName().c_str(), | |||
| node->GetType().c_str()); | |||
| return NOT_CHANGED; | |||
| } | |||
| ret = op_kernel->Compute(node->GetOpDesc(), inputs, outputs); | |||
| if (ret != SUCCESS) { | |||
| GELOGW("Calculate value range failed, node %s run cpu kernel failed.", node->GetName().c_str()); | |||
| return NOT_CHANGED; | |||
| } | |||
| } | |||
| GELOGI("Node %s type %s, run cpu kernel success.", node->GetName().c_str(), node->GetType().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| } // namespace | |||
| graphStatus InferValueRangePass::Infer(NodePtr &node) { | |||
| auto infer_value_range_param = OperatorFactoryImpl::GetInferValueRangePara(node->GetType()); | |||
| // Use registered func to calculate value range | |||
| if (!infer_value_range_param.use_cpu_kernel) { | |||
| if (infer_value_range_param.infer_value_func == nullptr) { | |||
| GELOGW("The registered func of node %s to infer value range is nullptr.", node->GetName().c_str()); | |||
| return GRAPH_NOT_CHANGED; | |||
| } | |||
| Operator op = OpDescUtils::CreateOperatorFromNode(node); | |||
| auto ret = node->GetOpDesc()->CallInferValueRangeFunc(op); | |||
| if (ret != GRAPH_SUCCESS) { | |||
| GELOGW("Node %s call infer value range func failed, ret: %u.", node->GetName().c_str(), ret); | |||
| return GRAPH_NOT_CHANGED; | |||
| } | |||
| GELOGD("Node %s infer value range func succeed by registered func.", node->GetName().c_str()); | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| // If any input value range contains -1 (unknown), the cpu kernel cannot calculate a correct result, so set the worst-case range {1:-1}. | |||
| if (InputHasUnknownValueRange(node)) { | |||
| GELOGI("Node %s has unknown value range in input tensors, set value range {1:-1}, and skip cpu kernel.", | |||
| node->GetName().c_str()); | |||
| return GenerateWorstValueRange(node); | |||
| } | |||
| // Use CPU kernel func to calculate value range | |||
| auto ret = ConstructInputAndInferValueRange(node); | |||
| if (ret != GRAPH_SUCCESS) { | |||
| GELOGW("Use CPU kernel to calculate value range failed. node: %s, ret: %u", node->GetName().c_str(), ret); | |||
| return GRAPH_NOT_CHANGED; | |||
| } | |||
| GELOGD("Node %s infer value range func succeed by running cpu kernel.", node->GetName().c_str()); | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| std::string InferValueRangePass::SerialTensorInfo(const GeTensorDescPtr &tensor_desc) const { | |||
| std::stringstream ss; | |||
| ss << "["; | |||
| ss << "(shape:[" << tensor_desc->MutableShape().ToString() << "]),"; | |||
| string range_str; | |||
| SerialShapeRange(tensor_desc, range_str); | |||
| ss << "(shape_range:" << range_str << "),"; | |||
| std::vector<std::pair<int64_t, int64_t>> value_range; | |||
| (void)tensor_desc->GetValueRange(value_range); | |||
| string value_range_str = formats::RangeToString(value_range); | |||
| ss << "(value_range:" << value_range_str << ")]"; | |||
| return ss.str(); | |||
| } | |||
| bool InferValueRangePass::NeedInfer(const NodePtr &node) const { | |||
| auto infer_value_range_param = OperatorFactoryImpl::GetInferValueRangePara(node->GetType()); | |||
| if (!infer_value_range_param.is_initialized) { | |||
| GELOGD("Node %s does not register func to infer value range, skip infer_value_range_pass.", | |||
| node->GetName().c_str()); | |||
| return false; | |||
| } | |||
| if (infer_value_range_param.when_call == INPUT_IS_DYNAMIC) { | |||
| // Only do infer for nodes whose inputs are all dynamic, such as the Shape op | |||
| if (InputIsDynamic(node)) { | |||
| return true; | |||
| } | |||
| GELOGD("Node %s register func to infer value range and when_call is INPUT_IS_DYNAMIC, but check input failed.", | |||
| node->GetName().c_str()); | |||
| } else if (infer_value_range_param.when_call == INPUT_HAS_VALUE_RANGE) { | |||
| // Only do infer for nodes whose inputs all have a value_range or are produced by Constant/Const nodes | |||
| if (InputIsConstOrHasValueRange(node)) { | |||
| return true; | |||
| } | |||
| GELOGD("Node %s register func to infer value range and when_call is INPUT_HAS_VALUE_RANGE, but check input failed.", | |||
| node->GetName().c_str()); | |||
| } | |||
| GELOGD("Node %s does not need to infer value range, skip infer_value_range_pass.", node->GetName().c_str()); | |||
| return false; | |||
| } | |||
| bool InferValueRangePass::InputIsDynamic(const NodePtr &node) const { | |||
| bool input_is_dynamic = false; | |||
| auto cur_op_desc = node->GetOpDesc(); | |||
| for (const auto &input_desc : cur_op_desc->GetAllInputsDescPtr()) { | |||
| auto dims = input_desc->GetShape().GetDims(); | |||
| for (auto dim : dims) { | |||
| if (dim == UNKNOWN_DIM || dim == UNKNOWN_DIM_NUM) { | |||
| input_is_dynamic = true; | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| return input_is_dynamic; | |||
| } | |||
| bool InferValueRangePass::InputIsConstOrHasValueRange(const NodePtr &node) const { | |||
| bool input_is_const_or_has_value_range = true; | |||
| auto cur_op_desc = node->GetOpDesc(); | |||
| auto in_data_anchors = node->GetAllInDataAnchors(); | |||
| for (size_t i = 0; i < in_data_anchors.size(); ++i) { | |||
| auto peer_out_anchor = in_data_anchors.at(i)->GetPeerOutAnchor(); | |||
| if (peer_out_anchor == nullptr) { | |||
| continue; | |||
| } | |||
| auto peer_node = peer_out_anchor->GetOwnerNode(); | |||
| if (peer_node == nullptr || peer_node->GetOpDesc() == nullptr) { | |||
| continue; | |||
| } | |||
| if ((peer_node->GetType() == CONSTANT) || (peer_node->GetType() == CONSTANTOP)) { | |||
| continue; | |||
| } | |||
| const auto &input_desc = cur_op_desc->GetInputDesc(i); | |||
| std::vector<std::pair<int64_t, int64_t>> value_range; | |||
| (void)input_desc.GetValueRange(value_range); | |||
| if (value_range.empty()) { | |||
| GELOGD("Node %s input %zu does not have value range, skip infer_value_range_pass for current node.", | |||
| node->GetName().c_str(), i); | |||
| input_is_const_or_has_value_range = false; | |||
| break; | |||
| } | |||
| } | |||
| return input_is_const_or_has_value_range; | |||
| } | |||
| bool InferValueRangePass::InputHasUnknownValueRange(const NodePtr &node) const { | |||
| bool has_unknown_value_range = false; | |||
| auto cur_op_desc = node->GetOpDesc(); | |||
| for (const auto &input_desc : cur_op_desc->GetAllInputsDescPtr()) { | |||
| std::vector<std::pair<int64_t, int64_t>> input_desc_value_range; | |||
| input_desc->GetValueRange(input_desc_value_range); | |||
| if (!input_desc_value_range.empty()) { | |||
| for (const auto &range : input_desc_value_range) { | |||
| if (range.first == -1 || range.second == -1) { | |||
| GELOGD("Node %s input tensors have unknown value range, value range is %s.", node->GetName().c_str(), | |||
| formats::RangeToString(input_desc_value_range).c_str()); | |||
| has_unknown_value_range = true; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| return has_unknown_value_range; | |||
| } | |||
| graphStatus InferValueRangePass::UpdateTensorDesc(const GeTensorDescPtr &src, GeTensorDescPtr &dst, bool &changed) { | |||
| if (src == nullptr || dst == nullptr) { | |||
| REPORT_CALL_ERROR("E19999", "While updating tensor desc, input desc is null."); | |||
| GELOGE(GRAPH_FAILED, "[Param][check] While updating tensor desc, input desc is null."); | |||
| return GRAPH_FAILED; | |||
| } | |||
| changed = false; | |||
| std::vector<std::pair<int64_t, int64_t>> src_value_range; | |||
| std::vector<std::pair<int64_t, int64_t>> dst_value_range; | |||
| (void)src->GetValueRange(src_value_range); | |||
| (void)dst->GetValueRange(dst_value_range); | |||
| if (src_value_range != dst_value_range) { | |||
| GELOGD("While updating tensor desc, value range has been changed, src value range: %s, dst value range: %s.", | |||
| formats::RangeToString(src_value_range).c_str(), formats::RangeToString(dst_value_range).c_str()); | |||
| changed = true; | |||
| } | |||
| dst->SetValueRange(src_value_range); | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| graphStatus InferValueRangePass::UpdateOutputFromSubgraphs(const std::vector<GeTensorDescPtr> &src, | |||
| GeTensorDescPtr &dst) { | |||
| std::vector<std::pair<int64_t, int64_t>> ref_out_tensor_value_range; | |||
| auto ref_out_tensor = src.at(0); | |||
| (void)ref_out_tensor->GetValueRange(ref_out_tensor_value_range); | |||
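| // Merge the value ranges reported by every subgraph NetOutput: keep an element's bounds only if all | |||
| // subgraphs agree on them, otherwise fall back to the unknown range {1, -1} for that element. | |||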
| for (auto &ref_tensor : src) { | |||
| std::vector<std::pair<int64_t, int64_t>> ref_tensor_value_range; | |||
| (void)ref_tensor->GetValueRange(ref_tensor_value_range); | |||
| if (ref_tensor_value_range.size() != ref_out_tensor_value_range.size()) { | |||
| GELOGD("Update TensorDesc %s failed, rank of value ranges %s and %s are not the same, skip value range refresh.", | |||
| dst->GetName().c_str(), formats::RangeToString(ref_out_tensor_value_range).c_str(), | |||
| formats::RangeToString(ref_tensor_value_range).c_str()); | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| for (size_t j = 0; j < ref_out_tensor_value_range.size(); j++) { | |||
| if ((ref_out_tensor_value_range.at(j).first != ref_tensor_value_range.at(j).first) || | |||
| (ref_out_tensor_value_range.at(j).second != ref_tensor_value_range.at(j).second)) { | |||
| ref_out_tensor_value_range[j] = std::make_pair(1, -1); | |||
| } | |||
| } | |||
| } | |||
| GELOGD("While updating output desc from subgraphs, set parent node desc value range %s.", | |||
| formats::RangeToString(ref_out_tensor_value_range).c_str()); | |||
| dst->SetValueRange(ref_out_tensor_value_range); | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| graphStatus InferValueRangePass::UpdateOutputFromSubgraphsForMultiDims(const std::vector<GeTensorDescPtr> &src, | |||
| GeTensorDescPtr &dst) { | |||
| REPORT_INNER_ERROR("E19999", | |||
| "Update TensorDesc %s failed. In dynamic multi-dims size scene, there should be no value range.", | |||
| dst->GetName().c_str()); | |||
| GELOGE(GRAPH_FAILED, | |||
| "[Update][TensorDesc] %s failed. In dynamic multi-dims size scene, there should be no value range.", | |||
| dst->GetName().c_str()); | |||
| return GRAPH_FAILED; | |||
| } | |||
| graphStatus InferValueRangePass::GenerateWorstValueRange(NodePtr &node) { | |||
| GELOGI("Node %s does not run cpu kernel, because input value range has -1.", node->GetName().c_str()); | |||
| OpDescPtr op_desc = node->GetOpDesc(); | |||
| for (size_t i = 0; i < op_desc->GetOutputsSize(); ++i) { | |||
| auto output_desc = op_desc->MutableOutputDesc(i); | |||
| if (output_desc == nullptr) { | |||
| continue; | |||
| } | |||
| auto output_i_shape = output_desc->GetShape(); | |||
| auto output_i_shape_size = output_i_shape.GetShapeSize(); | |||
| if (output_i_shape_size < 0) { | |||
| GELOGD("Node %s output shape is unknown, cannot infer value range, shape is %s.", node->GetName().c_str(), | |||
| formats::ShapeToString(output_i_shape).c_str()); | |||
| return GRAPH_NOT_CHANGED; | |||
| } | |||
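| // Every element of the output gets the worst-case range {1, -1}; a scalar output still gets one pair. | |||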
| std::vector<std::pair<int64_t, int64_t>> output_i_value_range(output_i_shape_size, {1, -1}); | |||
| if (output_i_shape.IsScalar()) { | |||
| output_i_value_range.emplace_back(1, -1); | |||
| } | |||
| output_desc->SetValueRange(output_i_value_range); | |||
| GELOGD("Node %s output %zu shape is %s, the generated worst value range is %s.", node->GetName().c_str(), i, | |||
| formats::ShapeToString(output_i_shape).c_str(), formats::RangeToString(output_i_value_range).c_str()); | |||
| } | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| template <typename T> | |||
| graphStatus InferValueRangePass::ConstructData(const GeTensorDesc &tensor_desc, bool use_floor_value, | |||
| GeTensorPtr &output_ptr) { | |||
| std::vector<std::pair<int64_t, int64_t>> value_range; | |||
| (void)tensor_desc.GetValueRange(value_range); | |||
| size_t value_range_data_num = value_range.size(); | |||
| auto tensor_shape = tensor_desc.GetShape(); | |||
| bool value_range_and_tensor_shape_matched = true; | |||
| if (tensor_shape.IsScalar()) { | |||
| // scalar tensor has only one value_range pair | |||
| if (value_range_data_num != 1) { | |||
| value_range_and_tensor_shape_matched = false; | |||
| } | |||
| } else { | |||
| // normal tensor, value_range size is equal to tensor shape size. | |||
| if (static_cast<int64_t>(value_range_data_num) != tensor_shape.GetShapeSize()) { | |||
| value_range_and_tensor_shape_matched = false; | |||
| } | |||
| } | |||
| if (!value_range_and_tensor_shape_matched) { | |||
| GELOGW("Input %s value range and tensor shape do not match. Value range size is %zu, tensor shape is %s.", | |||
| tensor_desc.GetName().c_str(), value_range_data_num, formats::ShapeToString(tensor_shape).c_str()); | |||
| return GRAPH_PARAM_INVALID; | |||
| } | |||
| unique_ptr<T[]> buf(new (std::nothrow) T[value_range_data_num]()); | |||
| if (buf == nullptr) { | |||
| REPORT_INNER_ERROR("E19999", "New buf failed"); | |||
| GELOGE(MEMALLOC_FAILED, "New buf failed"); | |||
| return GRAPH_FAILED; | |||
| } | |||
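| // Fill each element with the lower or upper bound of its value range, depending on use_floor_value. | |||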
| for (size_t j = 0; j < value_range_data_num; ++j) { | |||
| auto value_range_j = use_floor_value ? value_range[j].first : value_range[j].second; | |||
| buf[j] = static_cast<T>(value_range_j); | |||
| } | |||
| if (output_ptr->SetData(reinterpret_cast<uint8_t *>(buf.get()), value_range_data_num * sizeof(T)) != GRAPH_SUCCESS) { | |||
| GELOGW("Set data failed while constructing value range input tensor."); | |||
| return GRAPH_NOT_CHANGED; | |||
| } | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| graphStatus InferValueRangePass::ConstructDataByType(const GeTensorDesc &tensor_desc, bool use_floor_value, | |||
| GeTensorPtr &output_ptr) { | |||
| graphStatus ret = GRAPH_SUCCESS; | |||
| auto data_type = tensor_desc.GetDataType(); | |||
| output_ptr->MutableTensorDesc().SetDataType(data_type); | |||
| switch (data_type) { | |||
| case DT_FLOAT: | |||
| ret = ConstructData<float>(tensor_desc, use_floor_value, output_ptr); | |||
| break; | |||
| case DT_DOUBLE: | |||
| ret = ConstructData<double>(tensor_desc, use_floor_value, output_ptr); | |||
| break; | |||
| case DT_UINT8: | |||
| ret = ConstructData<uint8_t>(tensor_desc, use_floor_value, output_ptr); | |||
| break; | |||
| case DT_INT8: | |||
| ret = ConstructData<int8_t>(tensor_desc, use_floor_value, output_ptr); | |||
| break; | |||
| case DT_UINT16: | |||
| ret = ConstructData<uint16_t>(tensor_desc, use_floor_value, output_ptr); | |||
| break; | |||
| case DT_INT16: | |||
| ret = ConstructData<int16_t>(tensor_desc, use_floor_value, output_ptr); | |||
| break; | |||
| case DT_INT32: | |||
| ret = ConstructData<int32_t>(tensor_desc, use_floor_value, output_ptr); | |||
| break; | |||
| case DT_INT64: | |||
| ret = ConstructData<int64_t>(tensor_desc, use_floor_value, output_ptr); | |||
| break; | |||
| default: | |||
| GELOGW("Data type:%s is not supported.", TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| ret = GRAPH_PARAM_INVALID; | |||
| } | |||
| return ret; | |||
| } | |||
| vector<ConstGeTensorPtr> InferValueRangePass::ConstructInputTensors(const NodePtr &node, bool use_floor_value) { | |||
| vector<ConstGeTensorPtr> input_tensors; | |||
| auto cur_op_desc = node->GetOpDesc(); | |||
| auto in_data_anchors = node->GetAllInDataAnchors(); | |||
| for (size_t i = 0; i < in_data_anchors.size(); ++i) { | |||
| auto peer_out_anchor = in_data_anchors.at(i)->GetPeerOutAnchor(); | |||
| if (peer_out_anchor == nullptr) { | |||
| continue; | |||
| } | |||
| auto peer_node = peer_out_anchor->GetOwnerNode(); | |||
| if (peer_node == nullptr) { | |||
| continue; | |||
| } | |||
| // construct input tensor by constant node | |||
| if ((peer_node->GetType() == CONSTANT) || (peer_node->GetType() == CONSTANTOP)) { | |||
| vector<GeTensorPtr> const_weight = OpDescUtils::MutableWeights(peer_node); | |||
| if (const_weight.empty()) { | |||
| GELOGW("MutableWeights failed, weight is empty, node: %s(%s)", peer_node->GetName().c_str(), | |||
| peer_node->GetType().c_str()); | |||
| return vector<ConstGeTensorPtr>(); | |||
| } | |||
| // const/constant op has only one weight | |||
| if (const_weight.at(0) == nullptr) { | |||
| GELOGW("MutableWeights failed, weight of constant is null, node name: %s(%s)", | |||
| peer_node->GetName().c_str(), peer_node->GetType().c_str()); | |||
| return vector<ConstGeTensorPtr>(); | |||
| } | |||
| input_tensors.push_back(const_weight.at(0)); | |||
| GELOGD("Node %s construct input tensor %zu by constant node.", node->GetName().c_str(), input_tensors.size()); | |||
| continue; | |||
| } | |||
| // construct input tensor by boundary of value range | |||
| const auto &input_tensor_desc = cur_op_desc->GetInputDesc(i); | |||
| GeTensorPtr tmp_tensor_ptr = MakeShared<GeTensor>(input_tensor_desc); | |||
| if (tmp_tensor_ptr == nullptr) { | |||
| REPORT_INNER_ERROR("E19999", "Make shared failed"); | |||
| GELOGE(MEMALLOC_FAILED, "Make shared failed"); | |||
| return vector<ConstGeTensorPtr>(); | |||
| } | |||
| auto ret = ConstructDataByType(input_tensor_desc, use_floor_value, tmp_tensor_ptr); | |||
| if (ret != GRAPH_SUCCESS) { | |||
| GELOGW("Construct input tensor by boundary of value range failed for input %s.", | |||
| input_tensor_desc.GetName().c_str()); | |||
| return vector<ConstGeTensorPtr>(); | |||
| } | |||
| input_tensors.push_back(tmp_tensor_ptr); | |||
| GELOGD("Node %s construct input tensor %zu by input desc value range.", node->GetName().c_str(), | |||
| input_tensors.size()); | |||
| } | |||
| return input_tensors; | |||
| } | |||
| graphStatus InferValueRangePass::ConstructInputAndInferValueRange(NodePtr &node) { | |||
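| // Run the cpu kernel twice: once with every input set to its lower boundary and once with its upper | |||
| // boundary, then pair the two results element-wise into the output value range. | |||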
| auto inputs = ConstructInputTensors(node, true); | |||
| if (inputs.empty()) { | |||
| return GRAPH_PARAM_INVALID; | |||
| } | |||
| vector<GeTensorPtr> lower_boundary_outputs; | |||
| auto ret = RunCpuKernelForValueRange(node, inputs, lower_boundary_outputs); | |||
| if (ret != SUCCESS) { | |||
| GELOGW("Node %s run cpu kernel failed while calculating value range.", node->GetName().c_str()); | |||
| return GRAPH_PARAM_INVALID; | |||
| } | |||
| inputs = ConstructInputTensors(node, false); | |||
| if (inputs.empty()) { | |||
| return GRAPH_PARAM_INVALID; | |||
| } | |||
| vector<GeTensorPtr> upper_boundary_outputs; | |||
| ret = RunCpuKernelForValueRange(node, inputs, upper_boundary_outputs); | |||
| if (ret != SUCCESS) { | |||
| GELOGW("Node %s run cpu kernel failed while calculating value range.", node->GetName().c_str()); | |||
| return GRAPH_PARAM_INVALID; | |||
| } | |||
| // construct value range from output tensor | |||
| OpDescPtr node_desc = node->GetOpDesc(); | |||
| std::vector<std::pair<int64_t, int64_t>> output_tensor_value_range; | |||
| size_t node_output_desc_size = node_desc->GetOutputsSize(); | |||
| for (size_t i = 0; i < node_output_desc_size; ++i) { | |||
| output_tensor_value_range.clear(); | |||
| auto output_tensor_desc = node_desc->MutableOutputDesc(i); | |||
| auto output_shape_size = output_tensor_desc->GetShape().GetShapeSize(); | |||
| auto lower_boundary_tensor = lower_boundary_outputs[i]; | |||
| auto lower_boundary_shape = lower_boundary_tensor->GetTensorDesc().GetShape(); | |||
| auto upper_boundary_tensor = upper_boundary_outputs[i]; | |||
| auto upper_boundary_shape = upper_boundary_tensor->GetTensorDesc().GetShape(); | |||
| if (lower_boundary_shape.GetShapeSize() != output_shape_size || | |||
| upper_boundary_shape.GetShapeSize() != output_shape_size) { | |||
| GELOGD( | |||
| "Cpu kernel result shapes %s, %s and output shape %s do not match, can not infer value range for output %s.", | |||
| formats::ShapeToString(lower_boundary_shape).c_str(), formats::ShapeToString(upper_boundary_shape).c_str(), | |||
| formats::ShapeToString(output_tensor_desc->GetShape()).c_str(), output_tensor_desc->GetName().c_str()); | |||
| return GRAPH_PARAM_INVALID; | |||
| } | |||
| auto data_type = output_tensor_desc->GetDataType(); | |||
| switch (data_type) { | |||
| GET_DATA_BY_DTYPE(DT_INT8, int8_t) | |||
| GET_DATA_BY_DTYPE(DT_INT16, int16_t) | |||
| GET_DATA_BY_DTYPE(DT_INT32, int32_t) | |||
| GET_DATA_BY_DTYPE(DT_INT64, int64_t) | |||
| GET_DATA_BY_DTYPE(DT_UINT8, uint8_t) | |||
| GET_DATA_BY_DTYPE(DT_UINT16, uint16_t) | |||
| GET_DATA_BY_DTYPE(DT_UINT32, uint32_t) | |||
| GET_DATA_BY_DTYPE(DT_UINT64, uint64_t) | |||
| GET_DATA_BY_DTYPE(DT_FLOAT, float) | |||
| GET_DATA_BY_DTYPE(DT_DOUBLE, double) | |||
| default: | |||
| GELOGW("Data type:%s is not supported.", TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
| return GRAPH_PARAM_INVALID; | |||
| } | |||
| output_tensor_desc->SetValueRange(output_tensor_value_range); | |||
| GELOGD("Node %s calculates output %zu value range %s by running cpu kernel.", node->GetName().c_str(), i, | |||
| formats::RangeToString(output_tensor_value_range).c_str()); | |||
| } | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| template <typename T> | |||
| void InferValueRangePass::ConstructValueRange(const GeTensorPtr &left_tensor, const GeTensorPtr &right_tensor, | |||
| std::vector<std::pair<int64_t, int64_t>> &value_range) { | |||
| auto x = reinterpret_cast<const T *>(left_tensor->GetData().GetData()); | |||
| auto y = reinterpret_cast<const T *>(right_tensor->GetData().GetData()); | |||
| if (x == nullptr || y == nullptr) { | |||
| GELOGI("Output tensor of cpu kernel does not have data, no way to set value range."); | |||
| return; | |||
| } | |||
| auto left_tensor_shape = left_tensor->GetTensorDesc().GetShape(); | |||
| for (auto j = 0; j < left_tensor_shape.GetShapeSize(); ++j) { | |||
| auto left = static_cast<int64_t>(*(x + j)); | |||
| auto right = static_cast<int64_t>(*(y + j)); | |||
| value_range.emplace_back(left, right); | |||
| } | |||
| if (left_tensor_shape.IsScalar()) { | |||
| GELOGD("When inferring value range, output tensors of cpu kernel are scalar tensors."); | |||
| value_range.emplace_back(static_cast<int64_t>(*x), static_cast<int64_t>(*y)); | |||
| } | |||
| } | |||
| } // namespace ge | |||
| @@ -0,0 +1,49 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_GRAPH_PASSES_INFER_VALUE_RANGE_PASS_H_ | |||
| #define GE_GRAPH_PASSES_INFER_VALUE_RANGE_PASS_H_ | |||
| #include "graph/passes/infer_base_pass.h" | |||
| namespace ge { | |||
| class InferValueRangePass : public InferBasePass { | |||
| public: | |||
| graphStatus Infer(NodePtr &node) override; | |||
| private: | |||
| std::string SerialTensorInfo(const GeTensorDescPtr &tensor_desc) const override; | |||
| graphStatus UpdateTensorDesc(const GeTensorDescPtr &src, GeTensorDescPtr &dst, bool &changed) override; | |||
| graphStatus UpdateOutputFromSubgraphs(const std::vector<GeTensorDescPtr> &src, GeTensorDescPtr &dst) override; | |||
| graphStatus UpdateOutputFromSubgraphsForMultiDims(const std::vector<GeTensorDescPtr> &src, | |||
| GeTensorDescPtr &dst) override; | |||
| bool NeedInfer(const NodePtr &node) const override; | |||
| bool InputIsDynamic(const NodePtr &node) const; | |||
| bool InputIsConstOrHasValueRange(const NodePtr &node) const; | |||
| bool InputHasUnknownValueRange(const NodePtr &node) const; | |||
| graphStatus GenerateWorstValueRange(NodePtr &node); | |||
| template <typename T> | |||
| graphStatus ConstructData(const GeTensorDesc &tensor_desc, bool use_floor_value, GeTensorPtr &output_ptr); | |||
| graphStatus ConstructDataByType(const GeTensorDesc &tensor_desc, bool use_floor_value, GeTensorPtr &output_ptr); | |||
| vector<ConstGeTensorPtr> ConstructInputTensors(const NodePtr &node, bool use_floor_value); | |||
| template <typename T> | |||
| void ConstructValueRange(const GeTensorPtr &left_tensor, const GeTensorPtr &right_tensor, | |||
| std::vector<std::pair<int64_t, int64_t>> &value_range); | |||
| graphStatus ConstructInputAndInferValueRange(NodePtr &node); | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_PASSES_INFER_VALUE_RANGE_PASS_H_ | |||
| @@ -16,8 +16,6 @@ | |||
| #include "graph/passes/mark_force_unknown_for_cond_pass.h" | |||
| #include <queue> | |||
| #include "graph/utils/node_utils.h" | |||
| #include "graph/common/omg_util.h" | |||
| @@ -26,17 +24,7 @@ namespace { | |||
| inline bool IsMergeInLoop(const NodePtr &node) { | |||
| const static std::set<std::string> kLoopMergeInputs{ ENTER, REFENTER, NEXTITERATION, REFNEXTITERATION }; | |||
| std::string node_type; | |||
| (void)GetOriginalType(node, node_type); | |||
| return kLoopMergeInputs.count(node_type) > 0; | |||
| } | |||
| inline bool IsSwitchInLoop(const NodePtr &node) { | |||
| const static std::set<std::string> kLoopSwitchInputs{ MERGE, REFMERGE, LOOPCOND }; | |||
| std::string node_type; | |||
| (void)GetOriginalType(node, node_type); | |||
| return kLoopSwitchInputs.count(node_type) > 0; | |||
| return kLoopMergeInputs.count(NodeUtils::GetNodeType(node)) > 0; | |||
| } | |||
| } | |||
| @@ -44,10 +32,7 @@ Status MarkForceUnknownForCondPass::Run(ComputeGraphPtr graph) { | |||
| GELOGD("MarkForceUnknownForCondPass Enter"); | |||
| std::map<NodePtr, std::vector<NodePtr>> switch_groups; | |||
| for (const auto &node : graph->GetDirectNode()) { | |||
| std::string node_type; | |||
| GE_CHK_STATUS_RET(GetOriginalType(node, node_type), | |||
| "[Get][OriginalType] of node in graph:%s failed.", graph->GetName().c_str()); | |||
| if (kMergeOpTypes.count(node_type) == 0) { | |||
| if (kMergeOpTypes.count(NodeUtils::GetNodeType(node)) == 0) { | |||
| continue; | |||
| } | |||
| @@ -64,6 +49,51 @@ Status MarkForceUnknownForCondPass::Run(ComputeGraphPtr graph) { | |||
| return SUCCESS; | |||
| } | |||
| /// | |||
| /// @brief Deal with Switch node for LoopCond | |||
| /// @param [in] Switch node | |||
| /// @param [in] dest span | |||
| /// @param [out] Search queue | |||
| /// @return true: Switch is in a while loop / false: not in a while loop. | |||
| /// | |||
| bool MarkForceUnknownForCondPass::DealAsLoopSwitch(const NodePtr &node, uint32_t dst_span, | |||
| std::queue<std::pair<NodePtr, uint32_t>> &search_queue) { | |||
| /// LoopCond --->\. | |||
| /// \. | |||
| /// Enter-----------+ \. | |||
| /// +--> Merge --> Switch --> Exit | |||
| /// NextIteration---+ | |||
| const auto is_loop_op = [](const NodePtr &n) { | |||
| return NodeUtils::GetNodeType(n) == LOOPCOND; | |||
| }; | |||
| const auto is_exit_op = [](const NodePtr &n) { | |||
| return kExitOpTypes.count(NodeUtils::GetNodeType(n)) > 0; | |||
| }; | |||
| const auto src_nodes = node->GetInAllNodes(); | |||
| const auto dst_nodes = node->GetOutAllNodes(); | |||
| if (std::none_of(src_nodes.begin(), src_nodes.end(), is_loop_op) && | |||
| std::none_of(dst_nodes.begin(), dst_nodes.end(), is_exit_op)) { | |||
| return false; | |||
| } | |||
| for (const auto &m : src_nodes) { | |||
| if (kMergeOpTypes.count(NodeUtils::GetNodeType(m)) > 0) { | |||
| for (const auto &n : m->GetInAllNodes()) { | |||
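| // Skip the NextIteration back edge so the loop body is not revisited; queue the other Merge inputs. | |||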
| if (kNextIterationOpTypes.count(NodeUtils::GetNodeType(n)) > 0) { | |||
| continue; | |||
| } | |||
| search_queue.push({n, dst_span}); | |||
| GELOGD("Travel in Loop: %s <-- %s <-- %s, span is: %u", node->GetName().c_str(), m->GetName().c_str(), | |||
| n->GetName().c_str(), dst_span); | |||
| } | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| /// | |||
| /// @brief Mark force unknown shape for Switch node | |||
| /// @param [in] merge node | |||
| @@ -72,6 +102,7 @@ Status MarkForceUnknownForCondPass::Run(ComputeGraphPtr graph) { | |||
| /// | |||
| void MarkForceUnknownForCondPass::MarkUnknownForSwitch(const NodePtr &node, std::vector<NodePtr> &switch_group) { | |||
| // Switch --> {Switch --> Merge} --> Merge | |||
| GELOGD("Search Switch node for Merge: %s", node->GetName().c_str()); | |||
| std::unordered_set<NodePtr> nodes_seen; | |||
| std::queue<std::pair<NodePtr, uint32_t>> search_queue({{node, 0}}); | |||
| while (!search_queue.empty()) { | |||
| @@ -79,43 +110,25 @@ void MarkForceUnknownForCondPass::MarkUnknownForSwitch(const NodePtr &node, std: | |||
| const auto dst_span = search_queue.front().second; | |||
| search_queue.pop(); | |||
| // Switch --> Identity --> Constant | |||
| for (const auto &in_node : dst_node->GetInControlNodes()) { | |||
| if (nodes_seen.count(in_node) > 0) { | |||
| GELOGD("Travel node: %s, Skip already seen node: %s", dst_node->GetName().c_str(), in_node->GetName().c_str()); | |||
| continue; | |||
| } | |||
| nodes_seen.insert(in_node); | |||
| if (in_node->GetType() == IDENTITY) { | |||
| GELOGD("Travel node: %s, In control: %s, span is: %u", dst_node->GetName().c_str(), | |||
| in_node->GetName().c_str(), dst_span); | |||
| search_queue.push({in_node, dst_span}); | |||
| } | |||
| } | |||
| for (const auto &in_node : dst_node->GetInDataNodes()) { | |||
| for (const auto &in_node : dst_node->GetInAllNodes()) { | |||
| if (nodes_seen.count(in_node) > 0) { | |||
| GELOGD("Travel node: %s, Skip already seen node: %s", dst_node->GetName().c_str(), in_node->GetName().c_str()); | |||
| continue; | |||
| } | |||
| nodes_seen.insert(in_node); | |||
| std::string node_type; | |||
| (void)GetOriginalType(in_node, node_type); | |||
| const std::string node_type = NodeUtils::GetNodeType(in_node); | |||
| GELOGD("Travel node: %s, %s node: %s, span is: %u", dst_node->GetName().c_str(), node_type.c_str(), | |||
| in_node->GetName().c_str(), dst_span); | |||
| if (kSwitchOpTypes.count(node_type) > 0) { // Switch input node. | |||
| if (DealAsLoopSwitch(in_node, dst_span, search_queue)) { | |||
| continue; | |||
| } | |||
| if (dst_span > 0) { | |||
| search_queue.push({in_node, dst_span - 1}); | |||
| } else { | |||
| const auto &all_in_nodes = in_node->GetInDataNodes(); | |||
| if (std::any_of(all_in_nodes.begin(), all_in_nodes.end(), IsSwitchInLoop)) { | |||
| GELOGW("Travel node: %s, %s node: %s, Skip LoopCond switch", dst_node->GetName().c_str(), node_type.c_str(), | |||
| in_node->GetName().c_str()); | |||
| } else { | |||
| switch_group.emplace_back(in_node); | |||
| } | |||
| switch_group.emplace_back(in_node); | |||
| } | |||
| } else if (kMergeOpTypes.count(node_type) > 0) { // Merge input node. | |||
| search_queue.push({in_node, dst_span + 1}); | |||
| @@ -19,12 +19,23 @@ | |||
| #include "inc/graph_pass.h" | |||
| #include <queue> | |||
| namespace ge { | |||
| class MarkForceUnknownForCondPass : public GraphPass { | |||
| public: | |||
| Status Run(ComputeGraphPtr graph); | |||
| private: | |||
| /// | |||
| /// @brief Deal with Switch node for LoopCond | |||
| /// @param [in] Switch node | |||
| /// @param [in] dest span | |||
| /// @param [out] Search queue | |||
| /// @return true: Switch is in a while loop / false: not in a while loop. | |||
| /// | |||
| bool DealAsLoopSwitch(const NodePtr &node, uint32_t dst_span, std::queue<std::pair<NodePtr, uint32_t>> &search_queue); | |||
| /// | |||
| /// @brief Mark force unknown shape for Switch node | |||
| /// @param [in] merge node | |||
| @@ -24,7 +24,9 @@ using std::string; | |||
| namespace ge { | |||
| namespace { | |||
| const int64_t kLoopType = 1; | |||
| constexpr int64_t kLoopType = 1; | |||
| constexpr uint8_t kMaxTransOp = 3; | |||
| constexpr uint8_t kTransOpIoSize = 1; | |||
| } | |||
| Status NextIterationPass::Run(ComputeGraphPtr graph) { | |||
| @@ -287,18 +289,25 @@ void NextIterationPass::HandleSwitchExitNodes(const LoopCondGroup &loop_group, i | |||
| std::string node_type; | |||
| for (const auto &switch_node : loop_group.switch_nodes) { | |||
| SetControlFlowGroup(switch_node, group_index); | |||
| for (const auto &node : switch_node->GetOutDataNodes()) { | |||
| (void)GetOriginalType(node, node_type); | |||
| if (kExitOpTypes.count(node_type) > 0) { | |||
| SetControlFlowGroup(node, group_index); | |||
| } else { | |||
| // For: Switch -> Cast -> Exit | |||
| for (const auto &n : node->GetOutDataNodes()) { | |||
| (void)GetOriginalType(n, node_type); | |||
| if (kExitOpTypes.count(node_type) > 0) { | |||
| SetControlFlowGroup(n, group_index); | |||
| } | |||
| for (auto node : switch_node->GetOutDataNodes()) { | |||
| // Switch --> Exit | |||
| // Switch --> Cast --> Exit | |||
| // Switch --> TransData --> Cast --> Exit | |||
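| // Walk forward through at most kMaxTransOp single-input/single-output trans nodes to reach the Exit. | |||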
| for (uint8_t i = 0; i < kMaxTransOp; ++i) { | |||
| if (node->GetInDataNodes().size() != kTransOpIoSize || node->GetAllOutDataAnchorsSize() != kTransOpIoSize) { | |||
| break; | |||
| } | |||
| if (kExitOpTypes.count(NodeUtils::GetNodeType(node)) > 0) { | |||
| SetControlFlowGroup(node, group_index); | |||
| break; | |||
| } | |||
| const auto &all_nodes = node->GetOutAllNodes(); | |||
| if (all_nodes.size() != kTransOpIoSize) { | |||
| break; | |||
| } | |||
| node = all_nodes.at(0); | |||
| } | |||
| } | |||
| } | |||
| @@ -71,7 +71,7 @@ Status ReplaceWithEmptyConstPass::Run(NodePtr &node) { | |||
| GELOGI("Node %s Got empty output_desc_ptr, ignore current pass.", node->GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| if (!IsEmptyTenor(output_desc_ptr->GetShape())) { | |||
| if (!IsKnownEmptyTenor(output_desc_ptr->GetShape())) { | |||
| is_all_output_empty = false; | |||
| break; | |||
| } | |||
| @@ -107,12 +107,16 @@ Status ReplaceWithEmptyConstPass::GetOutputsOfCurrNode(const NodePtr &node_to_re | |||
| return SUCCESS; | |||
| } | |||
| bool ReplaceWithEmptyConstPass::IsEmptyTenor(const GeShape &shape) const { | |||
| bool ReplaceWithEmptyConstPass::IsKnownEmptyTenor(const GeShape &shape) const { | |||
| bool is_known_empty_tensor = false; | |||
| for (auto dim : shape.GetDims()) { | |||
| if (dim == 0) { | |||
| return true; | |||
| if (dim < 0) { | |||
| // current dim is unknown dim, skip replace | |||
| return false; | |||
| } else if (dim == 0) { | |||
| is_known_empty_tensor = true; | |||
| } | |||
| } | |||
| return false; | |||
| return is_known_empty_tensor; | |||
| } | |||
| } // namespace ge | |||
| @@ -26,7 +26,7 @@ class ReplaceWithEmptyConstPass : public FoldingPass { | |||
| private: | |||
| Status GetOutputsOfCurrNode(const NodePtr &node_to_replace, vector<GeTensorPtr> &outputs); | |||
| bool IsEmptyTenor(const GeShape &shape) const; | |||
| bool IsKnownEmptyTenor(const GeShape &shape) const; | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_PASSES_REPLACE_WITH_EMPTY_CONST_PASS_H_ | |||
| @@ -395,8 +395,9 @@ NodePtr SwitchToStreamSwitchPass::CreateStreamSwitchNode(const ComputeGraphPtr & | |||
| peer_cond_anchor->GetOwnerNode()->GetName().c_str(), stream_switch->GetName().c_str()); | |||
| int64_t group_index = -1; | |||
| (void)AttrUtils::GetInt(switch_node->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, group_index); | |||
| SetControlFlowGroup(stream_switch, group_index); | |||
| if (AttrUtils::GetInt(switch_node->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, group_index)) { | |||
| SetControlFlowGroup(stream_switch, group_index); | |||
| } | |||
| return stream_switch; | |||
| } | |||
| @@ -54,6 +54,7 @@ | |||
| #include "graph/passes/hccl_group_pass.h" | |||
| #include "graph/passes/identity_pass.h" | |||
| #include "graph/passes/infershape_pass.h" | |||
| #include "graph/passes/infer_value_range_pass.h" | |||
| #include "graph/passes/merge_pass.h" | |||
| #include "graph/passes/net_output_pass.h" | |||
| #include "graph/passes/no_use_reshape_remove_pass.h" | |||
| @@ -2016,6 +2017,8 @@ Status GraphPrepare::InferShapeForPreprocess() { | |||
| names_to_passes.emplace_back("DimensionComputePass", &dimension_compute_pass); | |||
| ConstantFoldingPass constant_folding_pass; | |||
| names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); | |||
| InferValueRangePass infer_value_pass; | |||
| names_to_passes.emplace_back("InferValuePass", &infer_value_pass); | |||
| int32_t dev_count = 0; | |||
| AicpuConstantFoldingPass aicpu_constant_folding_pass; | |||
| @@ -568,6 +568,7 @@ Status InsertNewOpUtil::GetDataRelatedNode(NodePtr &node, std::map<NodePtr, std: | |||
| } | |||
| std::unique_ptr<domi::AippOpParams> aipp_params(new (std::nothrow) domi::AippOpParams()); | |||
| GE_CHECK_NOTNULL(aipp_params); | |||
| ge::GeAttrValue::NAMED_ATTRS aipp_attr; | |||
| GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), ACL_ERROR_GE_AIPP_NOT_EXIST, | |||
| "[Get][Attr] %s from op:%s failed", ATTR_NAME_AIPP.c_str(), data_op->GetName().c_str()); | |||
| @@ -1206,7 +1206,7 @@ Status MultiBatchGraphCopyer::CheckCopyResult(const std::vector<NodePtr> &start_ | |||
| auto dims = NodeUtils::GetOutputDesc(*node, kDataOutIndex).GetShape().GetDims(); | |||
| if (!IsAllDimsPositive(dims)) { | |||
| REPORT_CALL_ERROR("E19999", "Failed to copy multi batch graph, the node %s still has unknown shape %s", | |||
| node->GetName().c_str(), formats::ShapeToString(dims).c_str()); | |||
| node->GetName().c_str(), formats::ShapeToString(dims).c_str()); | |||
| GELOGE(INTERNAL_ERROR, "[Check][Param] Failed to copy multi batch graph, the node %s still has unknown shape %s", | |||
| node->GetName().c_str(), formats::ShapeToString(dims).c_str()); | |||
| return INTERNAL_ERROR; | |||
| @@ -295,13 +295,15 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData ¤t_data, Hy | |||
| } | |||
| } | |||
| tensor_desc->SetShape(shape); | |||
| args.input_desc[input_index] = tensor_desc; | |||
| GELOGD("Update shape of input[%zu] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str()); | |||
| GELOGD("Update shape[%s] of input[%zu] to [%s]", | |||
| shape.ToString().c_str(), input_index, tensor_desc->MutableShape().ToString().c_str()); | |||
| GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, tensor_size), | |||
| "[Invoke][GetTensorMemorySizeInBytes]Failed to calc tensor size," | |||
| "index = %zu, shape = [%s], model_id = %u.", | |||
| input_index, tensor_desc->GetShape().ToString().c_str(), model_id_); | |||
| GELOGD("Input tensor[%zu] size = %zu", input_index, tensor_size); | |||
| GELOGD("Input tensor[%zu] size = %ld", input_index, tensor_size); | |||
| TensorUtils::SetSize(*tensor_desc, tensor_size); | |||
| args.input_desc[input_index] = tensor_desc; | |||
| } | |||
| GE_CHECK_GE(tensor_size, 0); | |||
| @@ -33,9 +33,6 @@ HybridModelExecutor::HybridModelExecutor(HybridModel *model, uint32_t device_id, | |||
| } | |||
| HybridModelExecutor::~HybridModelExecutor() { | |||
| if (context_.rt_gen_context != nullptr) { | |||
| (void) rtCtxDestroy(context_.rt_gen_context); | |||
| } | |||
| } | |||
| Status HybridModelExecutor::Init() { | |||
| @@ -139,7 +136,6 @@ Status HybridModelExecutor::Cleanup() { | |||
| Status HybridModelExecutor::InitExecutionContext() { | |||
| GE_CHK_RT_RET(rtCtxGetCurrent(&context_.rt_context)); | |||
| GE_CHK_RT_RET(rtCtxCreate(&context_.rt_gen_context, RT_CTX_GEN_MODE, 0)); | |||
| GE_CHK_RT_RET(rtCtxSetCurrent(context_.rt_context)); | |||
| context_.global_step = model_->GetGlobalStep(); | |||
| @@ -191,7 +191,6 @@ HybridModelPipelineExecutor::HybridModelPipelineExecutor(HybridModel *model, uin | |||
| } | |||
| Status StageExecutor::InitExecutionContext() { | |||
| GE_CHK_RT_RET(rtCtxCreate(&context_.rt_gen_context, RT_CTX_GEN_MODE, 0)); | |||
| GE_CHK_RT_RET(rtCtxSetCurrent(context_.rt_context)); | |||
| context_.model = model_; | |||
| @@ -326,17 +326,45 @@ std::shared_ptr<TaskContext> NodeState::GetTaskContext() { | |||
| } | |||
| void NodeState::SavePersistTensor(int input_idx, const TensorValue &tensor) { | |||
| if (node_item_->root_data_.count(input_idx) > 0) { | |||
| GELOGD("[%s] Save Root input tensor: %d", GetName().c_str(), input_idx); | |||
| root_tensor_values_[input_idx] = tensor; | |||
| const auto is_persist_tensor = [](const std::map<const NodeItem *, std::set<int>> &items, int idx) { | |||
| const auto is_exist = [&idx](const std::pair<const NodeItem *, std::set<int>> &items) { | |||
| return items.second.count(idx) > 0; | |||
| }; | |||
| return std::any_of(items.begin(), items.end(), is_exist); | |||
| }; | |||
| if (root_tensor_values_.count(input_idx) > 0) { | |||
| return; | |||
| } | |||
| if (node_item_->enter_data_.count(input_idx) > 0) { | |||
| if (is_persist_tensor(node_item_->root_data_, input_idx)) { | |||
| GELOGD("[%s] Save Root input tensor: %d", GetName().c_str(), input_idx); | |||
| root_tensor_values_[input_idx] = tensor; | |||
| } else if (is_persist_tensor(node_item_->enter_data_, input_idx)) { | |||
| GELOGD("[%s] Save Enter input tensor: %d", GetName().c_str(), input_idx); | |||
| root_tensor_values_[input_idx] = tensor; | |||
| } | |||
| } | |||
| void NodeState::UpdatePersistTensor() { | |||
| const auto update_tensor = [&](const std::map<const NodeItem *, std::set<int>> &items) { | |||
| for (const auto &item : items) { | |||
| for (const auto idx : item.second) { | |||
| UpdatePersistTensor(idx); | |||
| } | |||
| } | |||
| }; | |||
| if (root_tensor_values_.empty()) { | |||
| return; | |||
| } | |||
| update_tensor(node_item_->root_data_); | |||
| if (iteration_count_ > 0) { | |||
| update_tensor(node_item_->enter_data_); | |||
| } | |||
| } | |||
| void NodeState::UpdatePersistTensor(int input_idx) { | |||
| const auto it = root_tensor_values_.find(input_idx); | |||
| if (it == root_tensor_values_.end()) { | |||
| @@ -363,16 +391,9 @@ void NodeState::ResetContext(uint64_t iteration) { | |||
| data_scheduled_ = static_cast<uint32_t>(node_item_->root_data_.size()); | |||
| ctrl_scheduled_ = static_cast<uint32_t>(node_item_->root_ctrl_.size()); | |||
| for (auto item : node_item_->root_data_) { | |||
| UpdatePersistTensor(item.first); | |||
| } | |||
| if (iteration > 0) { | |||
| data_scheduled_ += static_cast<uint32_t>(node_item_->enter_data_.size()); | |||
| ctrl_scheduled_ += static_cast<uint32_t>(node_item_->enter_ctrl_.size()); | |||
| for (auto item : node_item_->enter_data_) { | |||
| UpdatePersistTensor(item.first); | |||
| } | |||
| } | |||
| iteration_count_ = iteration; | |||
| @@ -132,6 +132,7 @@ struct NodeState { | |||
| void RunNextIteration(); | |||
| void SavePersistTensor(int input_idx, const TensorValue &tensor); | |||
| void UpdatePersistTensor(); | |||
| Status NodeScheduled(const std::function<void(const NodeItem *)> &ready) const; | |||
| @@ -109,7 +109,6 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue | |||
| GE_CHECK_NOTNULL(output_desc); | |||
| output_desc->SetShape(tensor_desc->GetShape()); | |||
| output_desc->SetOriginShape(tensor_desc->GetOriginShape()); | |||
| output_desc->SetDataType(tensor_desc->GetDataType()); | |||
| node_state->SetSkipInferShape(true); | |||
| } | |||
| } | |||
| @@ -373,6 +373,7 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state, | |||
| auto executor = node_item.node_executor; | |||
| GE_CHECK_NOTNULL(executor); | |||
| RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PrepareTask] Start"); | |||
| node_state.UpdatePersistTensor(); | |||
| GE_CHK_STATUS_RET(executor->PrepareTask(*task, task_context), "[Prepare][Task] for [%s] failed.", | |||
| node_state.GetName().c_str()); | |||
| RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PrepareTask] End"); | |||
| @@ -21,10 +21,17 @@ | |||
| namespace ge { | |||
| namespace hybrid { | |||
| Status TaskCompileEngine::Compile(NodeState &node_state, GraphExecutionContext *context) { | |||
| const auto &node_item = *node_state.GetNodeItem(); | |||
| GE_CHECK_NOTNULL(context); | |||
| rtContext_t rt_gen_context = nullptr; | |||
| GE_CHK_RT_RET(rtCtxCreate(&rt_gen_context, RT_CTX_GEN_MODE, 0)); | |||
| std::function<void()> callback = [&]() { | |||
| (void) rtCtxDestroy(rt_gen_context); | |||
| GE_CHK_RT(rtCtxSetCurrent(context->rt_context)); | |||
| }; | |||
| GE_MAKE_GUARD(rt_gen_context, callback); | |||
| const auto &node_item = *node_state.GetNodeItem(); | |||
| RECORD_COMPILE_EVENT(context, node_item.NodeName().c_str(), "[Compile] Start"); | |||
| GE_CHK_RT_RET(rtCtxSetCurrent(context->rt_gen_context)); | |||
| if (context->ge_context != nullptr) { | |||
| GetThreadLocalContext() = *context->ge_context; | |||
| @@ -196,9 +196,7 @@ Status HybridModelBuilder::CopyGraph() { | |||
| GELOGD("Copy compute graph begin."); | |||
| auto root_graph = ge_root_model_->GetRootGraph(); | |||
| ge_root_model_->IncreaseBuildTimes(); | |||
| std::string new_graph_name = ge_root_model_->GetRootGraph()->GetName() + "_" + | |||
| std::to_string(ge_root_model_->GetBuildTimes()); | |||
| std::string new_graph_name = ge_root_model_->GetRootGraph()->GetName(); | |||
| ComputeGraphPtr new_root_graph = MakeShared<ComputeGraph>(new_graph_name); | |||
| GE_CHECK_NOTNULL(new_root_graph); | |||
| int32_t depth = 0; | |||
| @@ -1046,6 +1044,7 @@ Status HybridModelBuilder::InitConstantOps() { | |||
| } else { | |||
| var_tensor.reset(new(std::nothrow)TensorValue(nullptr, 0)); | |||
| } | |||
| GE_CHECK_NOTNULL(var_tensor); | |||
| } else { | |||
| GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor)); | |||
| GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize()); | |||
| @@ -24,6 +24,8 @@ | |||
| namespace ge { | |||
| namespace hybrid { | |||
| namespace { | |||
| const uint8_t kMaxTransCount = 3; | |||
| const uint32_t kTransOpIoSize = 1; | |||
| const char *const kAttrNameOriginalFusionGraph = "_original_fusion_graph"; | |||
| const char *const kNodeTypeRetVal = "_RetVal"; | |||
| const std::set<std::string> kControlOpTypes{ | |||
| @@ -39,6 +41,25 @@ const std::set<std::string> kMergeOpTypes{ | |||
| MERGE, REFMERGE, STREAMMERGE | |||
| }; | |||
| bool IsEnterFeedNode(NodePtr node) { | |||
| // For: Enter -> node | |||
| // For: Enter -> Cast -> node | |||
| // For: Enter -> TransData -> Cast -> node | |||
| for (uint8_t i = 0; i < kMaxTransCount; ++i) { | |||
| if (kEnterOpTypes.count(NodeUtils::GetNodeType(node)) > 0) { | |||
| GELOGD("Node[%u] is Enter feed node.", node->GetName().c_str()); | |||
| return true; | |||
| } | |||
| const auto all_nodes = node->GetInDataNodes(); | |||
| if (all_nodes.size() != kTransOpIoSize || node->GetAllInDataAnchorsSize() != kTransOpIoSize) { | |||
| return false; | |||
| } | |||
| node = all_nodes.at(0); | |||
| } | |||
| return false; | |||
| } | |||
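| // Editor's note: a hedged illustration, not part of the patch, restating how the walk above is | |||
| // expected to classify a few feeding chains; the node names below are hypothetical. | |||
| //   IsEnterFeedNode(enter)                            -> true  (matched on the first iteration) | |||
| //   Enter -> Cast, IsEnterFeedNode(cast)              -> true  (one single-input hop back to Enter) | |||
| //   Enter -> TransData -> Cast, IsEnterFeedNode(cast) -> true  (two hops, still within kMaxTransCount) | |||
| //   Data -> Cast, IsEnterFeedNode(cast)               -> false (walk ends without meeting an Enter) | |||
| //   Any node with more than one data input ends the walk immediately and returns false. | |||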
| Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgraph) { | |||
| uint32_t parent_index = 0; | |||
| if (!AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||
| @@ -395,11 +416,13 @@ void NodeItem::SetDataSend(NodeItem *node_item, int anchor_index) { | |||
| data_send_.emplace(node_item); | |||
| node_item->data_recv_[this] = anchor_index; | |||
| if (is_root_node_) { | |||
| node_item->root_data_[anchor_index] = this; | |||
| auto &data_anchors = node_item->root_data_[this]; | |||
| data_anchors.emplace(anchor_index); | |||
| } | |||
| // If an Enter node feeds a non-Merge node, treat the Enter as a root node. | |||
| if (IsEnterOp() && (node_item->node_type != STREAMMERGE)) { | |||
| node_item->enter_data_[anchor_index] = this; | |||
| if (IsEnterFeedNode(node) && (node_item->node_type != STREAMMERGE)) { | |||
| auto &data_anchors = node_item->enter_data_[this]; | |||
| data_anchors.emplace(anchor_index); | |||
| } | |||
| GELOGI("Node[%s] will control node[%s]", NodeName().c_str(), node_item->NodeName().c_str()); | |||
| } | |||
| @@ -417,7 +440,7 @@ void NodeItem::SetCtrlSend(NodeItem *node_item, uint32_t switch_index) { | |||
| node_item->root_ctrl_.emplace(this); | |||
| } | |||
| // If an Enter node feeds a control signal, treat the Enter as a root node. | |||
| if (IsEnterOp() && (node_item->node_type != STREAMMERGE && node_item->node_type != STREAMACTIVE)) { | |||
| if (IsEnterFeedNode(node) && (node_item->node_type != STREAMMERGE && node_item->node_type != STREAMACTIVE)) { | |||
| node_item->enter_ctrl_.emplace(this); | |||
| } | |||
| GELOGI("Node[%s] will control node[%s]", NodeName().c_str(), node_item->NodeName().c_str()); | |||
| @@ -148,9 +148,9 @@ struct NodeItem { | |||
| int64_t frame_index_ = -1; | |||
| int64_t parent_frame_ = -1; | |||
| std::set<const NodeItem *> root_ctrl_; // Recv ctrl from root node | |||
| std::map<int, const NodeItem *> root_data_; // Recv data from root node | |||
| std::map<const NodeItem *, std::set<int>> root_data_; // Recv data from root node | |||
| std::set<const NodeItem *> enter_ctrl_; // Recv ctrl from Enter node | |||
| std::map<int, const NodeItem *> enter_data_; // Recv data from Enter node | |||
| std::map<const NodeItem *, std::set<int>> enter_data_; // Recv data from Enter node | |||
| std::set<const NodeItem *> data_send_; // Send data notify to | |||
| std::map<const NodeItem *, int> data_recv_; // Recv data notify from | |||
| std::set<const NodeItem *> ctrl_send_; // Send ctrl notify to | |||
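| // Editor's note: a hedged sketch (not part of the patch) of how the reshaped maps are meant to be | |||
| // walked now that one upstream NodeItem can feed several anchors; the loop body is elided. | |||
| //   for (const auto &producer_and_anchors : root_data_) { | |||
| //     const NodeItem *producer = producer_and_anchors.first;   // the root/Enter feed node | |||
| //     for (int anchor_index : producer_and_anchors.second) {   // every anchor it feeds | |||
| //       // refresh input[anchor_index] with the tensor produced by `producer` | |||
| //     } | |||
| //   } | |||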
| @@ -18,6 +18,7 @@ | |||
| #include "framework/common/taskdown_common.h" | |||
| #include "hybrid/executor/hybrid_execution_context.h" | |||
| #include "external/runtime/rt_error_codes.h" | |||
| #include "single_op/task/build_task_utils.h" | |||
| namespace ge { | |||
| namespace hybrid { | |||
| @@ -196,6 +197,11 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | |||
| GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | |||
| GE_CHK_STATUS_RET_NOLOG(CheckOverflow(context)); | |||
| GE_CHECK_NOTNULL(context.GetExecutionContext()->model); | |||
| GELOGD("[DEBUG_TASK_INFO : Executor Task] %s/%s %s", | |||
| context.GetExecutionContext()->model->GetModelName().c_str(), | |||
| (*it)->GetName().empty() ? (*it)->GetLogName().c_str() : (*it)->GetName().c_str(), | |||
| BuildTaskUtils::GetTaskInfo(context).c_str()); | |||
| // save profiling data | |||
| uint32_t task_id = 0; | |||
| uint32_t stream_id = 0; | |||
| @@ -208,7 +214,7 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||
| context.SetTaskId(task_id); | |||
| context.SetStreamId(stream_id); | |||
| GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); | |||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim(), (*it)->GetOpType()); | |||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | |||
| } | |||
| @@ -33,6 +33,7 @@ namespace { | |||
| constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | |||
| constexpr char const *kAttrOpParamSize = "op_para_size"; | |||
| constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; | |||
| const string kAtomicOpType = "DynamicAtomicAddrClean"; | |||
| std::atomic<std::uint64_t> log_id(0); | |||
| } // namespace | |||
| @@ -51,6 +52,7 @@ bool TbeHandleRegistry::AddHandle(std::unique_ptr<TbeHandleHolder> &&holder) { | |||
| } | |||
| Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | |||
| op_type_ = op_desc.GetType(); | |||
| log_name_ = op_desc.GetName() + "_tvmbin"; | |||
| log_id_ = log_id++; | |||
| auto op_desc_ptr = MakeShared<OpDesc>(op_desc); | |||
| @@ -538,6 +540,10 @@ const std::string &AiCoreOpTask::GetName() const { | |||
| return stub_name_; | |||
| } | |||
| const std::string &AiCoreOpTask::GetOpType() const { | |||
| return op_type_; | |||
| } | |||
| std::string AiCoreOpTask::GetKeyForOpParamSize() const { | |||
| return kAttrOpParamSize; | |||
| } | |||
| @@ -631,6 +637,10 @@ std::string AtomicAddrCleanOpTask::GetKeyForKernelName(const OpDesc &op_desc) co | |||
| return op_desc.GetName() + "_atomic_kernelname"; | |||
| } | |||
| const std::string &AtomicAddrCleanOpTask::GetOpType() const { | |||
| return kAtomicOpType; | |||
| } | |||
| Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { | |||
| GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str()); | |||
| GE_CHK_STATUS_RET(optiling::OpAtomicCalculateV2(*node, tiling_info), | |||
| @@ -72,12 +72,16 @@ class AiCoreOpTask { | |||
| const std::string& GetName() const; | |||
| const std::string& GetLogName() const {return log_name_;} | |||
| bool GetClearAtomic() const {return clear_atomic_;} | |||
| uint32_t GetBlockDim() const {return block_dim_;} | |||
| void SetSingleOp(bool is_single_op) { is_single_op_ = is_single_op; } | |||
| virtual const std::string& GetOpType() const; | |||
| protected: | |||
| Status UpdateTilingInfo(TaskContext &context); | |||
| virtual std::string GetKeyForOpParamSize() const; | |||
| @@ -117,12 +121,14 @@ class AiCoreOpTask { | |||
| uint64_t log_id_ = 0; | |||
| std::string log_name_; | |||
| uint32_t offset_ = 0; | |||
| std::string op_type_; | |||
| }; | |||
| class AtomicAddrCleanOpTask : public AiCoreOpTask { | |||
| public: | |||
| Status Init(const OpDesc &op_desc, const domi::TaskDef &task_def) override; | |||
| Status UpdateArgs(TaskContext &task_context) override; | |||
| const std::string& GetOpType() const override; | |||
| protected: | |||
| std::string GetKeyForOpParamSize() const override; | |||
| @@ -207,7 +207,7 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void( | |||
| context.SetTaskId(task_id); | |||
| context.SetStreamId(stream_id); | |||
| GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0); | |||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0, node_type_); | |||
| auto callback = [=, &context]() { | |||
| GELOGD("Node[%s] callback start.", node_name_.c_str()); | |||
| RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | |||
| @@ -460,10 +460,6 @@ Status TaskContext::PropagateOutputs() { | |||
| subgraph_context_->all_inputs_[input_offset].SetName( | |||
| node_item_->NodeName() + "_in_" + std::to_string(dst_input_idx)); | |||
| } | |||
| auto dst_node_state = subgraph_context_->GetOrCreateNodeState(dst_node_item); | |||
| GE_CHECK_NOTNULL(dst_node_state); | |||
| dst_node_state->SavePersistTensor(dst_input_idx, *tensor); | |||
| } | |||
| } | |||
| (void)guard; | |||
| @@ -495,6 +491,7 @@ void TaskContext::ReleaseInputsAndOutputs() { | |||
| void TaskContext::ReleaseInput(int index) { | |||
| auto input_tensor = MutableInput(index); | |||
| if (input_tensor != nullptr) { | |||
| node_state_->SavePersistTensor(index, *input_tensor); | |||
| input_tensor->Destroy(); | |||
| GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), index); | |||
| } | |||
| @@ -574,8 +571,8 @@ Status TaskContext::Synchronize() { | |||
| return execution_context_->Synchronize(GetStream()); | |||
| } | |||
| Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||
| const std::string &task_type, uint32_t block_dim) { | |||
| Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type, | |||
| uint32_t block_dim, const std::string &op_type) { | |||
| if (ProfilingManager::Instance().ProfilingModelLoadOn()) { | |||
| const NodeItem &node_item = GetNodeItem(); | |||
| auto op_desc = node_item.GetOpDesc(); | |||
| @@ -589,7 +586,7 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream | |||
| TaskDescInfo tmp_task_desc_info; | |||
| tmp_task_desc_info.model_name = dynamic_model_name; | |||
| tmp_task_desc_info.op_name = op_desc->GetName(); | |||
| tmp_task_desc_info.op_type = op_desc->GetType(); | |||
| tmp_task_desc_info.op_type = op_type; | |||
| tmp_task_desc_info.block_dim = block_dim; | |||
| tmp_task_desc_info.task_type = task_type; | |||
| tmp_task_desc_info.task_id = task_id; | |||
| @@ -118,8 +118,8 @@ class TaskContext { | |||
| void *handle_ = nullptr; | |||
| const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | |||
| Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||
| const std::string &task_type, uint32_t block_dim); | |||
| Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type, | |||
| uint32_t block_dim, const std::string &op_type); | |||
| void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | |||
| private: | |||
| @@ -50,6 +50,8 @@ const std::set<std::string> kBufferOptimizeSupportOption = {"l1_optimize", "l2_o | |||
| const char *const kBufferOptimizeSupport = "only support l2_optimize, off_optimize"; | |||
| const char *const IR_OPTION_OP_SELECT_IMPLMODE_DEFAULT = "high_performance"; | |||
| const char *const IR_OPTION_OP_SELECT_IMPLMODE_PRECISON = "high_precision"; | |||
| const char *const IR_OPTION_OP_SELECT_IMPLMODE_HIGH_PRECISION_FOR_ALL = "high_precision_for_all"; | |||
| const char *const IR_OPTION_OP_SELECT_IMPLMODE_HIGH_PERFORMANCE_FOR_ALL = "high_performance_for_all"; | |||
| const char *const kInputShapeSample1 = "\"input_name1:n1,c1,h1,w1\""; | |||
| const char *const kInputShapeSample2 = "\"input_name1:1,3,224,224\""; | |||
| const char *const kSplitError1 = "size not equal to 2 split by \":\""; | |||
| @@ -57,7 +59,8 @@ const char *const kEmptyError = "can not be empty"; | |||
| const char *const kFloatNumError = "exist float number"; | |||
| const char *const kDigitError = "is not digit"; | |||
| const char *const kCompressWeightError = "it must be appointed when appoint parameter[--optypelist_for_implmode]"; | |||
| const char *const kSelectImplmodeError = "only support high_performance, high_precision"; | |||
| const char *const kSelectImplmodeError = "only support high_performance, high_precision, " | |||
| "high_precision_for_all, high_performance_for_all"; | |||
| const char *const kDynamicBatchSizeError = "It can only contains digit, \",\", \" \""; | |||
| const char *const kDynamicImageSizeError = "It can only contains digit, \",\", \" \" and \";\""; | |||
| const char *const kKeepDtypeError = "file not found"; | |||
| @@ -782,7 +785,9 @@ Status CheckImplmodeParamValid(const std::string &optypelist_for_implmode, std:: | |||
| op_select_implmode = IR_OPTION_OP_SELECT_IMPLMODE_DEFAULT; | |||
| } else { | |||
| if (op_select_implmode != IR_OPTION_OP_SELECT_IMPLMODE_DEFAULT && | |||
| op_select_implmode != IR_OPTION_OP_SELECT_IMPLMODE_PRECISON) { | |||
| op_select_implmode != IR_OPTION_OP_SELECT_IMPLMODE_PRECISON && | |||
| op_select_implmode != IR_OPTION_OP_SELECT_IMPLMODE_HIGH_PRECISION_FOR_ALL && | |||
| op_select_implmode != IR_OPTION_OP_SELECT_IMPLMODE_HIGH_PERFORMANCE_FOR_ALL) { | |||
| ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, | |||
| {"--op_select_implmode", op_select_implmode.c_str(), | |||
| kSelectImplmodeError}); | |||
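| // Editor's note: a minimal standalone sketch of the same whitelist check, handy for unit tests. | |||
| // It mirrors the four constants accepted above and assumes <set> and <string> are available; | |||
| // the helper name is illustrative only. | |||
| static bool IsSupportedImplmode(const std::string &mode) { | |||
|   static const std::set<std::string> kModes = {"high_performance", "high_precision", | |||
|                                                "high_precision_for_all", "high_performance_for_all"}; | |||
|   return kModes.count(mode) > 0; | |||
| } | |||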
| @@ -60,10 +60,6 @@ class GeRootModel { | |||
| bool GetTrainFlag() const { return train_flag_; } | |||
| int32_t GetBuildTimes() const { return hybrid_build_times_; } | |||
| void IncreaseBuildTimes() { hybrid_build_times_++; } | |||
| private: | |||
| ComputeGraphPtr root_graph_ = nullptr; | |||
| std::map<std::string, GeModelPtr> subgraph_instance_name_to_model_; | |||
| @@ -73,7 +69,6 @@ class GeRootModel { | |||
| bool train_flag_ = false; | |||
| std::string model_name_; | |||
| bool is_specific_stream_ = false; | |||
| int32_t hybrid_build_times_ = 0; | |||
| }; | |||
| } // namespace ge | |||
| using GeRootModelPtr = std::shared_ptr<ge::GeRootModel>; | |||
| @@ -143,7 +143,8 @@ DEFINE_string(output_type, "", | |||
| DEFINE_string(op_select_implmode, "", | |||
| "Optional; op select implmode! " | |||
| "Support high_precision, high_performance."); | |||
| "Support high_precision, high_performance, " | |||
| "high_precision_for_all, high_performance_for_all."); | |||
| DEFINE_string(optypelist_for_implmode, "", | |||
| "Optional; Nodes need use implmode selected in op_select_implmode " | |||
| @@ -311,8 +312,8 @@ class GFlagUtils { | |||
| "scenarios by using a configuration file.\n" | |||
| " --auto_tune_mode Set tune mode. E.g.: \"GA,RL\", support configure multiple, spit by ,\n" | |||
| " --op_bank_path Set the path of the custom repository generated after operator tuning with Auto Tune.\n" | |||
| " --op_select_implmode Set op select implmode. Support high_precision, high_performance. " | |||
| "default: high_performance\n" | |||
| " --op_select_implmode Set op select implmode. Support high_precision, high_performance, " | |||
| "high_precision_for_all, high_performance_for_all. default: high_performance\n" | |||
| " --optypelist_for_implmode Appoint which op to select implmode, cooperated with op_select_implmode.\n" | |||
| " Separate multiple nodes with commas (,). Use double quotation marks (\") " | |||
| "to enclose each argument. E.g.: \"node_name1,node_name2\"\n" | |||
| @@ -121,7 +121,7 @@ Status InnerSession::Initialize() { | |||
| GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId())); | |||
| DumpProperties dump_properties; | |||
| dump_properties.InitByOptions(); | |||
| GE_CHK_STATUS_RET(dump_properties.InitByOptions(), "Init dump properties failed."); | |||
| GE_CHK_STATUS_RET(AddDumpProperties(dump_properties), "[Add][DumpProperties] failed."); | |||
| ret = graph_manager_.Initialize(options_); | |||
| @@ -297,6 +297,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c | |||
| for (auto &task : tasks_) { | |||
| ret = task->LaunchKernel(stream_); | |||
| GELOGD("[DEBUG_TASK_INFO : Static Task] %s %s", | |||
| task->GetTaskName().c_str(), | |||
| BuildTaskUtils::GetTaskInfo(task->GetOpdesc(), inputs, outputs).c_str()); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| @@ -447,6 +450,8 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||
| } else { | |||
| GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | |||
| } | |||
| GELOGD("[DEBUG_TASK_INFO : Dynamic Task] %s", | |||
| BuildTaskUtils::GetTaskInfo(op_task_->GetOpdesc(), input_buffers, output_buffers).c_str()); | |||
| GE_CHK_STATUS_RET_NOLOG(op_task_->OpenDump(stream_)); | |||
| GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | |||
| return SUCCESS; | |||
| @@ -95,35 +95,6 @@ Status CheckInferDepend(GeModelPtr &ge_model, bool &is_infer_depend, bool &is_ho | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status NeedHybridModel(GeModelPtr &ge_model, bool &flag) { | |||
| bool is_infer_depend = false; | |||
| bool is_host_mem = false; | |||
| GE_CHK_STATUS_RET(CheckInferDepend(ge_model, is_infer_depend, is_host_mem), "[Check][InferDepend] failed."); | |||
| bool need_d2h_cpy = is_infer_depend && !is_host_mem; | |||
| auto tasks = ge_model->GetModelTaskDefPtr()->task(); | |||
| int32_t kernel_task_num = 0; | |||
| for (int i = 0; i < tasks.size(); ++i) { | |||
| auto task_type = static_cast<rtModelTaskType_t>(tasks[i].type()); | |||
| if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) { | |||
| const auto &context = task_type == RT_MODEL_TASK_KERNEL ? tasks[i].kernel().context() : | |||
| tasks[i].kernel_with_handle().context(); | |||
| auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | |||
| if (kernel_type == ccKernelType::TE) { | |||
| if (need_d2h_cpy) { | |||
| flag = true; | |||
| return SUCCESS; | |||
| } | |||
| kernel_task_num++; | |||
| if (kernel_task_num > 1) { | |||
| flag = true; | |||
| return SUCCESS; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| } // namespace | |||
| SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size) | |||
| @@ -620,29 +591,69 @@ Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, | |||
| return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID; | |||
| } | |||
| } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | |||
| if (single_op.op_task_ != nullptr) { | |||
| GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "[Check][TaskType]Do not support dynamic op with multiple tasks."); | |||
| REPORT_INNER_ERROR("E19999", | |||
| "BuildTaskListForDynamicOp fail for Do not support dynamic op with multiple tasks."); | |||
| return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; | |||
| } | |||
| GELOGD("Building AICPU_TF task"); | |||
| AiCpuTask *aicpu_task = nullptr; | |||
| uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; | |||
| GELOGI("Build dynamic singleOp TfTask, kernel_id = %lu", dynamic_singleop_kernel_id); | |||
| GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, dynamic_singleop_kernel_id)); | |||
| if (aicpu_task->GetUnknownType() == DEPEND_COMPUTE) { | |||
| if (i >= tasks.size() - 1) { | |||
| if (aicpu_tasks_.size() < 2) { | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Task]The copy task of the fourth operator was not found."); | |||
| REPORT_INNER_ERROR("E19999", "The copy task of the fourth operator was not found."); | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| ++i; | |||
| const TaskDef ©_task_def = tasks[i]; | |||
| const TaskDef ©_task_def = aicpu_tasks_[1]; | |||
| GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex())); | |||
| } | |||
| aicpu_task->SetModelArgs(model_name_, model_id_); | |||
| single_op.op_task_.reset(aicpu_task); | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status SingleOpModel::NeedHybridModel(GeModelPtr &ge_model, bool &need_hybrid_model) { | |||
| bool is_infer_depend = false; | |||
| bool is_host_mem = false; | |||
| GE_CHK_STATUS_RET(CheckInferDepend(ge_model, is_infer_depend, is_host_mem), "[Check][InferDepend] failed."); | |||
| bool need_d2h_cpy = is_infer_depend && !is_host_mem; | |||
| bool aicpu_multi_task = tbe_tasks_.size() >= 1 && aicpu_tasks_.size() >= 1; | |||
| bool aicore_multi_task = tbe_tasks_.size() > 1; | |||
| need_hybrid_model = need_d2h_cpy || aicore_multi_task || aicpu_multi_task; | |||
| return SUCCESS; | |||
| } | |||
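| // Editor's note: a hedged truth table for the rule above; the task counts are illustrative only. | |||
| //   tbe_tasks_ = 2, aicpu_tasks_ = 0, no d2h copy -> aicore_multi_task -> hybrid executor | |||
| //   tbe_tasks_ = 1, aicpu_tasks_ = 1, no d2h copy -> aicpu_multi_task  -> hybrid executor | |||
| //   tbe_tasks_ = 1, aicpu_tasks_ = 0, d2h copy    -> need_d2h_cpy      -> hybrid executor | |||
| //   tbe_tasks_ = 1, aicpu_tasks_ = 0, no d2h copy -> stays on the single-op static path | |||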
| Status SingleOpModel::ParseTasks() { | |||
| auto ge_model = model_helper_.GetGeModel(); | |||
| GE_CHECK_NOTNULL(ge_model); | |||
| auto tasks = ge_model->GetModelTaskDefPtr()->task(); | |||
| for (int i = 0; i < tasks.size(); ++i) { | |||
| TaskDef &task_def = tasks[i]; | |||
| GELOGI("[%s] Task[%d], type = [%u], DebugString = [%s]", model_name_.c_str(), i, task_def.type(), | |||
| task_def.DebugString().c_str()); | |||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||
| if (task_type == RT_MODEL_TASK_KERNEL) { | |||
| const auto &kernel_def = task_def.kernel(); | |||
| const auto &context = kernel_def.context(); | |||
| auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | |||
| if (kernel_type == ccKernelType::TE) { | |||
| tbe_tasks_.emplace_back(task_def); | |||
| } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | |||
| aicpu_tasks_.emplace_back(task_def); | |||
| } else { | |||
| GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, | |||
| "[Check][Param:TaskDef] Only TBE, AI_CPU, CUST_AI_CPU kernels are supported, but got %u", | |||
| context.kernel_type()); | |||
| REPORT_INNER_ERROR("E19999", | |||
| "ParseTasks fail: kernel_type %u is not supported, only TBE, AI_CPU, CUST_AI_CPU kernels are supported.", | |||
| context.kernel_type()); | |||
| return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID; | |||
| } | |||
| } else if (task_type == RT_MODEL_TASK_ALL_KERNEL) { | |||
| tbe_tasks_.emplace_back(task_def); | |||
| } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | |||
| aicpu_tasks_.emplace_back(task_def); | |||
| } else { | |||
| // skip | |||
| GELOGD("Skip task type: %d", static_cast<int>(task_type)); | |||
| @@ -657,6 +668,7 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & | |||
| GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); | |||
| model_params_.memory_size = UINT64_MAX; | |||
| model_params_.graph_is_dynamic = true; | |||
| GE_CHK_STATUS_RET(ParseTasks(), "[Parse][Tasks] failed."); | |||
| auto ge_model = model_helper_.GetGeModel(); | |||
| GE_CHECK_NOTNULL(ge_model); | |||
| @@ -76,6 +76,11 @@ class SingleOpModel { | |||
| void ParseArgTable(OpTask *task, SingleOp &op); | |||
| Status InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model, SingleOp &single_op); | |||
| Status SetHostMemTensor(DynamicSingleOp &single_op); | |||
| Status NeedHybridModel(GeModelPtr &ge_model, bool &flag); | |||
| Status ParseTasks(); | |||
| std::vector<domi::TaskDef> tbe_tasks_; | |||
| std::vector<domi::TaskDef> aicpu_tasks_; | |||
| std::string model_name_; | |||
| uint32_t model_id_ = 0; | |||
| @@ -70,7 +70,9 @@ std::vector<void *> BuildTaskUtils::GetKernelArgs(const OpDescPtr &op_desc, | |||
| return JoinAddresses(addresses); | |||
| } | |||
| std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||
| std::string BuildTaskUtils::InnerGetTaskInfo(const OpDescPtr &op_desc, | |||
| const std::vector<const void *> &input_addrs, | |||
| const std::vector<const void *> &output_addrs) { | |||
| std::stringstream ss; | |||
| if (op_desc != nullptr) { | |||
| auto op_type = op_desc->GetType(); | |||
| @@ -87,7 +89,10 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||
| } | |||
| ss << TypeUtils::DataTypeToSerialString(input->GetDataType()) << " "; | |||
| ss << TypeUtils::FormatToSerialString(input->GetFormat()); | |||
| ss << VectorToString(input->GetShape().GetDims()); | |||
| ss << VectorToString(input->GetShape().GetDims()) << " "; | |||
| if (idx < input_addrs.size()) { | |||
| ss << input_addrs[idx]; | |||
| } | |||
| if (idx < op_desc->GetInputsSize() - 1) { | |||
| ss << ","; | |||
| } | |||
| @@ -101,7 +106,10 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||
| const GeShape &out_shape = output->GetShape(); | |||
| const auto &dims = out_shape.GetDims(); | |||
| ss << TypeUtils::FormatToSerialString(out_format); | |||
| ss << VectorToString(dims); | |||
| ss << VectorToString(dims) << " "; | |||
| if (idx < output_addrs.size()) { | |||
| ss << output_addrs[idx]; | |||
| } | |||
| if (idx < op_desc->GetOutputsSize() - 1) { | |||
| ss << ","; | |||
| } | |||
| @@ -110,4 +118,44 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||
| } | |||
| return ss.str(); | |||
| } | |||
| std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||
| vector<const void *> input_addrs; | |||
| vector<const void *> output_addrs; | |||
| return InnerGetTaskInfo(op_desc, input_addrs, output_addrs); | |||
| } | |||
| std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc, | |||
| const std::vector<DataBuffer> &inputs, | |||
| const std::vector<DataBuffer> &outputs) { | |||
| vector<const void *> input_addrs; | |||
| vector<const void *> output_addrs; | |||
| GE_CHECK_NOTNULL_EXEC(op_desc, return ""); | |||
| if (op_desc->GetAllInputsSize() == inputs.size()) { | |||
| std::for_each(inputs.begin(), inputs.end(), [&](const DataBuffer &db) { input_addrs.push_back(db.data); }); | |||
| } | |||
| if (op_desc->GetOutputsSize() == outputs.size()) { | |||
| std::for_each(outputs.begin(), outputs.end(), [&](const DataBuffer &db) { output_addrs.push_back(db.data); }); | |||
| } | |||
| return InnerGetTaskInfo(op_desc, input_addrs, output_addrs); | |||
| } | |||
| std::string BuildTaskUtils::GetTaskInfo(const hybrid::TaskContext &task_context) { | |||
| auto &node_item = task_context.GetNodeItem(); | |||
| auto op_desc = node_item.GetOpDesc(); | |||
| GE_CHECK_NOTNULL_EXEC(op_desc, return ""); | |||
| vector<const void *> input_addrs; | |||
| vector<const void *> output_addrs; | |||
| if (op_desc->GetAllInputsSize() == static_cast<uint32_t>(task_context.NumInputs())) { | |||
| for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||
| input_addrs.push_back(task_context.GetInput(i)->GetData()); | |||
| } | |||
| } | |||
| if (op_desc->GetOutputsSize() == static_cast<uint32_t>(task_context.NumOutputs())) { | |||
| for (size_t i = 0; i < op_desc->GetOutputsSize(); ++i) { | |||
| output_addrs.push_back(task_context.GetOutput(i)->GetData()); | |||
| } | |||
| } | |||
| return InnerGetTaskInfo(op_desc, input_addrs, output_addrs); | |||
| } | |||
| } // namespace ge | |||
| @@ -23,6 +23,7 @@ | |||
| #include "graph/op_desc.h" | |||
| #include "single_op/single_op.h" | |||
| #include "single_op/single_op_model.h" | |||
| #include "hybrid/node_executor/task_context.h" | |||
| namespace ge { | |||
| class BuildTaskUtils { | |||
| @@ -35,7 +36,14 @@ class BuildTaskUtils { | |||
| bool keep_workspace = true); | |||
| static std::vector<void *> JoinAddresses(const std::vector<std::vector<void *>> &addresses); | |||
| static std::vector<void *> GetKernelArgs(const OpDescPtr &op_desc, const SingleOpModelParam ¶m); | |||
| static std::string InnerGetTaskInfo(const OpDescPtr &op_desc, | |||
| const std::vector<const void *> &input_addrs, | |||
| const std::vector<const void *> &output_addrs); | |||
| static std::string GetTaskInfo(const OpDescPtr &op_desc); | |||
| static std::string GetTaskInfo(const OpDescPtr &op_desc, | |||
| const std::vector<DataBuffer> &inputs, | |||
| const std::vector<DataBuffer> &outputs); | |||
| static std::string GetTaskInfo(const hybrid::TaskContext& task_context); | |||
| template<typename T> | |||
| static std::string VectorToString(const std::vector<T> &values) { | |||
| std::stringstream ss; | |||
| @@ -89,6 +89,7 @@ Status OpTask::OpenDump(rtStream_t stream) { | |||
| void TbeOpTask::SetStubFunc(const std::string &name, const void *stub_func) { | |||
| this->stub_name_ = name; | |||
| this->stub_func_ = stub_func; | |||
| this->task_name_ = name; | |||
| } | |||
| void TbeOpTask::SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | |||
| @@ -345,49 +346,95 @@ Status TbeOpTask::AllocateWorkspaces(const vector<int64_t> &workspace_sizes) { | |||
| return SUCCESS; | |||
| } | |||
| Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||
| const vector<DataBuffer> &input_buffers, | |||
| vector<GeTensorDesc> &output_desc, | |||
| vector<DataBuffer> &output_buffers, | |||
| rtStream_t stream) { | |||
| GELOGD("[%s] Start to launch kernel", node_->GetName().c_str()); | |||
| GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc)); | |||
| GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo()); | |||
| GE_CHK_STATUS_RET(AllocateWorkspaces(run_info_workspaces_), "[Allocate][Workspaces] failed."); | |||
| std::vector<void *> args; | |||
| for (auto &buffer : input_buffers) { | |||
| args.emplace_back(buffer.data); | |||
| Status TbeOpTask::UpdateTilingArgs(rtStream_t stream) { | |||
| size_t args_size = input_num_ + output_num_ + workspaces_.size(); | |||
| if (tiling_buffer_ != nullptr) { | |||
| args_size++; | |||
| } | |||
| for (auto &buffer : output_buffers) { | |||
| args.emplace_back(buffer.data); | |||
| size_t temp_size = args_size * sizeof(void *); | |||
| if (arg_size_ < temp_size) { | |||
| GELOGD("Need to reset size of args_ from %zu to %zu.", arg_size_, temp_size); | |||
| std::unique_ptr<uint8_t[]> args(new (std::nothrow) uint8_t[temp_size]()); | |||
| GE_CHECK_NOTNULL(args); | |||
| if (memcpy_s(args.get(), temp_size, args_.get(), arg_size_) != EOK) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][KernelArgs] failed for [%s].", node_->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "update kernel args failed for %s.", node_->GetName().c_str()); | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| } | |||
| args_ = std::move(args); | |||
| arg_size_ = temp_size; | |||
| } | |||
| for (auto &buffer : workspaces_) { | |||
| args.emplace_back(buffer); | |||
| uintptr_t *arg_base = reinterpret_cast<uintptr_t *>(args_.get()); | |||
| size_t arg_index = input_num_ + output_num_; | |||
| for (size_t i = 0; i < workspaces_.size(); ++i) { | |||
| arg_base[arg_index++] = reinterpret_cast<uintptr_t>(workspaces_[i]); | |||
| } | |||
| if (tiling_buffer_ != nullptr) { | |||
| GELOGD("[%s] Start to copy tiling info. size = %zu", node_->GetName().c_str(), tiling_data_.size()); | |||
| GE_CHK_RT_RET(rtMemcpyAsync(tiling_buffer_, max_tiling_size_, tiling_data_.data(), tiling_data_.size(), | |||
| RT_MEMCPY_HOST_TO_DEVICE_EX, stream)); | |||
| arg_base[arg_index] = reinterpret_cast<uintptr_t>(tiling_buffer_); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status TbeOpTask::SetArgIndex() { | |||
| const vector<bool> v_is_input_const = op_desc_->GetIsInputConst(); | |||
| size_t input_index = 0; | |||
| for (size_t i = 0; i < op_desc_->GetAllInputsSize(); ++i) { | |||
| const GeTensorDescPtr tensor_desc = op_desc_->MutableInputDesc(static_cast<uint32_t>(i)); | |||
| if (tensor_desc == nullptr) { | |||
| GELOGD("SingleOp: %s, Index: %zu, has no input", op_desc_->GetName().c_str(), i); | |||
| continue; | |||
| } | |||
| if (i < v_is_input_const.size() && v_is_input_const[i]) { | |||
| GELOGD("SingleOp: %s, Index: %zu, input is const", op_desc_->GetName().c_str(), i); | |||
| input_index++; | |||
| continue; | |||
| } | |||
| arg_index_.emplace_back(input_index); | |||
| input_index++; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| args.emplace_back(tiling_buffer_); | |||
| Status TbeOpTask::UpdateIoAddr(const vector<DataBuffer> &inputs, const vector<DataBuffer> &outputs) { | |||
| if (arg_index_.size() != inputs.size()) { | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size] Args size is %zu, but get input size is %zu.", | |||
| arg_index_.size(), inputs.size()); | |||
| REPORT_INNER_ERROR("E19999", "[Check][Size] Args size is %zu, but get input size is %zu.", | |||
| arg_index_.size(), inputs.size()); | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| GELOGD("Dst size is %zu, src size is %zu.", arg_size_, args.size() * sizeof(void *)); | |||
| // For nodes with workspaces the workspace size is unknown at build time, so arg_size_ has to be updated at execute time. | |||
| if (arg_size_ < (args.size() * sizeof(void *))) { | |||
| size_t temp_size = args.size() * sizeof(void *); | |||
| GELOGD("Need to reset size of args_ from %zu to %zu.", arg_size_, temp_size); | |||
| args_.reset(new(std::nothrow) uint8_t[temp_size]()); | |||
| GE_CHECK_NOTNULL(args_); | |||
| arg_size_ = temp_size; | |||
| uintptr_t *arg_base = reinterpret_cast<uintptr_t *>(args_.get()); | |||
| for (size_t i = 0; i < arg_index_.size(); ++i) { | |||
| arg_base[arg_index_[i]] = reinterpret_cast<uintptr_t>(inputs[i].data); | |||
| } | |||
| if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][KernelArgs] failed for [%s].", node_->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "update kernel args failed for %s.", node_->GetName().c_str()); | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||
| for (size_t i = 0; i < op_desc_->GetOutputsSize(); ++i) { | |||
| arg_base[input_num_ + i] = reinterpret_cast<uintptr_t>(outputs[i].data); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
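| // Editor's note: a hedged sketch of the args_ layout these two helpers assume, derived from the | |||
| // index arithmetic above; the slot counts are illustrative, not taken from the patch. | |||
| //   args_: [ input_0 .. input_{input_num_-1} | output_0 .. output_{output_num_-1} | |||
| //            | workspace_0 .. | tiling_buffer_ (only when tiling_buffer_ != nullptr) ] | |||
| // UpdateIoAddr fills inputs through arg_index_ (const inputs keep their slot but are not rewritten | |||
| // here) and outputs at input_num_ + i; UpdateTilingArgs then appends workspaces and the tiling pointer. | |||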
| Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||
| const vector<DataBuffer> &input_buffers, | |||
| vector<GeTensorDesc> &output_desc, | |||
| vector<DataBuffer> &output_buffers, | |||
| rtStream_t stream) { | |||
| GELOGD("[%s] Start to launch kernel", node_->GetName().c_str()); | |||
| GE_CHK_STATUS_RET(UpdateIoAddr(input_buffers, output_buffers), "[Update][IoAddr] failed."); | |||
| GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc)); | |||
| GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo()); | |||
| GE_CHK_STATUS_RET(AllocateWorkspaces(run_info_workspaces_), "[Allocate][Workspaces] failed."); | |||
| GE_CHK_STATUS_RET(UpdateTilingArgs(stream), "[Update][TilingArgs] failed."); | |||
| GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); | |||
| GE_CHK_STATUS_RET(DoLaunchKernel(stream), "Failed to do launch kernel."); | |||
| @@ -33,6 +33,10 @@ | |||
| #include "register/op_tiling.h" | |||
| namespace ge { | |||
| namespace { | |||
| const int kAddressNum = 2; | |||
| } // namespace | |||
| class StreamResource; | |||
| struct SingleOpModelParam; | |||
| class OpTask { | |||
| @@ -44,6 +48,7 @@ class OpTask { | |||
| virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | |||
| void SetModelArgs(std::string model_name, uint32_t model_id); | |||
| Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); | |||
| const std::string &GetTaskName() const {return task_name_;} | |||
| void SetOpDesc(const OpDescPtr &op_desc) { | |||
| op_desc_ = op_desc; | |||
| } | |||
| @@ -66,6 +71,7 @@ class OpTask { | |||
| std::string model_name_; | |||
| uint32_t model_id_ = 0; | |||
| uint32_t block_dim_ = 1; | |||
| std::string task_name_; | |||
| }; | |||
| class TbeOpTask : public OpTask { | |||
| @@ -85,6 +91,7 @@ class TbeOpTask : public OpTask { | |||
| const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle); | |||
| Status UpdateRunInfo() override; | |||
| Status SetArgIndex(); | |||
| const void *GetArgs() const; | |||
| size_t GetArgSize() const; | |||
| @@ -100,7 +107,9 @@ class TbeOpTask : public OpTask { | |||
| Status UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, | |||
| const vector<GeTensorDesc> &output_desc); | |||
| Status AllocateWorkspaces(const std::vector<int64_t> &workspace_sizes); | |||
| Status UpdateTilingArgs(rtStream_t stream); | |||
| Status DoLaunchKernel(rtStream_t stream); | |||
| Status UpdateIoAddr(const vector<DataBuffer> &inputs, const vector<DataBuffer> &outputs); | |||
| const void *stub_func_ = nullptr; | |||
| std::unique_ptr<uint8_t[]> args_; | |||
| @@ -120,6 +129,9 @@ class TbeOpTask : public OpTask { | |||
| void* handle_ = nullptr; | |||
| std::string original_kernel_key_; | |||
| std::string node_info_; | |||
| std::vector<size_t> arg_index_; // position of each non-const data input in args_ | |||
| size_t input_num_ = 0; // includes const inputs | |||
| size_t output_num_ = 0; | |||
| }; | |||
| class AiCpuBaseTask : public OpTask { | |||
| @@ -266,7 +278,7 @@ class MemcpyAsyncTask : public OpTask { | |||
| friend class SingleOpModel; | |||
| friend class RtsKernelTaskBuilder; | |||
| uintptr_t addresses_[2]; | |||
| uintptr_t addresses_[kAddressNum]; | |||
| size_t dst_max_; | |||
| size_t count_; | |||
| rtMemcpyKind_t kind_; | |||
| @@ -104,7 +104,7 @@ Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bi | |||
| binary.version = 0; | |||
| binary.data = kernel_bin.GetBinData(); | |||
| binary.length = kernel_bin.GetBinDataSize(); | |||
| binary.magic = param.core_type == 0 ? RT_DEV_BINARY_MAGIC_ELF : RT_DEV_BINARY_MAGIC_ELF_AIVEC; | |||
| GE_CHK_STATUS_RET_NOLOG(GetMagic(binary.magic)); | |||
| Status ret = 0; | |||
| if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) { | |||
| ret = rtRegisterAllKernel(&binary, bin_handle); | |||
| @@ -387,6 +387,9 @@ Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶ | |||
| } | |||
| task.SetStubFunc(stub_name_, stub_func); | |||
| } | |||
| GE_CHK_STATUS_RET(task.SetArgIndex(), "[Set][ArgTable] failed."); | |||
| task.input_num_ = op_desc_->GetInputsSize(); | |||
| task.output_num_ = op_desc_->GetOutputsSize(); | |||
| return SUCCESS; | |||
| } | |||
| @@ -413,4 +416,27 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) { | |||
| task.EnableDynamicSupport(node_, tiling_buffer, static_cast<uint32_t>(max_size)); | |||
| return SUCCESS; | |||
| } | |||
| Status TbeTaskBuilder::GetMagic(uint32_t &magic) const { | |||
| std::string json_string; | |||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_, TVM_ATTR_NAME_MAGIC, json_string), | |||
| GELOGD("Get original type of session_graph_id.")); | |||
| if (json_string == "RT_DEV_BINARY_MAGIC_ELF") { | |||
| magic = RT_DEV_BINARY_MAGIC_ELF; | |||
| } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { | |||
| magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; | |||
| } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICUBE") { | |||
| magic = RT_DEV_BINARY_MAGIC_ELF_AICUBE; | |||
| } else { | |||
| REPORT_INNER_ERROR("E19999", "Attr:%s in op:%s(%s), value:%s check invalid", | |||
| TVM_ATTR_NAME_MAGIC.c_str(), op_desc_->GetName().c_str(), | |||
| op_desc_->GetType().c_str(), json_string.c_str()); | |||
| GELOGE(PARAM_INVALID, "[Check][Param] Attr:%s in op:%s(%s), value:%s check invalid", | |||
| TVM_ATTR_NAME_MAGIC.c_str(), op_desc_->GetName().c_str(), | |||
| op_desc_->GetType().c_str(), json_string.c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -105,6 +105,7 @@ class TbeTaskBuilder { | |||
| const SingleOpModelParam ¶m); | |||
| Status DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, const SingleOpModelParam ¶m) const; | |||
| Status DoRegisterMeta(void *bin_handle); | |||
| Status GetMagic(uint32_t &magic) const; | |||
| static Status DoRegisterFunction(void *bin_handle, const char *stub_name, const char *kernel_name); | |||
| @@ -84,9 +84,10 @@ inline bool IsLogEnable(int module_name, int log_level) { | |||
| ##__VA_ARGS__); \ | |||
| } while (0) | |||
| #define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ | |||
| dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ | |||
| ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__) | |||
| #define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ | |||
| dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) %s" fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ | |||
| ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ | |||
| ##__VA_ARGS__) | |||
| // print memory when it is greater than 1KB. | |||
| #define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \ | |||
| @@ -1 +1 @@ | |||
| Subproject commit 2ad00e17886fd06c0d00f8a8cf370783a3d31818 | |||
| Subproject commit 3e14f92d47abc9a2e703be2171f047553f7597e0 | |||
| @@ -1 +1 @@ | |||
| Subproject commit 79536a196f89cf7a1f5852ff7304b9a7d7b12eff | |||
| Subproject commit 4151e33028c518057289b569b36cd4069af362a4 | |||
| @@ -38,5 +38,20 @@ RUN wget https://github.com/ccup/lcov/archive/refs/tags/add_lcov.tar.gz -O add_l | |||
| ENV PROJECT_HOME=/code/Turing/graphEngine | |||
| RUN mkdir /var/run/sshd | |||
| RUN echo "root:root" | chpasswd | |||
| RUN sed -i 's/\#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config | |||
| RUN sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd | |||
| ENV NOTVISIBLE "in users profile" | |||
| RUN echo "export VISIBLE=now" >> /etc/profile | |||
| EXPOSE 22 7777 | |||
| RUN useradd -ms /bin/bash debugger | |||
| RUN echo "debugger:ge123" | chpasswd | |||
| CMD ["/usr/sbin/sshd" "-D" "&"] | |||
| RUN echo "alias ge=/code/Turing/graphEngine/scripts/ge.sh">>~/.bashrc | |||
| @@ -21,7 +21,7 @@ MOUNT_PROJECT_HOME=$(cd $PROJECT_HOME || return; pwd) | |||
| DOCKER_BUILD_ENV_NAME=${MOUNT_PROJECT_HOME#*/} | |||
| DOCKER_BUILD_ENV_NAME=${DOCKER_BUILD_ENV_NAME//\//\_} | |||
| DOCKER_IMAGE_TAG=ge_build_env.1.0.6 | |||
| DOCKER_IMAGE_TAG=ge_build_env.1.0.9 | |||
| DOCKER_IAMGE_NAME=joycode2art/turing | |||
| DOCKER_FULL_IMAGE_NAME=${DOCKER_IAMGE_NAME}:${DOCKER_IMAGE_TAG} | |||
| @@ -61,7 +61,7 @@ function enter_docker_env(){ | |||
| if test -z "$(docker images |grep ${DOCKER_IAMGE_NAME} | grep ${DOCKER_IMAGE_TAG})"; then | |||
| echo "please run 'ge env --pull' to download images first!" | |||
| elif test -z "$(docker ps -a |grep ${DOCKER_BUILD_ENV_NAME})"; then | |||
| $docker_cmd run -it -v ${MOUNT_PROJECT_HOME}:/code/Turing/graphEngine --workdir ${docker_work_dir} --name ${DOCKER_BUILD_ENV_NAME} ${DOCKER_FULL_IMAGE_NAME} ${docker_bash_dir} | |||
| $docker_cmd run -p 7002:22 -p 7003:7777 --privileged=true -it -v ${MOUNT_PROJECT_HOME}:/code/Turing/graphEngine --workdir ${docker_work_dir} --name ${DOCKER_BUILD_ENV_NAME} ${DOCKER_FULL_IMAGE_NAME} ${docker_bash_dir} | |||
| elif test -z "$(docker ps |grep ${DOCKER_BUILD_ENV_NAME})"; then | |||
| $docker_cmd start ${DOCKER_BUILD_ENV_NAME} | |||
| $docker_cmd exec -w ${docker_work_dir} -it ${DOCKER_BUILD_ENV_NAME} ${docker_bash_dir} | |||
| @@ -38,7 +38,7 @@ function extract_deps_so_community() | |||
| { | |||
| echo "begin to extract .run file ........." | |||
| chmod +x ./${DRIVER_RUN_NAME_C} | |||
| chmod +X ./${PACKAGE_NAME_C} | |||
| chmod +x ./${PACKAGE_NAME_C} | |||
| [ -n "${DEP_TMP_DIR}" ] && rm -rf "${DEP_TMP_DIR}" | |||
| ./${DRIVER_RUN_NAME_C} --noexec --extract=${DEP_TMP_DIR}/driver | |||
| ./${PACKAGE_NAME_C} --noexec --extract=${DEP_TMP_DIR}/Packages_tmp | |||
| @@ -22,6 +22,7 @@ add_subdirectory(depends/runtime) | |||
| add_subdirectory(depends/hccl) | |||
| add_subdirectory(depends/profiler) | |||
| add_subdirectory(depends/error_manager) | |||
| add_subdirectory(depends/opt_info) | |||
| if (ENABLE_GE_COV OR ENABLE_GE_UT) | |||
| add_subdirectory(ut) | |||
| @@ -60,6 +60,7 @@ set(SRCS | |||
| "${GE_CODE_DIR}/metadef/graph/detail/attributes_holder.cc" | |||
| "${GE_CODE_DIR}/metadef/graph/utils/anchor_utils.cc" | |||
| "${GE_CODE_DIR}/metadef/graph/utils/graph_utils.cc" | |||
| "${GE_CODE_DIR}/metadef/graph/utils/dumper/ge_graph_dumper.cc" | |||
| "${GE_CODE_DIR}/metadef/graph/utils/node_utils.cc" | |||
| "${GE_CODE_DIR}/metadef/graph/utils/op_desc_utils.cc" | |||
| "${GE_CODE_DIR}/metadef/graph/utils/type_utils.cc" | |||
| @@ -345,6 +345,10 @@ INT32 mmIsDir(const CHAR *fileName) | |||
| INT32 mmGetEnv(const CHAR *name, CHAR *value, UINT32 len) | |||
| { | |||
| const char *env = getenv(name); | |||
| if (env != nullptr && value != nullptr && len > 0U) { | |||
| strncpy(value, env, len - 1U); | |||
| value[len - 1U] = '\0'; | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -0,0 +1,37 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| #cmake_minimum_required(VERSION 2.8) | |||
| project(opt_feature_stub) | |||
| file(GLOB_RECURSE SRCS RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
| "src/opt_info_stub.cc" | |||
| ) | |||
| include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/opt_info) | |||
| add_library(opt_feature_stub SHARED ${SRCS}) | |||
| target_compile_options(opt_feature_stub PRIVATE | |||
| -g | |||
| ) | |||
| target_link_libraries(opt_feature_stub PRIVATE | |||
| $<BUILD_INTERFACE:intf_pub> | |||
| c_sec | |||
| ) | |||
| target_include_directories(opt_feature_stub INTERFACE ${CMAKE_CURRENT_LIST_DIR}/src) | |||
| @@ -0,0 +1,46 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "opt_info.h" | |||
| #include <string> | |||
| #include <map> | |||
| #include <vector> | |||
| #include <algorithm> | |||
| namespace gelc { | |||
| namespace { | |||
| const std::vector<std::string> kSocVersions = {"Ascend910"}; | |||
| } | |||
| void SetAllOptInfo(std::map<std::string, std::string> &opt_infos) { | |||
| opt_infos.emplace("opt_module.fe", "all"); | |||
| opt_infos.emplace("opt_module.pass", "all"); | |||
| opt_infos.emplace("opt_module.op_tune", "all"); | |||
| opt_infos.emplace("opt_module.rl_tune", "all"); | |||
| opt_infos.emplace("opt_module.aoe", "all"); | |||
| } | |||
| Status GetOptInfo(WorkMode mode, const std::string &soc_ver, | |||
| std::map<std::string, std::string> &opt_infos) { | |||
| if (std::find(kSocVersions.begin(), kSocVersions.end(), soc_ver) == kSocVersions.end()) { | |||
| SetAllOptInfo(opt_infos); | |||
| return SUCCESS; | |||
| } | |||
| opt_infos.emplace("opt_module.fe", "all"); | |||
| opt_infos.emplace("opt_module.pass", "all"); | |||
| opt_infos.emplace("opt_module.op_tune", "all"); | |||
| return SUCCESS; | |||
| } | |||
| } // namespace gelc | |||
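| // Editor's note: a hedged usage sketch for the stub above. The WorkMode enumerators are not shown | |||
| // in this patch, so a value-initialised mode is used purely for illustration. | |||
| //   std::map<std::string, std::string> opt_infos; | |||
| //   gelc::WorkMode mode{}; | |||
| //   if (gelc::GetOptInfo(mode, "Ascend910", opt_infos) == SUCCESS) { | |||
| //     // opt_infos now maps opt_module.fe / opt_module.pass / opt_module.op_tune to "all" | |||
| //   } | |||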
| @@ -23,13 +23,46 @@ | |||
| void dav_log(int module_id, const char *fmt, ...) {} | |||
| void DlogErrorInner(int module_id, const char *fmt, ...) { dav_log(module_id, fmt); } | |||
| static int log_level = DLOG_ERROR; | |||
| #define __DO_PRINT() \ | |||
| do { \ | |||
| const int FMT_BUFF_SIZE = 1024; \ | |||
| char fmt_buff[FMT_BUFF_SIZE] = {0}; \ | |||
| va_list valist; \ | |||
| va_start(valist, fmt); \ | |||
| vsnprintf(fmt_buff, FMT_BUFF_SIZE, fmt, valist); \ | |||
| va_end(valist); \ | |||
| printf("%s \n", fmt_buff); \ | |||
| } while (0) | |||
| void DlogErrorInner(int module_id, const char *fmt, ...) { | |||
| if (log_level > DLOG_ERROR) { | |||
| return; | |||
| } | |||
| __DO_PRINT(); | |||
| } | |||
| void DlogWarnInner(int module_id, const char *fmt, ...) { dav_log(module_id, fmt); } | |||
| void DlogWarnInner(int module_id, const char *fmt, ...) { | |||
| if (log_level > DLOG_WARN) { | |||
| return; | |||
| } | |||
| __DO_PRINT(); | |||
| } | |||
| void DlogInfoInner(int module_id, const char *fmt, ...) { dav_log(module_id, fmt); } | |||
| void DlogInfoInner(int module_id, const char *fmt, ...) { | |||
| if (log_level > DLOG_INFO) { | |||
| return; | |||
| } | |||
| __DO_PRINT(); | |||
| } | |||
| void DlogDebugInner(int module_id, const char *fmt, ...) { dav_log(module_id, fmt); } | |||
| void DlogDebugInner(int module_id, const char *fmt, ...) { | |||
| if (log_level > DLOG_DEBUG) { | |||
| return; | |||
| } | |||
| __DO_PRINT(); | |||
| } | |||
| void DlogEventInner(int module_id, const char *fmt, ...) { dav_log(module_id, fmt); } | |||
| @@ -39,30 +72,25 @@ void DlogWithKVInner(int module_id, int level, KeyValue *pst_kv_array, int kv_nu | |||
| dav_log(module_id, fmt); | |||
| } | |||
| int dlog_setlevel(int module_id, int level, int enable_event) { return DLOG_DEBUG; } | |||
| int dlog_setlevel(int module_id, int level, int enable_event) { | |||
| log_level = level; | |||
| return log_level; | |||
| } | |||
| int dlog_getlevel(int module_id, int *enable_event) { return DLOG_DEBUG; } | |||
| int dlog_getlevel(int module_id, int *enable_event) { return log_level; } | |||
| int CheckLogLevel(int moduleId, int logLevel) | |||
| { | |||
| return 1; | |||
| } | |||
| int CheckLogLevel(int moduleId, int log_level_check) { return log_level >= log_level_check; } | |||
| /** | |||
| * @ingroup plog | |||
| * @brief DlogReportInitialize: init log in service process before all device setting. | |||
| * @return: 0: SUCCEED, others: FAILED | |||
| */ | |||
| int DlogReportInitialize() { | |||
| return 0; | |||
| } | |||
| int DlogReportInitialize() { return 0; } | |||
| /** | |||
| * @ingroup plog | |||
| * @brief DlogReportFinalize: release log resource in service process after all device reset. | |||
| * @return: 0: SUCCEED, others: FAILED | |||
| */ | |||
| int DlogReportFinalize() { | |||
| return 0; | |||
| } | |||
| int DlogReportFinalize() { return 0; } | |||
| @@ -15,18 +15,5 @@ | |||
| include(cmake/graphengine.cmake) | |||
| add_subdirectory(easy_graph) | |||
| add_subdirectory(stub_engine) | |||
| add_subdirectory(ge_graph_dsl) | |||
| file(GLOB_RECURSE UTILS_SRC CONFIGURE_DEPENDS | |||
| "utils/*.cc" | |||
| ) | |||
| add_library(framework STATIC ${UTILS_SRC}) | |||
| target_include_directories(framework | |||
| PUBLIC utils/ | |||
| ) | |||
| set_target_properties(framework PROPERTIES CXX_STANDARD 11) | |||
| target_link_libraries(framework PUBLIC ge_graph_dsl graphengine fe) | |||
| add_subdirectory(ge_running_env) | |||
| @@ -103,6 +103,7 @@ list(APPEND INCLUDE_DIRECTORIES | |||
| "${GE_CODE_DIR}/third_party/fwkacllib/inc/cce" | |||
| "${GE_CODE_DIR}/third_party/fwkacllib/inc/ops" | |||
| "${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain" | |||
| "${GE_CODE_DIR}/third_party/fwkacllib/inc/opt_info" | |||
| "${GE_CODE_DIR}/tests/ut/ge" | |||
| "${GE_CODE_DIR}/tests/ut/common" | |||
| "${CMAKE_BINARY_DIR}" | |||
| @@ -117,6 +118,7 @@ list(APPEND STUB_LIBS | |||
| runtime_stub | |||
| profiler_stub | |||
| hccl_stub | |||
| opt_feature_stub | |||
| error_manager_stub | |||
| ascend_protobuf | |||
| json | |||
| @@ -150,7 +152,7 @@ set_target_properties(metadef_graph PROPERTIES CXX_STANDARD 11) | |||
| # ---- Target : Local engine ---- | |||
| add_library(ge_local_engine SHARED ${LOCAL_ENGINE_SRC} ${METADEF_REGISTER_SRCS}) | |||
| add_library(ge_local_engine SHARED ${LOCAL_ENGINE_SRC}) | |||
| target_include_directories(ge_local_engine | |||
| PUBLIC | |||
| @@ -169,38 +171,11 @@ target_compile_options(ge_local_engine PRIVATE | |||
| target_link_libraries(ge_local_engine PUBLIC | |||
| $<BUILD_INTERFACE:intf_pub> ${STUB_LIBS} | |||
| metadef_graph | |||
| -lrt -ldl -lpthread -lgcov | |||
| ) | |||
| set_target_properties(ge_local_engine PROPERTIES CXX_STANDARD 11) | |||
| # ---- Target : Host engine ---- | |||
| add_library(host_cpu_engine SHARED ${HOST_ENGINE_SRC}) | |||
| target_include_directories(host_cpu_engine | |||
| PUBLIC | |||
| "${INCLUDE_DIRECTORIES}" | |||
| "${GE_CODE_DIR}/ge/host_cpu_engine" | |||
| ) | |||
| target_compile_definitions(host_cpu_engine PRIVATE | |||
| google=ascend_private | |||
| FMK_SUPPORT_DUMP | |||
| ) | |||
| target_compile_options(host_cpu_engine PRIVATE | |||
| -g --coverage -fprofile-arcs -ftest-coverage | |||
| -Werror=format | |||
| ) | |||
| target_link_libraries(host_cpu_engine PUBLIC | |||
| $<BUILD_INTERFACE:intf_pub> ${STUB_LIBS} metadef_graph -lrt -ldl -lpthread -lgcov | |||
| ) | |||
| set_target_properties(host_cpu_engine PROPERTIES CXX_STANDARD 11) | |||
| # ---- Target : engine plugin---- | |||
| # | |||
| @@ -273,4 +248,4 @@ target_link_libraries(graphengine PUBLIC | |||
| ) | |||
| set_target_properties(graphengine PROPERTIES CXX_STANDARD 11) | |||
| add_dependencies(graphengine host_cpu_engine ge_local_engine nnengine engine_conf.json optimizer_priority.pbtxt) | |||
| add_dependencies(graphengine ge_local_engine nnengine engine_conf.json optimizer_priority.pbtxt) | |||
| @@ -26,16 +26,32 @@ EG_NS_BEGIN | |||
| //////////////////////////////////////////////////////////////// | |||
| namespace detail { | |||
| template<typename GRAPH_BUILDER> | |||
| template <typename GRAPH_BUILDER> | |||
| Graph BuildGraph(const char *name, GRAPH_BUILDER builderInDSL) { | |||
| GraphBuilder builder(name); | |||
| builderInDSL(builder); | |||
| return std::move(*builder); | |||
| } | |||
| struct GraphDefiner { | |||
| GraphDefiner(const char *defaultName, const char *specifiedName = nullptr) { | |||
| name = specifiedName ? specifiedName : defaultName; | |||
| } | |||
| template <typename USER_BUILDER> | |||
| auto operator|(USER_BUILDER &&userBuilder) { | |||
| GraphBuilder graphBuilder{name}; | |||
| std::forward<USER_BUILDER>(userBuilder)(graphBuilder); | |||
| return *graphBuilder; | |||
| } | |||
| private: | |||
| const char *name; | |||
| }; | |||
| } // namespace detail | |||
| #define HAS_NAME(...) NOT_EMPTY_SELECT(__VA_ARGS__) | |||
| #define DEF_GRAPH(G, ...) ::EG_NS::Graph G = ::EG_NS::detail::BuildGraph(HAS_NAME(__VA_ARGS__)(__VA_ARGS__, #G), [&](::EG_NS::GraphBuilder& BUILDER) | |||
| #define DEF_GRAPH(G, ...) ::EG_NS::Graph G = ::EG_NS::detail::GraphDefiner(#G, ##__VA_ARGS__) | [&](auto &&BUILDER) | |||
| #define DATA_CHAIN(...) ::EG_NS::ChainBuilder(BUILDER, ::EG_NS::EdgeType::DATA)->__VA_ARGS__ | |||
| #define CTRL_CHAIN(...) ::EG_NS::ChainBuilder(BUILDER, ::EG_NS::EdgeType::CTRL)->__VA_ARGS__ | |||
| #define CHAIN(...) DATA_CHAIN(__VA_ARGS__) | |||
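| // Editor's note: a hedged illustration of what the reworked DEF_GRAPH expands to; "g1", "g2" and | |||
| // the graph bodies are placeholders, and the expansion is paraphrased from the definitions above. | |||
| //   DEF_GRAPH(g1) { CHAIN(/* node chain DSL */); }; | |||
| //     ==> ::EG_NS::Graph g1 = ::EG_NS::detail::GraphDefiner("g1") | [&](auto &&BUILDER) { /* body */ }; | |||
| //   DEF_GRAPH(g2, "custom_name") { /* body */ };   // the optional second argument overrides the name | |||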
| @@ -16,10 +16,15 @@ | |||
| #include "easy_graph/layout/graph_layout.h" | |||
| #include "easy_graph/layout/layout_executor.h" | |||
| #include "easy_graph/layout/engines/graph_easy/graph_easy_executor.h" | |||
| #include "easy_graph/graph/graph.h" | |||
| EG_NS_BEGIN | |||
| namespace { | |||
| GraphEasyExecutor default_executor; | |||
| } | |||
| void GraphLayout::Config(LayoutExecutor &executor, const LayoutOption *opts) { | |||
| this->executor_ = &executor; | |||
| options_ = opts; | |||
| @@ -27,8 +32,7 @@ void GraphLayout::Config(LayoutExecutor &executor, const LayoutOption *opts) { | |||
| Status GraphLayout::Layout(const Graph &graph, const LayoutOption *opts) { | |||
| const LayoutOption *options = opts ? opts : this->options_; | |||
| if (!executor_) | |||
| return EG_UNIMPLEMENTED; | |||
| if (!executor_) return static_cast<LayoutExecutor &>(default_executor).Layout(graph, options); | |||
| return executor_->Layout(graph, options); | |||
| } | |||
| @@ -0,0 +1,37 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef D52AA06185E34BBFB714FFBCDAB0D53A | |||
| #define D52AA06185E34BBFB714FFBCDAB0D53A | |||
| #include "ge_graph_dsl/ge.h" | |||
| #include <exception> | |||
| #include <string> | |||
| GE_NS_BEGIN | |||
| struct AssertError : std::exception { | |||
| AssertError(const char *file, int line, const std::string &info); | |||
| private: | |||
| const char *what() const noexcept override; | |||
| private: | |||
| std::string info; | |||
| }; | |||
| GE_NS_END | |||
| #endif | |||
| @@ -0,0 +1,32 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef INC_31309AA0A4E44C009C22AD9351BF3410 | |||
| #define INC_31309AA0A4E44C009C22AD9351BF3410 | |||
| #include "ge_graph_dsl/ge.h" | |||
| #include "graph/compute_graph.h" | |||
| GE_NS_BEGIN | |||
| using GraphCheckFun = std::function<void(const ::GE_NS::ComputeGraphPtr &)>; | |||
| struct CheckUtils { | |||
| static bool CheckGraph(const std::string &phase_id, const GraphCheckFun &fun); | |||
| static void init(); | |||
| }; | |||
| GE_NS_END | |||
| #endif | |||
| @@ -0,0 +1,32 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef C8B32320BD4943D588594B82FFBF2685 | |||
| #define C8B32320BD4943D588594B82FFBF2685 | |||
| #include <vector> | |||
| #include <string> | |||
| #include "ge_graph_dsl/ge.h" | |||
| GE_NS_BEGIN | |||
| struct FilterScopeGuard { | |||
| FilterScopeGuard(const std::vector<std::string> &); | |||
| ~FilterScopeGuard(); | |||
| }; | |||
| GE_NS_END | |||
| #endif | |||
| @@ -0,0 +1,59 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef AD954C4ADF5B44F5B1CC8BCD72EE9ED6 | |||
| #define AD954C4ADF5B44F5B1CC8BCD72EE9ED6 | |||
| #include "ge_graph_dsl/ge.h" | |||
| #include "ge_graph_dsl/assert/check_utils.h" | |||
| #include "ge_graph_dsl/assert/assert_error.h" | |||
| #include "ge_graph_dsl/assert/filter_scope_guard.h" | |||
| GE_NS_BEGIN | |||
| #ifdef GTEST_MESSAGE_AT_ | |||
| #define GRAPH_CHECK_MESSAGE(file, line, message) \ | |||
| GTEST_MESSAGE_AT_(file, line, message, ::testing::TestPartResult::kFatalFailure) | |||
| #else | |||
| #define GRAPH_CHECK_MESSAGE(file, line, message) throw AssertError(file, line, message) | |||
| #endif | |||
| namespace detail { | |||
| struct GraphAssert { | |||
| GraphAssert(const char *file, unsigned int line, const std::string &phase_id) | |||
| : file_(file), line_(line), phase_id_(phase_id) {} | |||
| void operator|(const ::GE_NS::GraphCheckFun &check_fun) { | |||
| bool ret = ::GE_NS::CheckUtils::CheckGraph(phase_id_, check_fun); | |||
| if (!ret) { | |||
| auto message = "expect dump graph in phase: [" + phase_id_ + "], while not find the dump graph! "; | |||
| GRAPH_CHECK_MESSAGE(file_, line_, message.c_str()); | |||
| } | |||
| } | |||
| private: | |||
| const char *file_; | |||
| unsigned int line_; | |||
| const std::string phase_id_; | |||
| }; | |||
| } // namespace detail | |||
| #define DUMP_GRAPH_WHEN(...) ::GE_NS::FilterScopeGuard guard__COUNTER__({__VA_ARGS__}); | |||
| #define CHECK_GRAPH(phase_id) \ | |||
| ::GE_NS::detail::GraphAssert(__FILE__, __LINE__, #phase_id) | [&](const ::GE_NS::ComputeGraphPtr &graph) | |||
| GE_NS_END | |||
| #endif | |||
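A usage sketch of the two new macros, for review context: `DUMP_GRAPH_WHEN` scopes which dump phases are captured, and `CHECK_GRAPH` hands the captured `ComputeGraphPtr` to a checking lambda, failing via `GTEST_MESSAGE_AT_` (or `AssertError` outside gtest) when no graph was dumped for that phase. The phase name `PreRunAfterBuild`, the header path, and the assertions are illustrative assumptions.

    #include "ge_graph_dsl/assert/graph_assert.h"   // assumed path for this new header

    DUMP_GRAPH_WHEN("PreRunAfterBuild");            // capture only this dump phase
    // ... run the session / compilation code under test ...
    CHECK_GRAPH(PreRunAfterBuild) {
      // `graph` is the ::GE_NS::ComputeGraphPtr bound by the macro's lambda parameter.
      ASSERT_NE(graph, nullptr);
      ASSERT_EQ(graph->GetAllNodes().size(), 4);    // any gtest assertions can go here
    };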
| @@ -33,14 +33,12 @@ struct OpDescCfg { | |||
| std::vector<int64_t> shape_; | |||
| }; | |||
| OpDescCfg(const OpType &type, int in_cnt = 0, int out_cnt = 0, Format format = FORMAT_NCHW, | |||
| OpDescCfg(const OpType &type, int in_cnt = 1, int out_cnt = 1, Format format = FORMAT_NCHW, | |||
| DataType data_type = DT_FLOAT, std::vector<int64_t> shape = {1, 1, 224, 224}) | |||
| : type_(type), in_cnt_(in_cnt), out_cnt_(out_cnt), default_tensor_(format, data_type, shape) {} | |||
| protected: | |||
| OpType GetType() const { | |||
| return type_; | |||
| } | |||
| OpType GetType() const { return type_; } | |||
| OpType type_; | |||
| int in_cnt_; | |||
| int out_cnt_; | |||
| @@ -21,6 +21,7 @@ | |||
| #include "ge_graph_dsl/ge.h" | |||
| #include "ge_graph_dsl/op_desc/op_box.h" | |||
| #include "ge_graph_dsl/op_desc/op_desc_cfg.h" | |||
| #include "graph/ge_attr_value.h" | |||
| #include "graph/op_desc.h" | |||
| GE_NS_BEGIN | |||
| @@ -29,19 +30,32 @@ struct OpDescCfgBox : OpBox, private OpDescCfg { | |||
| OpDescCfgBox(const OpType &opType); | |||
| OpDescCfgBox &InCnt(int in_cnt); | |||
| OpDescCfgBox &OutCnt(int out_cnt); | |||
| OpDescCfgBox &ParentNodeIndex(int node_index); | |||
| OpDescCfgBox &TensorDesc(Format format = FORMAT_NCHW, DataType data_type = DT_FLOAT, | |||
| std::vector<int64_t> shape = {1, 1, 224, 224}); | |||
| template<typename Type> | |||
| OpDescCfgBox& Attr(const std::string &name, Type value) { | |||
| auto attrvalue = ge::GeAttrValue::CreateFrom<Type>(value); | |||
| attrs_.emplace(std::make_pair(name, attrvalue)); | |||
| return *this; | |||
| } | |||
| std::vector<int64_t> shape = {1, 1, 224, 224}); | |||
| OpDescCfgBox &Weight(GeTensorPtr &); | |||
| private: | |||
| template <typename Type> | |||
| OpDescCfgBox &Attr(const std::string &name, Type &&value) { | |||
| auto attrvalue = ge::GeAttrValue::CreateFrom<Type>(std::forward<Type>(value)); | |||
| attrs_.emplace(std::make_pair(name, attrvalue)); | |||
| return *this; | |||
| } | |||
| template <typename Type> | |||
| OpDescCfgBox &Attr(const std::string &name, Type &value) { | |||
| auto attrvalue = ge::GeAttrValue::CreateFrom<Type>(value); | |||
| attrs_.emplace(std::make_pair(name, attrvalue)); | |||
| return *this; | |||
| } | |||
| OpDescCfgBox &Attr(const std::string &name, int value); | |||
| OpDescCfgBox &Attr(const std::string &name, const char *value); | |||
| OpDescPtr Build(const ::EG_NS::NodeId &id) const override; | |||
| void UpdateAttrs(OpDescPtr&) const; | |||
| std::map<std::string, GeAttrValue> attrs_; | |||
| private: | |||
| void UpdateAttrs(OpDescPtr &) const; | |||
| std::map<std::string, GeAttrValue> attrs_; | |||
| }; | |||
| #define OP_CFG(optype) ::GE_NS::OpDescCfgBox(optype) | |||
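A usage sketch of the `OP_CFG` builder with the reworked `Attr` overloads (the lvalue/rvalue templates plus the explicit `int` and `const char *` overloads). The attribute names and the `DATA`/`ADD` op-type constants are illustrative assumptions.

    auto data_cfg = OP_CFG(DATA).InCnt(1).OutCnt(1).ParentNodeIndex(0);

    auto add_cfg = OP_CFG(ADD)
                       .InCnt(2)
                       .OutCnt(1)
                       .Attr("seed", 1)                    // int overload
                       .Attr("_stream_label", "label_a")   // const char * overload
                       .TensorDesc(FORMAT_NCHW, DT_FLOAT, {1, 1, 224, 224});
    // Build(...) is called by the framework to materialize the OpDescPtr;
    // UpdateAttrs() then copies attrs_ onto the produced OpDesc.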
| @@ -0,0 +1,26 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "ge_graph_dsl/assert/assert_error.h" | |||
| GE_NS_BEGIN | |||
| AssertError::AssertError(const char *file, int line, const std::string &info) { | |||
| this->info = std::string(file) + ":" + std::to_string(line) + "\n" + info; | |||
| } | |||
| const char *AssertError::what() const noexcept { return info.c_str(); } | |||
| GE_NS_END | |||
| @@ -0,0 +1,34 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "ge_graph_dsl/assert/check_utils.h" | |||
| #include "graph/utils/dumper/ge_graph_dumper.h" | |||
| #include "ge_graph_default_checker.h" | |||
| #include "ge_graph_check_dumper.h" | |||
| GE_NS_BEGIN | |||
| bool CheckUtils::CheckGraph(const std::string &phase_id, const GraphCheckFun &fun) { | |||
| auto &dumper = dynamic_cast<GeGraphCheckDumper &>(GraphDumperRegistry::GetDumper()); | |||
| return dumper.CheckFor(GeGraphDefaultChecker(phase_id, fun)); | |||
| } | |||
| void CheckUtils::init() { | |||
| static GeGraphCheckDumper checkDumper; | |||
| GraphDumperRegistry::Register(checkDumper); | |||
| } | |||
| GE_NS_END | |||
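One wiring note on this file: `CheckUtils::CheckGraph` `dynamic_cast`s the globally registered dumper to `GeGraphCheckDumper`, so `CheckUtils::init()` must run before any `CHECK_GRAPH` is evaluated, otherwise the cast fails. A sketch of doing that once per test binary; the custom gtest `main` is an assumption about how the ST target is wired up.

    #include <gtest/gtest.h>
    #include "ge_graph_dsl/assert/check_utils.h"

    int main(int argc, char **argv) {
      ::testing::InitGoogleTest(&argc, argv);
      ::GE_NS::CheckUtils::init();   // registers the static GeGraphCheckDumper as the global graph dumper
      return RUN_ALL_TESTS();
    }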
| @@ -0,0 +1,31 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "ge_graph_dsl/assert/filter_scope_guard.h" | |||
| #include "graph/utils/dumper/ge_graph_dumper.h" | |||
| #include "ge_dump_filter.h" | |||
| GE_NS_BEGIN | |||
| namespace { | |||
| GeDumpFilter &GetDumpFilter() { return dynamic_cast<GeDumpFilter &>(GraphDumperRegistry::GetDumper()); } | |||
| } // namespace | |||
| FilterScopeGuard::FilterScopeGuard(const std::vector<std::string> &filter) { GetDumpFilter().Update(filter); } | |||
| FilterScopeGuard::~FilterScopeGuard() { GetDumpFilter().Reset(); } | |||
| GE_NS_END | |||