| @@ -56,7 +56,7 @@ if (ENABLE_OPEN_SRC) | |||||
| set(GE_LIB_PATH ${GE_LIB_PATH}/${GE_SYS_ARCH}) | set(GE_LIB_PATH ${GE_LIB_PATH}/${GE_SYS_ARCH}) | ||||
| set(STATIC_ACL_LIB ${GE_LIB_PATH}) | set(STATIC_ACL_LIB ${GE_LIB_PATH}) | ||||
| find_module(slog libslog.so ${GE_LIB_PATH}) | find_module(slog libslog.so ${GE_LIB_PATH}) | ||||
| find_module(mmpa libmmpa.so ${GE_LIB_PATH}) | |||||
| find_module(static_mmpa libmmpa.a ${GE_LIB_PATH}) | |||||
| find_module(msprof libmsprof.so ${GE_LIB_PATH}) | find_module(msprof libmsprof.so ${GE_LIB_PATH}) | ||||
| find_module(hccl libhccl.so ${GE_LIB_PATH}) | find_module(hccl libhccl.so ${GE_LIB_PATH}) | ||||
| find_module(adump_server libadump_server.a ${GE_LIB_PATH}) | find_module(adump_server libadump_server.a ${GE_LIB_PATH}) | ||||
| @@ -67,10 +67,10 @@ if (ENABLE_OPEN_SRC) | |||||
| find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH}) | find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH}) | ||||
| find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH}) | find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH}) | ||||
| find_module(msprofiler libmsprofiler.a ${GE_LIB_PATH}) | find_module(msprofiler libmsprofiler.a ${GE_LIB_PATH}) | ||||
| #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH}) | |||||
| #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH}) | |||||
| else() | else() | ||||
| find_module(slog libslog.so ${ASCEND_ATC_DIR}) | find_module(slog libslog.so ${ASCEND_ATC_DIR}) | ||||
| find_module(mmpa libmmpa.so ${ASCEND_ATC_DIR}) | |||||
| find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR}) | |||||
| if(PLATFORM STREQUAL "train") | if(PLATFORM STREQUAL "train") | ||||
| find_module(msprof libmsprof.so ${ASCEND_DRIVER_COMMON_DIR}) | find_module(msprof libmsprof.so ${ASCEND_DRIVER_COMMON_DIR}) | ||||
| find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) | find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) | ||||
| @@ -91,7 +91,7 @@ if (ENABLE_OPEN_SRC) | |||||
| find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | ||||
| find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | ||||
| find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) | find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) | ||||
| #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||||
| #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||||
| if(PRODUCT STREQUAL "flr3") | if(PRODUCT STREQUAL "flr3") | ||||
| find_module(msprof libmsprof.so ${ASCEND_DRIVER_SHARE_DIR}) | find_module(msprof libmsprof.so ${ASCEND_DRIVER_SHARE_DIR}) | ||||
| elseif(PRODUCT STREQUAL "flr1") | elseif(PRODUCT STREQUAL "flr1") | ||||
| @@ -114,7 +114,7 @@ if (ENABLE_OPEN_SRC) | |||||
| find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | ||||
| find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) | find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) | ||||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | ||||
| #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||||
| #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||||
| else() | else() | ||||
| message(FATAL_ERROR "PLATFORM param is invalid, should be train or inference, build terminated") | message(FATAL_ERROR "PLATFORM param is invalid, should be train or inference, build terminated") | ||||
| endif() | endif() | ||||
| @@ -148,14 +148,21 @@ elseif (ENABLE_D OR ENABLE_ACL) | |||||
| # common libraries | # common libraries | ||||
| find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH}) | find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH}) | ||||
| find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | ||||
| find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | |||||
| if (ENABLE_D) | if (ENABLE_D) | ||||
| # training | # training | ||||
| find_module(mmpa libmmpa.so ${ASCEND_MS_DRIVER_PATH}) | |||||
| find_module(runtime libruntime.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | find_module(runtime libruntime.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | ||||
| find_module(register libregister.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | find_module(register libregister.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | ||||
| endif () | endif () | ||||
| set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef) | |||||
| add_subdirectory(metadef) | |||||
| elseif(ENABLE_MS_TESTCASE) | |||||
| include(cmake/external_libs/protobuf_static.cmake) | |||||
| include(cmake/external_libs/securec.cmake) | |||||
| include(cmake/intf_pub_linux.cmake) | |||||
| set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef) | set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef) | ||||
| add_subdirectory(metadef) | add_subdirectory(metadef) | ||||
| else() | else() | ||||
| @@ -48,7 +48,7 @@ set_target_properties(ascend_protobuf_static_lib PROPERTIES | |||||
| add_library(ascend_protobuf_static INTERFACE) | add_library(ascend_protobuf_static INTERFACE) | ||||
| target_include_directories(ascend_protobuf_static INTERFACE ${PROTOBUF_STATIC_PKG_DIR}/include) | target_include_directories(ascend_protobuf_static INTERFACE ${PROTOBUF_STATIC_PKG_DIR}/include) | ||||
| target_link_libraries(ascend_protobuf_static INTERFACE ascend_protobuf_static_lib) | target_link_libraries(ascend_protobuf_static INTERFACE ascend_protobuf_static_lib) | ||||
| if (ENABLE_D OR ENABLE_ACL) | |||||
| if (ENABLE_D OR ENABLE_ACL OR ENABLE_MS_TESTCASES) | |||||
| include_directories(${PROTOBUF_STATIC_PKG_DIR}/include) | include_directories(${PROTOBUF_STATIC_PKG_DIR}/include) | ||||
| endif () | endif () | ||||
| @@ -1,4 +1,4 @@ | |||||
| if (NOT ENABLE_D AND NOT ENABLE_ACL) | |||||
| if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) | |||||
| add_subdirectory(common) | add_subdirectory(common) | ||||
| add_subdirectory(plugin/engine) | add_subdirectory(plugin/engine) | ||||
| add_subdirectory(graph/build/memory) | add_subdirectory(graph/build/memory) | ||||
| @@ -600,7 +600,7 @@ set(INFER_SRC_LIST | |||||
| "analyzer/analyzer.cc" | "analyzer/analyzer.cc" | ||||
| ) | ) | ||||
| if (NOT ENABLE_D AND NOT ENABLE_ACL) | |||||
| if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) | |||||
| ############ libge_runner.so ############ | ############ libge_runner.so ############ | ||||
| add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS}) | add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS}) | ||||
| @@ -648,6 +648,7 @@ target_link_libraries(ge_runner | |||||
| ge_memory | ge_memory | ||||
| adump_server | adump_server | ||||
| msprofiler | msprofiler | ||||
| static_mmpa | |||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| graph | graph | ||||
| ge_common | ge_common | ||||
| @@ -655,7 +656,6 @@ target_link_libraries(ge_runner | |||||
| register | register | ||||
| c_sec | c_sec | ||||
| slog | slog | ||||
| mmpa | |||||
| msprof | msprof | ||||
| runtime | runtime | ||||
| resource | resource | ||||
| @@ -712,6 +712,7 @@ target_include_directories(ge_compiler PRIVATE | |||||
| target_link_libraries(ge_compiler | target_link_libraries(ge_compiler | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| ge_memory | ge_memory | ||||
| static_mmpa | |||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| graph | graph | ||||
| ge_common | ge_common | ||||
| @@ -720,7 +721,6 @@ target_link_libraries(ge_compiler | |||||
| c_sec | c_sec | ||||
| error_manager | error_manager | ||||
| slog | slog | ||||
| mmpa | |||||
| runtime_compile | runtime_compile | ||||
| resource | resource | ||||
| -Wl,--as-needed | -Wl,--as-needed | ||||
| @@ -770,6 +770,7 @@ target_link_libraries(opensrc_ascendcl PRIVATE | |||||
| ge_executor | ge_executor | ||||
| ge_common_static | ge_common_static | ||||
| graph_static | graph_static | ||||
| static_mmpa | |||||
| ascend_protobuf_static | ascend_protobuf_static | ||||
| register_static | register_static | ||||
| error_manager_static | error_manager_static | ||||
| @@ -779,11 +780,11 @@ target_link_libraries(opensrc_ascendcl PRIVATE | |||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| c_sec | c_sec | ||||
| runtime | runtime | ||||
| mmpa | |||||
| slog | slog | ||||
| msprof | msprof | ||||
| ascend_hal_stub | ascend_hal_stub | ||||
| -Wl,--as-needed | -Wl,--as-needed | ||||
| -lrt | |||||
| -ldl | -ldl | ||||
| json | json | ||||
| ) | ) | ||||
| @@ -177,7 +177,7 @@ Session::Session(const std::map<string, string> &options) { | |||||
| // check init status | // check init status | ||||
| sessionId_ = 0; | sessionId_ = 0; | ||||
| if (!g_ge_initialized) { | if (!g_ge_initialized) { | ||||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED); | |||||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GE is not initialized."); | |||||
| return; | return; | ||||
| } | } | ||||
| // call Initialize | // call Initialize | ||||
| @@ -105,6 +105,7 @@ target_include_directories(ge_common PRIVATE | |||||
| target_link_libraries(ge_common PRIVATE | target_link_libraries(ge_common PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| static_mmpa | |||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| graph | graph | ||||
| ascend_protobuf | ascend_protobuf | ||||
| @@ -112,7 +113,6 @@ target_link_libraries(ge_common PRIVATE | |||||
| c_sec | c_sec | ||||
| error_manager | error_manager | ||||
| slog | slog | ||||
| mmpa | |||||
| -Wl,--as-needed | -Wl,--as-needed | ||||
| json | json | ||||
| -lrt | -lrt | ||||
| @@ -210,7 +210,7 @@ target_link_libraries(ge_common PRIVATE | |||||
| c_sec | c_sec | ||||
| error_manager | error_manager | ||||
| slog | slog | ||||
| mmpa | |||||
| static_mmpa | |||||
| -Wl,--as-needed | -Wl,--as-needed | ||||
| json | json | ||||
| -lrt | -lrt | ||||
| @@ -16,9 +16,7 @@ | |||||
| #include "common/auth/file_saver.h" | #include "common/auth/file_saver.h" | ||||
| #include <fcntl.h> | |||||
| #include <securec.h> | #include <securec.h> | ||||
| #include <unistd.h> | |||||
| #include <cstdlib> | #include <cstdlib> | ||||
| #include <fstream> | #include <fstream> | ||||
| #include <vector> | #include <vector> | ||||
| @@ -39,12 +37,12 @@ Status FileSaver::OpenFile(int32_t &fd, const std::string &file_path) { | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| char real_path[PATH_MAX] = {0}; | |||||
| GE_IF_BOOL_EXEC(realpath(file_path.c_str(), real_path) == nullptr, | |||||
| char real_path[MMPA_MAX_PATH] = {0}; | |||||
| GE_IF_BOOL_EXEC(mmRealPath(file_path.c_str(), real_path, MMPA_MAX_PATH) != EN_OK, | |||||
| GELOGI("File %s is not exist, it will be created.", file_path.c_str())); | GELOGI("File %s is not exist, it will be created.", file_path.c_str())); | ||||
| // Open file | // Open file | ||||
| mode_t mode = S_IRUSR | S_IWUSR; | |||||
| fd = mmOpen2(real_path, O_RDWR | O_CREAT | O_TRUNC, mode); | |||||
| mmMode_t mode = M_IRUSR | M_IWUSR; | |||||
| fd = mmOpen2(real_path, M_RDWR | M_CREAT | O_TRUNC, mode); | |||||
| if (fd == EN_INVALID_PARAM || fd == EN_ERROR) { | if (fd == EN_INVALID_PARAM || fd == EN_ERROR) { | ||||
| // -1: Failed to open file; - 2: Illegal parameter | // -1: Failed to open file; - 2: Illegal parameter | ||||
| GELOGE(FAILED, "Open file failed. mmpa_errno = %d, %s", fd, strerror(errno)); | GELOGE(FAILED, "Open file failed. mmpa_errno = %d, %s", fd, strerror(errno)); | ||||
| @@ -194,7 +192,7 @@ Status FileSaver::SaveToBuffWithFileHeader(const ModelFileHeader &file_header, | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status FileSaver::CheckPath(const std::string &file_path) { | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status FileSaver::CheckPath(const std::string &file_path) { | ||||
| // Determine file path length | // Determine file path length | ||||
| if (file_path.size() >= PATH_MAX) { | |||||
| if (file_path.size() >= MMPA_MAX_PATH) { | |||||
| GELOGE(FAILED, "Path is too long:%zu", file_path.size()); | GELOGE(FAILED, "Path is too long:%zu", file_path.size()); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -25,7 +25,7 @@ void CustAICPUKernelStore::AddCustAICPUKernel(const CustAICPUKernelPtr &kernel) | |||||
| } | } | ||||
| void CustAICPUKernelStore::LoadCustAICPUKernelBinToOpDesc(const std::shared_ptr<ge::OpDesc> &op_desc) const { | void CustAICPUKernelStore::LoadCustAICPUKernelBinToOpDesc(const std::shared_ptr<ge::OpDesc> &op_desc) const { | ||||
| GELOGI("LoadCustAICPUKernelBinToOpDesc in"); | |||||
| GELOGD("LoadCustAICPUKernelBinToOpDesc in"); | |||||
| if (op_desc != nullptr) { | if (op_desc != nullptr) { | ||||
| auto kernel_bin = FindKernel(op_desc->GetName()); | auto kernel_bin = FindKernel(op_desc->GetName()); | ||||
| if (kernel_bin != nullptr) { | if (kernel_bin != nullptr) { | ||||
| @@ -34,6 +34,6 @@ void CustAICPUKernelStore::LoadCustAICPUKernelBinToOpDesc(const std::shared_ptr< | |||||
| GELOGI("Load cust aicpu kernel:%s, %zu", kernel_bin->GetName().c_str(), kernel_bin->GetBinDataSize()); | GELOGI("Load cust aicpu kernel:%s, %zu", kernel_bin->GetName().c_str(), kernel_bin->GetBinDataSize()); | ||||
| } | } | ||||
| } | } | ||||
| GELOGI("LoadCustAICPUKernelBinToOpDesc success"); | |||||
| GELOGD("LoadCustAICPUKernelBinToOpDesc success"); | |||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -16,9 +16,6 @@ | |||||
| #include "common/debug/memory_dumper.h" | #include "common/debug/memory_dumper.h" | ||||
| #include <fcntl.h> | |||||
| #include <unistd.h> | |||||
| #include <string> | #include <string> | ||||
| #include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
| @@ -138,26 +135,26 @@ int MemoryDumper::OpenFile(const char *filename) { | |||||
| } | } | ||||
| // Get the absolute path | // Get the absolute path | ||||
| string real_path; | string real_path; | ||||
| char tmp_path[PATH_MAX] = {0}; | |||||
| char tmp_path[MMPA_MAX_PATH] = {0}; | |||||
| GE_IF_BOOL_EXEC( | GE_IF_BOOL_EXEC( | ||||
| -1 != path_split_pos, string prefix_path = std::string(filename).substr(0, path_split_pos); | -1 != path_split_pos, string prefix_path = std::string(filename).substr(0, path_split_pos); | ||||
| string last_path = std::string(filename).substr(path_split_pos, strlen(filename) - 1); | string last_path = std::string(filename).substr(path_split_pos, strlen(filename) - 1); | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(prefix_path.length() >= PATH_MAX, return kInvalidFd, "Prefix path is too long!"); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(realpath(prefix_path.c_str(), tmp_path) == nullptr, return kInvalidFd, | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(prefix_path.length() >= MMPA_MAX_PATH, return kInvalidFd, "Prefix path is too long!"); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mmRealPath(prefix_path.c_str(), tmp_path, MMPA_MAX_PATH) != EN_OK, return kInvalidFd, | |||||
| "Dir %s does not exit.", prefix_path.c_str()); | "Dir %s does not exit.", prefix_path.c_str()); | ||||
| real_path = std::string(tmp_path) + last_path;) | real_path = std::string(tmp_path) + last_path;) | ||||
| GE_IF_BOOL_EXEC( | GE_IF_BOOL_EXEC( | ||||
| path_split_pos == -1 || path_split_pos == 0, | path_split_pos == -1 || path_split_pos == 0, | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(filename) >= PATH_MAX, return kInvalidFd, "Prefix path is too long!"); | |||||
| GE_IF_BOOL_EXEC(realpath(filename, tmp_path) == nullptr, | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(filename) >= MMPA_MAX_PATH, return kInvalidFd, "Prefix path is too long!"); | |||||
| GE_IF_BOOL_EXEC(mmRealPath(filename, tmp_path, MMPA_MAX_PATH) != EN_OK, | |||||
| GELOGI("File %s does not exit, it will be created.", filename)); | GELOGI("File %s does not exit, it will be created.", filename)); | ||||
| real_path = std::string(tmp_path);) | real_path = std::string(tmp_path);) | ||||
| // Open file, only the current user can read and write, to avoid malicious application access | // Open file, only the current user can read and write, to avoid malicious application access | ||||
| // Using the O_EXCL, if the file already exists,return failed to avoid privilege escalation vulnerability. | // Using the O_EXCL, if the file already exists,return failed to avoid privilege escalation vulnerability. | ||||
| mode_t mode = S_IRUSR | S_IWUSR; | |||||
| mmMode_t mode = M_IRUSR | M_IWUSR; | |||||
| int32_t fd = mmOpen2(real_path.c_str(), O_RDWR | O_CREAT | O_APPEND, mode); | |||||
| int32_t fd = mmOpen2(real_path.c_str(), M_RDWR | M_CREAT | O_TRUNC, mode); | |||||
| if (fd == EN_ERROR || fd == EN_INVALID_PARAM) { | if (fd == EN_ERROR || fd == EN_INVALID_PARAM) { | ||||
| GELOGE(kInvalidFd, "open file failed. errno = %d, %s", fd, strerror(errno)); | GELOGE(kInvalidFd, "open file failed. errno = %d, %s", fd, strerror(errno)); | ||||
| return kInvalidFd; | return kInvalidFd; | ||||
| @@ -118,19 +118,19 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) { | |||||
| // data overflow check totally | // data overflow check totally | ||||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(h_o, w_o), | GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(h_o, w_o), | ||||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", h_o, w_o); | |||||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", h_o, w_o); | |||||
| return INTERNAL_ERROR); | return INTERNAL_ERROR); | ||||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(n_o, c_o), | GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(n_o, c_o), | ||||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", n_o, c_o); | |||||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", n_o, c_o); | |||||
| return INTERNAL_ERROR); | return INTERNAL_ERROR); | ||||
| auto t1 = h_o * w_o; | auto t1 = h_o * w_o; | ||||
| auto t2 = n_o * c_o; | auto t2 = n_o * c_o; | ||||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", t1, t2); | |||||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2); | |||||
| return INTERNAL_ERROR); | return INTERNAL_ERROR); | ||||
| int64_t total_ele_cnt = n_o * c_o * h_o * w_o; | int64_t total_ele_cnt = n_o * c_o * h_o * w_o; | ||||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(total_ele_cnt, size), | GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(total_ele_cnt, size), | ||||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", total_ele_cnt, size); | |||||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%d]", total_ele_cnt, size); | |||||
| return INTERNAL_ERROR); | return INTERNAL_ERROR); | ||||
| int64_t dst_size = total_ele_cnt * size; | int64_t dst_size = total_ele_cnt * size; | ||||
| if (dst_size == 0) { | if (dst_size == 0) { | ||||
| @@ -205,20 +205,20 @@ Status PaddingNC(const TransArgs &args, TransArgs &args_tmp, std::shared_ptr<uin | |||||
| // data overflow check | // data overflow check | ||||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(h_o, w_o), | GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(h_o, w_o), | ||||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", h_o, w_o); | |||||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", h_o, w_o); | |||||
| return INTERNAL_ERROR); | return INTERNAL_ERROR); | ||||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(n_o, c_o), | GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(n_o, c_o), | ||||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", n_o, c_o); | |||||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", n_o, c_o); | |||||
| return INTERNAL_ERROR); | return INTERNAL_ERROR); | ||||
| auto t1 = h_o * w_o; | auto t1 = h_o * w_o; | ||||
| auto t2 = n_o * c_o; | auto t2 = n_o * c_o; | ||||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", t1, t2); | |||||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2); | |||||
| return INTERNAL_ERROR); | return INTERNAL_ERROR); | ||||
| int64_t total_ele_cnt = n_o * c_o * h_o * w_o; | int64_t total_ele_cnt = n_o * c_o * h_o * w_o; | ||||
| int size = GetSizeByDataType(args.src_data_type); | int size = GetSizeByDataType(args.src_data_type); | ||||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(total_ele_cnt, size), | GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(total_ele_cnt, size), | ||||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", total_ele_cnt, size); | |||||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%d]", total_ele_cnt, size); | |||||
| return INTERNAL_ERROR); | return INTERNAL_ERROR); | ||||
| int64_t dst_size = total_ele_cnt * size; | int64_t dst_size = total_ele_cnt * size; | ||||
| @@ -30,8 +30,10 @@ const uint8_t kPrefixIndex = 9; | |||||
| namespace ge { | namespace ge { | ||||
| void OpTilingManager::ClearHandles() noexcept { | void OpTilingManager::ClearHandles() noexcept { | ||||
| for (const auto &handle : handles_) { | for (const auto &handle : handles_) { | ||||
| if (dlclose(handle.second) != 0) { | |||||
| GELOGE(FAILED, "Failed to close handle of %s: %s", handle.first.c_str(), dlerror()); | |||||
| if (mmDlclose(handle.second) != 0) { | |||||
| const char *error = mmDlerror(); | |||||
| GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||||
| GELOGE(FAILED, "Failed to close handle of %s: %s", handle.first.c_str(), error); | |||||
| } | } | ||||
| } | } | ||||
| handles_.clear(); | handles_.clear(); | ||||
| @@ -40,11 +42,12 @@ void OpTilingManager::ClearHandles() noexcept { | |||||
| OpTilingManager::~OpTilingManager() { ClearHandles(); } | OpTilingManager::~OpTilingManager() { ClearHandles(); } | ||||
| std::string OpTilingManager::GetPath() { | std::string OpTilingManager::GetPath() { | ||||
| const char *opp_path_env = std::getenv(kEnvName); | |||||
| char opp_path_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
| INT32 res = mmGetEnv(kEnvName, opp_path_env, MMPA_MAX_PATH); | |||||
| std::string opp_path = kDefaultPath; | std::string opp_path = kDefaultPath; | ||||
| if (opp_path_env != nullptr) { | |||||
| char resolved_path[PATH_MAX]; | |||||
| if (realpath(opp_path_env, resolved_path) == NULL) { | |||||
| if (res == EN_OK) { | |||||
| char resolved_path[MMPA_MAX_PATH]; | |||||
| if (mmRealPath(opp_path_env, resolved_path, MMPA_MAX_PATH) != EN_OK) { | |||||
| ErrorManager::GetInstance().ATCReportErrMessage( | ErrorManager::GetInstance().ATCReportErrMessage( | ||||
| "E19024", {"env", "value", "situation"}, {"ASCEND_OPP_PATH", opp_path_env, "loading the tiling lib"}); | "E19024", {"env", "value", "situation"}, {"ASCEND_OPP_PATH", opp_path_env, "loading the tiling lib"}); | ||||
| GELOGE(PARAM_INVALID, "Failed load tiling lib as env 'ASCEND_OPP_PATH'[%s] is invalid path.", opp_path_env); | GELOGE(PARAM_INVALID, "Failed load tiling lib as env 'ASCEND_OPP_PATH'[%s] is invalid path.", opp_path_env); | ||||
| @@ -66,16 +69,20 @@ void OpTilingManager::LoadSo() { | |||||
| std::string built_in_name = kDefaultBuiltInTilingPath.substr(kPrefixIndex); | std::string built_in_name = kDefaultBuiltInTilingPath.substr(kPrefixIndex); | ||||
| std::string custom_name = kDefaultCustomTilingPath.substr(kPrefixIndex); | std::string custom_name = kDefaultCustomTilingPath.substr(kPrefixIndex); | ||||
| void *handle_bi = dlopen(built_in_tiling_lib.c_str(), RTLD_NOW | RTLD_GLOBAL); | |||||
| void *handle_bi = mmDlopen(built_in_tiling_lib.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL); | |||||
| if (handle_bi == nullptr) { | if (handle_bi == nullptr) { | ||||
| GELOGW("Failed to dlopen %s!", dlerror()); | |||||
| const char *error = mmDlerror(); | |||||
| GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||||
| GELOGW("Failed to dlopen %s!", error); | |||||
| } else { | } else { | ||||
| handles_[built_in_name] = handle_bi; | handles_[built_in_name] = handle_bi; | ||||
| } | } | ||||
| void *handle_ct = dlopen(custom_tiling_lib.c_str(), RTLD_NOW | RTLD_GLOBAL); | |||||
| void *handle_ct = mmDlopen(custom_tiling_lib.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL); | |||||
| if (handle_ct == nullptr) { | if (handle_ct == nullptr) { | ||||
| GELOGW("Failed to dlopen %s!", dlerror()); | |||||
| const char *error = mmDlerror(); | |||||
| GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||||
| GELOGW("Failed to dlopen %s!", error); | |||||
| } else { | } else { | ||||
| handles_[custom_name] = handle_ct; | handles_[custom_name] = handle_ct; | ||||
| } | } | ||||
| @@ -16,9 +16,7 @@ | |||||
| #include "common/ge/plugin_manager.h" | #include "common/ge/plugin_manager.h" | ||||
| #include <dirent.h> | |||||
| #include <sys/stat.h> | #include <sys/stat.h> | ||||
| #include <unistd.h> | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <cstring> | #include <cstring> | ||||
| #include <fstream> | #include <fstream> | ||||
| @@ -38,8 +36,10 @@ const char *const kExt = ".so"; // supported extension of shared obje | |||||
| namespace ge { | namespace ge { | ||||
| void PluginManager::ClearHandles_() noexcept { | void PluginManager::ClearHandles_() noexcept { | ||||
| for (const auto &handle : handles_) { | for (const auto &handle : handles_) { | ||||
| if (dlclose(handle.second) != 0) { | |||||
| GELOGW("Failed to close handle of %s: %s", handle.first.c_str(), dlerror()); | |||||
| if (mmDlclose(handle.second) != 0) { | |||||
| const char *error = mmDlerror(); | |||||
| GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||||
| GELOGW("Failed to close handle of %s: %s", handle.first.c_str(), error); | |||||
| } | } | ||||
| } | } | ||||
| handles_.clear(); | handles_.clear(); | ||||
| @@ -48,18 +48,18 @@ void PluginManager::ClearHandles_() noexcept { | |||||
| PluginManager::~PluginManager() { ClearHandles_(); } | PluginManager::~PluginManager() { ClearHandles_(); } | ||||
| string PluginManager::GetPath() { | string PluginManager::GetPath() { | ||||
| Dl_info dl_info; | |||||
| if (dladdr(reinterpret_cast<void *>(&PluginManager::GetPath), &dl_info) == 0) { | |||||
| mmDlInfo dl_info; | |||||
| if (mmDladdr(reinterpret_cast<void *>(&PluginManager::GetPath), &dl_info) != EN_OK) { | |||||
| GELOGW("Failed to read the shared library file path!"); | GELOGW("Failed to read the shared library file path!"); | ||||
| return string(); | return string(); | ||||
| } else { | } else { | ||||
| std::string so_path = dl_info.dli_fname; | std::string so_path = dl_info.dli_fname; | ||||
| char path[PATH_MAX] = {0}; | |||||
| if (so_path.length() >= PATH_MAX) { | |||||
| char path[MMPA_MAX_PATH] = {0}; | |||||
| if (so_path.length() >= MMPA_MAX_PATH) { | |||||
| GELOGW("The shared library file path is too long!"); | GELOGW("The shared library file path is too long!"); | ||||
| return string(); | return string(); | ||||
| } | } | ||||
| if (realpath(so_path.c_str(), path) == nullptr) { | |||||
| if (mmRealPath(so_path.c_str(), path, MMPA_MAX_PATH) != EN_OK) { | |||||
| GELOGW("Failed to get realpath of %s", so_path.c_str()); | GELOGW("Failed to get realpath of %s", so_path.c_str()); | ||||
| return string(); | return string(); | ||||
| } | } | ||||
| @@ -93,7 +93,7 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec | |||||
| std::vector<std::string> path_vec; | std::vector<std::string> path_vec; | ||||
| SplitPath(path, path_vec); | SplitPath(path, path_vec); | ||||
| for (const auto &single_path : path_vec) { | for (const auto &single_path : path_vec) { | ||||
| GE_IF_BOOL_EXEC(single_path.length() >= PATH_MAX, GELOGE(GE_PLGMGR_PATH_INVALID, | |||||
| GE_IF_BOOL_EXEC(single_path.length() >= MMPA_MAX_PATH, GELOGE(GE_PLGMGR_PATH_INVALID, | |||||
| "The shared library file path is too long!"); | "The shared library file path is too long!"); | ||||
| continue); | continue); | ||||
| // load break when number of loaded so reach maximum | // load break when number of loaded so reach maximum | ||||
| @@ -119,16 +119,18 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec | |||||
| GELOGI("dlopen the shared library path name: %s.", file_path_dlopen.c_str()); | GELOGI("dlopen the shared library path name: %s.", file_path_dlopen.c_str()); | ||||
| // load continue when dlopen is failed | // load continue when dlopen is failed | ||||
| auto handle = dlopen(file_path_dlopen.c_str(), RTLD_NOW | RTLD_GLOBAL); | |||||
| auto handle = mmDlopen(file_path_dlopen.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL); | |||||
| if (handle == nullptr) { | if (handle == nullptr) { | ||||
| GELOGE(GE_PLGMGR_PATH_INVALID, "Failed to dlopen %s!", dlerror()); | |||||
| const char *error = mmDlerror(); | |||||
| GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||||
| GELOGE(GE_PLGMGR_PATH_INVALID, "Failed to dlopen %s!", error); | |||||
| continue; | continue; | ||||
| } | } | ||||
| // load continue when so is invalid | // load continue when so is invalid | ||||
| bool is_valid = true; | bool is_valid = true; | ||||
| for (const auto &func_name : func_check_list) { | for (const auto &func_name : func_check_list) { | ||||
| auto real_fn = (void (*)())dlsym(handle, func_name.c_str()); | |||||
| auto real_fn = (void (*)())mmDlsym(handle, const_cast<char *>(func_name.c_str())); | |||||
| if (real_fn == nullptr) { | if (real_fn == nullptr) { | ||||
| GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", func_name.c_str(), | GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", func_name.c_str(), | ||||
| func_name.c_str()); | func_name.c_str()); | ||||
| @@ -137,7 +139,7 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec | |||||
| } | } | ||||
| } | } | ||||
| if (!is_valid) { | if (!is_valid) { | ||||
| GE_LOGE_IF(dlclose(handle), "Failed to dlclose."); | |||||
| GE_LOGE_IF(mmDlclose(handle), "Failed to dlclose."); | |||||
| continue; | continue; | ||||
| } | } | ||||
| @@ -197,22 +199,29 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_ | |||||
| so_list_.clear(); | so_list_.clear(); | ||||
| ClearHandles_(); | ClearHandles_(); | ||||
| char canonical_path[PATH_MAX] = {0}; | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(path.length() >= PATH_MAX, GELOGW("File path is too long!"); | |||||
| char canonical_path[MMPA_MAX_PATH] = {0}; | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(path.length() >= MMPA_MAX_PATH, GELOGW("File path is too long!"); | |||||
| return FAILED, "File path is too long!"); | return FAILED, "File path is too long!"); | ||||
| if (realpath(path.c_str(), canonical_path) == nullptr) { | |||||
| if (mmRealPath(path.c_str(), canonical_path, MMPA_MAX_PATH) != EN_OK) { | |||||
| GELOGW("Failed to get realpath of %s", path.c_str()); | GELOGW("Failed to get realpath of %s", path.c_str()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| DIR *dir = opendir(canonical_path); | |||||
| if (dir == nullptr) { | |||||
| GELOGW("Invalid path for load: %s", path.c_str()); | |||||
| return SUCCESS; | |||||
| INT32 is_dir = mmIsDir(canonical_path); | |||||
| // Lib plugin path not exist | |||||
| if (is_dir != EN_OK) { | |||||
| GELOGW("Invalid path for load: %s", path.c_str()); | |||||
| return SUCCESS; | |||||
| } | } | ||||
| struct dirent *entry = nullptr; | |||||
| while ((entry = readdir(dir)) != nullptr) { | |||||
| mmDirent **entries = nullptr; | |||||
| auto ret = mmScandir(canonical_path, &entries, nullptr, nullptr); | |||||
| if (ret < EN_OK) { | |||||
| GELOGW("scan dir failed. path = %s, ret = %d", canonical_path, ret); | |||||
| return FAILED; | |||||
| } | |||||
| for (int i = 0; i < ret; ++i) { | |||||
| mmDirent *entry = entries[i]; | |||||
| // read fileName and fileType | // read fileName and fileType | ||||
| std::string file_name = entry->d_name; | std::string file_name = entry->d_name; | ||||
| unsigned char file_type = entry->d_type; | unsigned char file_type = entry->d_type; | ||||
| @@ -250,9 +259,11 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_ | |||||
| GELOGI("Dlopen so path name: %s. ", file_path_dlopen.c_str()); | GELOGI("Dlopen so path name: %s. ", file_path_dlopen.c_str()); | ||||
| // load continue when dlopen is failed | // load continue when dlopen is failed | ||||
| auto handle = dlopen(file_path_dlopen.c_str(), RTLD_NOW | RTLD_GLOBAL); | |||||
| auto handle = mmDlopen(file_path_dlopen.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL); | |||||
| if (handle == nullptr) { | if (handle == nullptr) { | ||||
| GELOGW("Failed in dlopen %s!", dlerror()); | |||||
| const char *error = mmDlerror(); | |||||
| GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||||
| GELOGW("Failed in dlopen %s!", error); | |||||
| continue; | continue; | ||||
| } | } | ||||
| @@ -261,7 +272,7 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_ | |||||
| // load continue when so is invalid | // load continue when so is invalid | ||||
| bool is_valid = true; | bool is_valid = true; | ||||
| for (const auto &func_name : func_check_list) { | for (const auto &func_name : func_check_list) { | ||||
| auto real_fn = (void (*)())dlsym(handle, func_name.c_str()); | |||||
| auto real_fn = (void (*)())mmDlsym(handle, const_cast<char *>(func_name.c_str())); | |||||
| if (real_fn == nullptr) { | if (real_fn == nullptr) { | ||||
| GELOGW("The %s is skipped since function %s is not existed!", file_name.c_str(), func_name.c_str()); | GELOGW("The %s is skipped since function %s is not existed!", file_name.c_str(), func_name.c_str()); | ||||
| is_valid = false; | is_valid = false; | ||||
| @@ -269,7 +280,7 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_ | |||||
| } | } | ||||
| } | } | ||||
| if (!is_valid) { | if (!is_valid) { | ||||
| GE_LOGE_IF(dlclose(handle), "Failed to dlclose."); | |||||
| GE_LOGE_IF(mmDlclose(handle), "Failed to dlclose."); | |||||
| continue; | continue; | ||||
| } | } | ||||
| @@ -279,7 +290,7 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_ | |||||
| handles_[string(file_name)] = handle; | handles_[string(file_name)] = handle; | ||||
| num_of_loaded_so++; | num_of_loaded_so++; | ||||
| } | } | ||||
| closedir(dir); | |||||
| mmScandirFree(entries, ret); | |||||
| if (num_of_loaded_so == 0) { | if (num_of_loaded_so == 0) { | ||||
| GELOGW("No loadable shared library found in the path: %s", path.c_str()); | GELOGW("No loadable shared library found in the path: %s", path.c_str()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -17,7 +17,6 @@ | |||||
| #ifndef GE_COMMON_GE_PLUGIN_MANAGER_H_ | #ifndef GE_COMMON_GE_PLUGIN_MANAGER_H_ | ||||
| #define GE_COMMON_GE_PLUGIN_MANAGER_H_ | #define GE_COMMON_GE_PLUGIN_MANAGER_H_ | ||||
| #include <dlfcn.h> | |||||
| #include <functional> | #include <functional> | ||||
| #include <iostream> | #include <iostream> | ||||
| #include <map> | #include <map> | ||||
| @@ -30,6 +29,7 @@ | |||||
| #include "common/ge_inner_error_codes.h" | #include "common/ge_inner_error_codes.h" | ||||
| #include "engine/dnnengine.h" | #include "engine/dnnengine.h" | ||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "mmpa/mmpa_api.h" | |||||
| namespace ge { | namespace ge { | ||||
| using SoToHandleMap = std::map<std::string, void *>; | using SoToHandleMap = std::map<std::string, void *>; | ||||
| @@ -57,7 +57,7 @@ class PluginManager { | |||||
| template <typename R, typename... Types> | template <typename R, typename... Types> | ||||
| Status GetAllFunctions(const string &func_name, map<string, function<R(Types... args)>> &funcs) { | Status GetAllFunctions(const string &func_name, map<string, function<R(Types... args)>> &funcs) { | ||||
| for (const auto &handle : handles_) { | for (const auto &handle : handles_) { | ||||
| auto real_fn = (R(*)(Types...))dlsym(handle.second, func_name.c_str()); | |||||
| auto real_fn = (R(*)(Types...))mmDlsym(handle.second, const_cast<char *>(func_name.c_str())); | |||||
| if (real_fn == nullptr) { | if (real_fn == nullptr) { | ||||
| GELOGW("Failed to get function %s in %s!", func_name.c_str(), handle.first.c_str()); | GELOGW("Failed to get function %s in %s!", func_name.c_str(), handle.first.c_str()); | ||||
| return GE_PLGMGR_FUNC_NOT_EXIST; | return GE_PLGMGR_FUNC_NOT_EXIST; | ||||
| @@ -72,7 +72,7 @@ class PluginManager { | |||||
| Status InvokeAll(const string &func_name, Types... args) { | Status InvokeAll(const string &func_name, Types... args) { | ||||
| for (const auto &handle : handles_) { | for (const auto &handle : handles_) { | ||||
| // If the funcName is existed, signature of realFn can be casted to any type | // If the funcName is existed, signature of realFn can be casted to any type | ||||
| auto real_fn = (void (*)(Types...))dlsym(handle.second, func_name.c_str()); | |||||
| auto real_fn = (void (*)(Types...))mmDlsym(handle.second, const_cast<char *>(func_name.c_str())); | |||||
| if (real_fn == nullptr) { | if (real_fn == nullptr) { | ||||
| GELOGW("Failed to invoke function %s in %s!", func_name.c_str(), handle.first.c_str()); | GELOGW("Failed to invoke function %s in %s!", func_name.c_str(), handle.first.c_str()); | ||||
| return GE_PLGMGR_INVOKE_FAILED; | return GE_PLGMGR_INVOKE_FAILED; | ||||
| @@ -87,7 +87,7 @@ class PluginManager { | |||||
| Status InvokeAll(const string &func_name, T arg) { | Status InvokeAll(const string &func_name, T arg) { | ||||
| for (const auto &handle : handles_) { | for (const auto &handle : handles_) { | ||||
| // If the funcName is existed, signature of realFn can be casted to any type | // If the funcName is existed, signature of realFn can be casted to any type | ||||
| auto real_fn = (void (*)(T))dlsym(handle.second, func_name.c_str()); | |||||
| auto real_fn = (void (*)(T))mmDlsym(handle.second, const_cast<char *>(func_name.c_str())); | |||||
| if (real_fn == nullptr) { | if (real_fn == nullptr) { | ||||
| GELOGW("Failed to invoke function %s in %s!", func_name.c_str(), handle.first.c_str()); | GELOGW("Failed to invoke function %s in %s!", func_name.c_str(), handle.first.c_str()); | ||||
| return GE_PLGMGR_INVOKE_FAILED; | return GE_PLGMGR_INVOKE_FAILED; | ||||
| @@ -112,7 +112,7 @@ class PluginManager { | |||||
| Status InvokeAll(const string &func_name, T1 arg) { | Status InvokeAll(const string &func_name, T1 arg) { | ||||
| for (const auto &handle : handles_) { | for (const auto &handle : handles_) { | ||||
| // If the funcName is existed, signature of realFn can be casted to any type | // If the funcName is existed, signature of realFn can be casted to any type | ||||
| auto real_fn = (T2(*)(T1))dlsym(handle.second, func_name.c_str()); | |||||
| auto real_fn = (T2(*)(T1))mmDlsym(handle.second, const_cast<char *>(func_name.c_str())); | |||||
| if (real_fn == nullptr) { | if (real_fn == nullptr) { | ||||
| GELOGW("Failed to invoke function %s in %s!", func_name.c_str(), handle.first.c_str()); | GELOGW("Failed to invoke function %s in %s!", func_name.c_str(), handle.first.c_str()); | ||||
| return GE_PLGMGR_INVOKE_FAILED; | return GE_PLGMGR_INVOKE_FAILED; | ||||
| @@ -130,7 +130,7 @@ class PluginManager { | |||||
| Status InvokeAll(const string &func_name) { | Status InvokeAll(const string &func_name) { | ||||
| for (const auto &handle : handles_) { | for (const auto &handle : handles_) { | ||||
| // If the funcName is existed, signature of realFn can be casted to any type | // If the funcName is existed, signature of realFn can be casted to any type | ||||
| auto real_fn = (T(*)())dlsym(handle.second, func_name.c_str()); | |||||
| auto real_fn = (T(*)())mmDlsym(handle.second, const_cast<char *>(func_name.c_str())); | |||||
| if (real_fn == nullptr) { | if (real_fn == nullptr) { | ||||
| GELOGW("Failed to invoke function %s in %s!", func_name.c_str(), handle.first.c_str()); | GELOGW("Failed to invoke function %s in %s!", func_name.c_str(), handle.first.c_str()); | ||||
| return GE_PLGMGR_INVOKE_FAILED; | return GE_PLGMGR_INVOKE_FAILED; | ||||
| @@ -16,8 +16,6 @@ | |||||
| #include "common/ge/tbe_plugin_manager.h" | #include "common/ge/tbe_plugin_manager.h" | ||||
| #include <dirent.h> | |||||
| #include <unistd.h> | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <cstring> | #include <cstring> | ||||
| #include <fstream> | #include <fstream> | ||||
| @@ -50,9 +48,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY TBEPluginManager &TBEPluginMana | |||||
| Status TBEPluginManager::ClearHandles_() { | Status TBEPluginManager::ClearHandles_() { | ||||
| Status ret = SUCCESS; | Status ret = SUCCESS; | ||||
| for (const auto &handle : handles_vec_) { | for (const auto &handle : handles_vec_) { | ||||
| if (dlclose(handle) != 0) { | |||||
| if (mmDlclose(handle) != 0) { | |||||
| ret = FAILED; | ret = FAILED; | ||||
| GELOGW("Failed to close handle: %s", dlerror()); | |||||
| const char *error = mmDlerror(); | |||||
| GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||||
| GELOGW("Failed to close handle: %s", error); | |||||
| } | } | ||||
| } | } | ||||
| handles_vec_.clear(); | handles_vec_.clear(); | ||||
| @@ -65,18 +65,18 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status TBEPluginManager::Finali | |||||
| } | } | ||||
| string TBEPluginManager::GetPath() { | string TBEPluginManager::GetPath() { | ||||
| Dl_info dl_info; | |||||
| if (dladdr(reinterpret_cast<void *>(&TBEPluginManager::GetPath), &dl_info) == 0) { | |||||
| mmDlInfo dl_info; | |||||
| if (mmDladdr(reinterpret_cast<void *>(&TBEPluginManager::GetPath), &dl_info) != EN_OK) { | |||||
| GELOGW("Failed to read so path!"); | GELOGW("Failed to read so path!"); | ||||
| return string(); | return string(); | ||||
| } else { | } else { | ||||
| string so_path = dl_info.dli_fname; | string so_path = dl_info.dli_fname; | ||||
| char path[PATH_MAX] = {0}; | |||||
| if (so_path.length() >= PATH_MAX) { | |||||
| char path[MMPA_MAX_PATH] = {0}; | |||||
| if (so_path.length() >= MMPA_MAX_PATH) { | |||||
| GELOGW("File path is too long!"); | GELOGW("File path is too long!"); | ||||
| return string(); | return string(); | ||||
| } | } | ||||
| if (realpath(so_path.c_str(), path) == nullptr) { | |||||
| if (mmRealPath(so_path.c_str(), path, MMPA_MAX_PATH) != EN_OK) { | |||||
| GELOGW("Failed to get realpath of %s", so_path.c_str()); | GELOGW("Failed to get realpath of %s", so_path.c_str()); | ||||
| return string(); | return string(); | ||||
| } | } | ||||
| @@ -108,35 +108,36 @@ void TBEPluginManager::FindParserSo(const string &path, vector<string> &file_lis | |||||
| GELOGW("RealPath is empty."); | GELOGW("RealPath is empty."); | ||||
| return; | return; | ||||
| } | } | ||||
| struct stat stat_buf; | |||||
| if ((stat(real_path.c_str(), &stat_buf) != 0) || (!S_ISDIR(stat_buf.st_mode))) { | |||||
| GELOGW("%s is not a dir.", real_path.c_str()); | |||||
| return; | |||||
| } | |||||
| struct dirent *dent(0); | |||||
| DIR *dir = opendir(real_path.c_str()); | |||||
| // Plugin path does not exist | |||||
| if (dir == nullptr) { | |||||
| GELOGW("Open directory %s failed.", real_path.c_str()); | |||||
| return; | |||||
| INT32 is_dir = mmIsDir(real_path.c_str()); | |||||
| // Lib plugin path not exist | |||||
| if (is_dir != EN_OK) { | |||||
| GELOGW("%s is not a dir.", real_path.c_str()); | |||||
| return; | |||||
| } | } | ||||
| while ((dent = readdir(dir)) != nullptr) { | |||||
| if (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) continue; | |||||
| string name = dent->d_name; | |||||
| string full_name = real_path + "/" + name; | |||||
| const string so_suff = ".so"; | |||||
| const string caffe_parser_so_suff = "lib_caffe_parser.so"; | |||||
| const string aicpu_so_suff = "_aicpu.so"; | |||||
| const string aicpu_host_so_suff = "_online.so"; | |||||
| if (name.size() >= so_suff.size() && name.compare(name.size() - so_suff.size(), so_suff.size(), so_suff) == 0) { | |||||
| ProcessSoFullName(file_list, caffe_parser_path, full_name, caffe_parser_so_suff, aicpu_so_suff, | |||||
| aicpu_host_so_suff); | |||||
| } else { | |||||
| FindParserSo(full_name, file_list, caffe_parser_path); | |||||
| } | |||||
| mmDirent **entries = nullptr; | |||||
| auto ret = mmScandir(real_path.c_str(), &entries, nullptr, nullptr); | |||||
| if (ret < EN_OK) { | |||||
| GELOGW("scan dir failed. path = %s, ret = %d", real_path.c_str(), ret); | |||||
| return; | |||||
| } | |||||
| for (int i = 0; i < ret; ++i) { | |||||
| mmDirent *dent = entries[i]; | |||||
| if (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) continue; | |||||
| string name = dent->d_name; | |||||
| string full_name = real_path + "/" + name; | |||||
| const string so_suff = ".so"; | |||||
| const string caffe_parser_so_suff = "lib_caffe_parser.so"; | |||||
| const string aicpu_so_suff = "_aicpu.so"; | |||||
| const string aicpu_host_so_suff = "_online.so"; | |||||
| if (name.size() >= so_suff.size() && name.compare(name.size() - so_suff.size(), so_suff.size(), so_suff) == 0) { | |||||
| ProcessSoFullName(file_list, caffe_parser_path, full_name, caffe_parser_so_suff, aicpu_so_suff, | |||||
| aicpu_host_so_suff); | |||||
| } else { | |||||
| FindParserSo(full_name, file_list, caffe_parser_path); | |||||
| } | |||||
| } | } | ||||
| closedir(dir); | |||||
| mmScandirFree(entries, ret); | |||||
| } | } | ||||
| void TBEPluginManager::GetPluginSoFileList(const string &path, vector<string> &file_list, string &caffe_parser_path) { | void TBEPluginManager::GetPluginSoFileList(const string &path, vector<string> &file_list, string &caffe_parser_path) { | ||||
| @@ -159,8 +160,9 @@ void TBEPluginManager::GetCustomOpPath(std::string &customop_path) { | |||||
| fmk_type = ge::TypeUtils::FmkTypeToSerialString(type); | fmk_type = ge::TypeUtils::FmkTypeToSerialString(type); | ||||
| GELOGI("Framework type is %s.", fmk_type.c_str()); | GELOGI("Framework type is %s.", fmk_type.c_str()); | ||||
| const char *path_env = std::getenv("ASCEND_OPP_PATH"); | |||||
| if (path_env != nullptr) { | |||||
| char path_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
| INT32 res = mmGetEnv("ASCEND_OPP_PATH", path_env, MMPA_MAX_PATH); | |||||
| if (res == EN_OK) { | |||||
| std::string path = path_env; | std::string path = path_env; | ||||
| customop_path = (path + "/framework/custom" + "/:") + (path + "/framework/built-in/" + fmk_type); | customop_path = (path + "/framework/custom" + "/:") + (path + "/framework/built-in/" + fmk_type); | ||||
| GELOGI("Get custom so path from env : %s", path_env); | GELOGI("Get custom so path from env : %s", path_env); | ||||
| @@ -210,9 +212,11 @@ void TBEPluginManager::LoadPluginSo(const std::map<string, string> &options) { | |||||
| for (auto elem : file_list) { | for (auto elem : file_list) { | ||||
| StringUtils::Trim(elem); | StringUtils::Trim(elem); | ||||
| void *handle = dlopen(elem.c_str(), RTLD_NOW | RTLD_GLOBAL | RTLD_NODELETE); | |||||
| void *handle = mmDlopen(elem.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL | MMPA_RTLD_NODELETE); | |||||
| if (handle == nullptr) { | if (handle == nullptr) { | ||||
| GELOGW("dlopen failed, plugin name:%s. Message(%s).", elem.c_str(), dlerror()); | |||||
| const char *error = mmDlerror(); | |||||
| GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||||
| GELOGW("dlopen failed, plugin name:%s. Message(%s).", elem.c_str(), error); | |||||
| } else if (find(handles_vec_.begin(), handles_vec_.end(), handle) == handles_vec_.end()) { | } else if (find(handles_vec_.begin(), handles_vec_.end(), handle) == handles_vec_.end()) { | ||||
| // Close dl when the program exist, not close here | // Close dl when the program exist, not close here | ||||
| GELOGI("Plugin load %s success.", elem.c_str()); | GELOGI("Plugin load %s success.", elem.c_str()); | ||||
| @@ -17,7 +17,6 @@ | |||||
| #ifndef GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_ | #ifndef GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_ | ||||
| #define GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_ | #define GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_ | ||||
| #include <dlfcn.h> | |||||
| #include <functional> | #include <functional> | ||||
| #include <iostream> | #include <iostream> | ||||
| #include <map> | #include <map> | ||||
| @@ -110,11 +110,12 @@ LOCAL_SHARED_LIBRARIES := \ | |||||
| libascend_protobuf \ | libascend_protobuf \ | ||||
| libc_sec \ | libc_sec \ | ||||
| libslog \ | libslog \ | ||||
| libmmpa \ | |||||
| libgraph \ | libgraph \ | ||||
| libregister \ | libregister \ | ||||
| liberror_manager \ | liberror_manager \ | ||||
| LOCAL_STATIC_LIBRARIES += libmmpa | |||||
| LOCAL_LDFLAGS := -lrt -ldl | LOCAL_LDFLAGS := -lrt -ldl | ||||
| include $(BUILD_HOST_SHARED_LIBRARY) | include $(BUILD_HOST_SHARED_LIBRARY) | ||||
| @@ -152,11 +153,12 @@ LOCAL_SHARED_LIBRARIES := \ | |||||
| libascend_protobuf \ | libascend_protobuf \ | ||||
| libc_sec \ | libc_sec \ | ||||
| libslog \ | libslog \ | ||||
| libmmpa \ | |||||
| libgraph \ | libgraph \ | ||||
| libregister \ | libregister \ | ||||
| liberror_manager \ | liberror_manager \ | ||||
| LOCAL_STATIC_LIBRARIES += libmmpa | |||||
| ifeq ($(device_os),android) | ifeq ($(device_os),android) | ||||
| LOCAL_LDFLAGS += -ldl | LOCAL_LDFLAGS += -ldl | ||||
| LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog | LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog | ||||
| @@ -14,8 +14,6 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include <fcntl.h> | |||||
| #include <unistd.h> | |||||
| #include <climits> | #include <climits> | ||||
| #include <cstdio> | #include <cstdio> | ||||
| #include <fstream> | #include <fstream> | ||||
| @@ -448,12 +446,12 @@ Status ModelCacheHelper::SaveJsonToFile(const string &file_name, const Json &jso | |||||
| } | } | ||||
| const string path = cache_path_ + file_name; | const string path = cache_path_ + file_name; | ||||
| const int FILE_AUTHORITY = 0600; | const int FILE_AUTHORITY = 0600; | ||||
| int fd = open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, FILE_AUTHORITY); | |||||
| int fd = mmOpen2(path.c_str(), M_WRONLY | M_CREAT | O_TRUNC, FILE_AUTHORITY); | |||||
| if (fd < 0) { | if (fd < 0) { | ||||
| GELOGW("Fail to open the file: %s.", path.c_str()); | GELOGW("Fail to open the file: %s.", path.c_str()); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| if (close(fd) != 0) { | |||||
| if (mmClose(fd) != 0) { | |||||
| GELOGW("Fail to close the file: %s.", path.c_str()); | GELOGW("Fail to close the file: %s.", path.c_str()); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| @@ -98,7 +98,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||||
| ge::Buffer model_buffer; | ge::Buffer model_buffer; | ||||
| (void)model_tmp->Save(model_buffer); | (void)model_tmp->Save(model_buffer); | ||||
| GELOGI("MODEL_DEF size is %zu", model_buffer.GetSize()); | |||||
| GELOGD("MODEL_DEF size is %zu", model_buffer.GetSize()); | |||||
| if (model_buffer.GetSize() > 0) { | if (model_buffer.GetSize() > 0) { | ||||
| if (SaveModelPartition(om_file_save_helper, ModelPartitionType::MODEL_DEF, model_buffer.GetData(), | if (SaveModelPartition(om_file_save_helper, ModelPartitionType::MODEL_DEF, model_buffer.GetData(), | ||||
| model_buffer.GetSize()) != SUCCESS) { | model_buffer.GetSize()) != SUCCESS) { | ||||
| @@ -107,7 +107,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||||
| } | } | ||||
| } | } | ||||
| auto ge_model_weight = ge_model->GetWeight(); | auto ge_model_weight = ge_model->GetWeight(); | ||||
| GELOGI("WEIGHTS_DATA size is %zu, %p", ge_model_weight.GetSize(), ge_model_weight.GetData()); | |||||
| GELOGD("WEIGHTS_DATA size is %zu, %p", ge_model_weight.GetSize(), ge_model_weight.GetData()); | |||||
| // weight is not necessary | // weight is not necessary | ||||
| if (ge_model_weight.GetSize() > 0) { | if (ge_model_weight.GetSize() > 0) { | ||||
| GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, | GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, | ||||
| @@ -117,7 +117,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||||
| } | } | ||||
| TBEKernelStore tbe_kernel_store = ge_model->GetTBEKernelStore(); | TBEKernelStore tbe_kernel_store = ge_model->GetTBEKernelStore(); | ||||
| GELOGI("TBE_KERNELS size is %zu", tbe_kernel_store.DataSize()); | |||||
| GELOGD("TBE_KERNELS size is %zu", tbe_kernel_store.DataSize()); | |||||
| if (tbe_kernel_store.DataSize() > 0) { | if (tbe_kernel_store.DataSize() > 0) { | ||||
| GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, | GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, | ||||
| ModelPartitionType::TBE_KERNELS, | ModelPartitionType::TBE_KERNELS, | ||||
| @@ -129,7 +129,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||||
| (void)tbe_kernel_store.Load(tbe_kernel_store.Data(), tbe_kernel_store.DataSize()); | (void)tbe_kernel_store.Load(tbe_kernel_store.Data(), tbe_kernel_store.DataSize()); | ||||
| CustAICPUKernelStore cust_aicpu_kernel_store = ge_model->GetCustAICPUKernelStore(); | CustAICPUKernelStore cust_aicpu_kernel_store = ge_model->GetCustAICPUKernelStore(); | ||||
| GELOGI("cust aicpu kernels size is %zu", cust_aicpu_kernel_store.DataSize()); | |||||
| GELOGD("cust aicpu kernels size is %zu", cust_aicpu_kernel_store.DataSize()); | |||||
| if (cust_aicpu_kernel_store.DataSize() > 0) { | if (cust_aicpu_kernel_store.DataSize() > 0) { | ||||
| GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, | GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, | ||||
| ModelPartitionType::CUST_AICPU_KERNELS, | ModelPartitionType::CUST_AICPU_KERNELS, | ||||
| @@ -155,8 +155,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||||
| } | } | ||||
| (void)model_task_def->SerializePartialToArray(task_buffer.GetData(), static_cast<int>(partition_task_size)); | (void)model_task_def->SerializePartialToArray(task_buffer.GetData(), static_cast<int>(partition_task_size)); | ||||
| GELOGI("TASK_INFO op_size:%d, stream_num:%u", model_task_def->op().size(), model_task_def->stream_num()); | |||||
| GELOGI("TASK_INFO size is %zu", partition_task_size); | |||||
| GELOGD("TASK_INFO op_size:%d, stream_num:%u", model_task_def->op().size(), model_task_def->stream_num()); | |||||
| GELOGD("TASK_INFO size is %zu", partition_task_size); | |||||
| if (SaveModelPartition(om_file_save_helper, ModelPartitionType::TASK_INFO, task_buffer.GetData(), | if (SaveModelPartition(om_file_save_helper, ModelPartitionType::TASK_INFO, task_buffer.GetData(), | ||||
| partition_task_size) != SUCCESS) { | partition_task_size) != SUCCESS) { | ||||
| @@ -168,7 +168,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||||
| model_header.platform_type = ge_model->GetPlatformType(); | model_header.platform_type = ge_model->GetPlatformType(); | ||||
| model_header.om_ir_version = ge_model->GetVersion(); | model_header.om_ir_version = ge_model->GetVersion(); | ||||
| std::string platform_version = ge_model->GetPlatformVersion(); | std::string platform_version = ge_model->GetPlatformVersion(); | ||||
| GELOGI("Platform version save: %s", platform_version.c_str()); | |||||
| errno_t err; | errno_t err; | ||||
| err = memcpy_s(model_header.platform_version, PLATFORM_VERSION_LEN, platform_version.c_str(), | err = memcpy_s(model_header.platform_version, PLATFORM_VERSION_LEN, platform_version.c_str(), | ||||
| @@ -178,7 +177,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||||
| return MEMALLOC_FAILED; | return MEMALLOC_FAILED; | ||||
| } | } | ||||
| string version = reinterpret_cast<char *>(model_header.platform_version); | string version = reinterpret_cast<char *>(model_header.platform_version); | ||||
| GELOGI("Platform version save: %s", version.c_str()); | |||||
| GELOGD("Platform version save: %s", version.c_str()); | |||||
| size_t name_size = ge_model->GetName().size(); | size_t name_size = ge_model->GetName().size(); | ||||
| name_size = name_size > (MODEL_NAME_LENGTH - 1) ? (MODEL_NAME_LENGTH - 1) : name_size; | name_size = name_size > (MODEL_NAME_LENGTH - 1) ? (MODEL_NAME_LENGTH - 1) : name_size; | ||||
| @@ -188,7 +187,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||||
| return MEMALLOC_FAILED; | return MEMALLOC_FAILED; | ||||
| } | } | ||||
| string model_name = reinterpret_cast<char *>(model_header.name); | string model_name = reinterpret_cast<char *>(model_header.name); | ||||
| GELOGI("Model name save:%s", model_name.c_str()); | |||||
| GELOGD("Model name save:%s", model_name.c_str()); | |||||
| Status ret = om_file_save_helper->SaveModel(save_param, output_file.c_str(), model, is_offline_); | Status ret = om_file_save_helper->SaveModel(save_param, output_file.c_str(), model, is_offline_); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| @@ -346,7 +345,7 @@ Status ModelHelper::LoadModelData(OmFileLoadHelper &om_load_helper) { | |||||
| ModelPartition partition_model_def; | ModelPartition partition_model_def; | ||||
| // no need to check value, DATA->NetOutput | // no need to check value, DATA->NetOutput | ||||
| om_load_helper.GetModelPartition(ModelPartitionType::MODEL_DEF, partition_model_def); | om_load_helper.GetModelPartition(ModelPartitionType::MODEL_DEF, partition_model_def); | ||||
| GELOGI("Model_def partition addr:%p,size:%u", partition_model_def.data, partition_model_def.size); | |||||
| GELOGD("Model_def partition addr:%p,size:%u", partition_model_def.data, partition_model_def.size); | |||||
| ge::Model model; | ge::Model model; | ||||
| if (ge::Model::Load(partition_model_def.data, partition_model_def.size, model) != SUCCESS) { | if (ge::Model::Load(partition_model_def.data, partition_model_def.size, model) != SUCCESS) { | ||||
| @@ -376,7 +375,7 @@ Status ModelHelper::LoadWeights(OmFileLoadHelper &om_load_helper) { | |||||
| ge::Buffer weight = ge::Buffer::CopyFrom(partition.data, partition.size); | ge::Buffer weight = ge::Buffer::CopyFrom(partition.data, partition.size); | ||||
| model_->SetWeight(weight); | model_->SetWeight(weight); | ||||
| GELOGI("GetWeight size:%u", partition.size); | |||||
| GELOGD("GetWeight size:%u", partition.size); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -393,7 +392,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadTask(Om | |||||
| GELOGE(INTERNAL_ERROR, "ReadProtoFromArray failed."); | GELOGE(INTERNAL_ERROR, "ReadProtoFromArray failed."); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| GELOGI("TASK_INFO op_size:%zu, stream_num:%u", task->op().size(), task->stream_num()); | |||||
| GELOGD("TASK_INFO op_size:%d, stream_num:%u", task->op().size(), task->stream_num()); | |||||
| } | } | ||||
| model_->SetModelTaskDef(task); | model_->SetModelTaskDef(task); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -404,9 +403,9 @@ Status ModelHelper::LoadTBEKernelStore(OmFileLoadHelper &om_load_helper) { | |||||
| ModelPartition partition_kernel_def; | ModelPartition partition_kernel_def; | ||||
| TBEKernelStore kernel_store; | TBEKernelStore kernel_store; | ||||
| if (om_load_helper.GetModelPartition(ModelPartitionType::TBE_KERNELS, partition_kernel_def) == SUCCESS) { | if (om_load_helper.GetModelPartition(ModelPartitionType::TBE_KERNELS, partition_kernel_def) == SUCCESS) { | ||||
| GELOGI("Kernels partition size:%u", partition_kernel_def.size); | |||||
| GELOGD("Kernels partition size:%u", partition_kernel_def.size); | |||||
| if (kernel_store.Load(partition_kernel_def.data, partition_kernel_def.size)) { | if (kernel_store.Load(partition_kernel_def.data, partition_kernel_def.size)) { | ||||
| GELOGI("Load tbe kernels success"); | |||||
| GELOGD("Load tbe kernels success"); | |||||
| } else { | } else { | ||||
| GELOGW("Load tbe kernels failed"); | GELOGW("Load tbe kernels failed"); | ||||
| } | } | ||||
| @@ -420,11 +419,9 @@ Status ModelHelper::LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper) { | |||||
| ModelPartition partition_kernel_def; | ModelPartition partition_kernel_def; | ||||
| CustAICPUKernelStore kernel_store; | CustAICPUKernelStore kernel_store; | ||||
| if (om_load_helper.GetModelPartition(ModelPartitionType::CUST_AICPU_KERNELS, partition_kernel_def) == SUCCESS) { | if (om_load_helper.GetModelPartition(ModelPartitionType::CUST_AICPU_KERNELS, partition_kernel_def) == SUCCESS) { | ||||
| GELOGI("Kernels partition size:%u", partition_kernel_def.size); | |||||
| GELOGD("Kernels partition size:%u", partition_kernel_def.size); | |||||
| if (kernel_store.Load(partition_kernel_def.data, partition_kernel_def.size)) { | if (kernel_store.Load(partition_kernel_def.data, partition_kernel_def.size)) { | ||||
| GELOGI("Load cust aicpu kernels success"); | GELOGI("Load cust aicpu kernels success"); | ||||
| } else { | |||||
| GELOGW("Load cust aicpu kernels failed"); | |||||
| } | } | ||||
| } | } | ||||
| model_->SetCustAICPUKernelStore(kernel_store); | model_->SetCustAICPUKernelStore(kernel_store); | ||||
| @@ -123,7 +123,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint | |||||
| return ACL_ERROR_GE_EXEC_MODEL_PARTITION_NUM_INVALID; | return ACL_ERROR_GE_EXEC_MODEL_PARTITION_NUM_INVALID; | ||||
| } | } | ||||
| size_t mem_offset = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table); | size_t mem_offset = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table); | ||||
| GELOGI("ModelPartitionTable num :%u, ModelFileHeader length :%zu, ModelPartitionTable length :%zu", | |||||
| GELOGD("ModelPartitionTable num :%u, ModelFileHeader length :%zu, ModelPartitionTable length :%zu", | |||||
| partition_table->num, sizeof(ModelFileHeader), mem_offset); | partition_table->num, sizeof(ModelFileHeader), mem_offset); | ||||
| if (model_data_size <= mem_offset) { | if (model_data_size <= mem_offset) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u", | GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u", | ||||
| @@ -143,7 +143,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint | |||||
| return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID; | return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID; | ||||
| } | } | ||||
| mem_offset += partition.size; | mem_offset += partition.size; | ||||
| GELOGI("Partition, type:%d, size:%u", static_cast<int>(partition.type), partition.size); | |||||
| GELOGD("Partition, type:%d, size:%u", static_cast<int>(partition.type), partition.size); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -167,7 +167,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelPartitionTable *OmFileSave | |||||
| ModelPartition partition = context_.partition_datas_[i]; | ModelPartition partition = context_.partition_datas_[i]; | ||||
| partition_table->partition[i] = {partition.type, mem_offset, partition.size}; | partition_table->partition[i] = {partition.type, mem_offset, partition.size}; | ||||
| mem_offset += partition.size; | mem_offset += partition.size; | ||||
| GELOGI("Partition, type:%d, size:%u", static_cast<int>(partition.type), partition.size); | |||||
| GELOGD("Partition, type:%d, size:%u", static_cast<int>(partition.type), partition.size); | |||||
| } | } | ||||
| return partition_table; | return partition_table; | ||||
| } | } | ||||
| @@ -191,7 +191,7 @@ Status OmFileSaveHelper::SaveModel(const SaveParam &save_param, const char *outp | |||||
| (void)save_param.pri_key_file; | (void)save_param.pri_key_file; | ||||
| Status ret = SaveModelToFile(output_file, model, is_offline); | Status ret = SaveModelToFile(output_file, model, is_offline); | ||||
| if (ret == SUCCESS) { | if (ret == SUCCESS) { | ||||
| GELOGI("Generate model with encrypt."); | |||||
| GELOGD("Generate model with encrypt."); | |||||
| } | } | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -213,7 +213,7 @@ Status OmFileSaveHelper::SaveModelToFile(const char *output_file, ModelBufferDat | |||||
| FMK_UINT32_ADDCHECK(size_of_table, model_data_len) | FMK_UINT32_ADDCHECK(size_of_table, model_data_len) | ||||
| model_header_.length = size_of_table + model_data_len; | model_header_.length = size_of_table + model_data_len; | ||||
| GELOGI("Sizeof(ModelFileHeader):%zu,sizeof(ModelPartitionTable):%u, model_data_len:%u, model_total_len:%zu", | |||||
| GELOGD("Sizeof(ModelFileHeader):%zu,sizeof(ModelPartitionTable):%u, model_data_len:%u, model_total_len:%zu", | |||||
| sizeof(ModelFileHeader), size_of_table, model_data_len, model_header_.length + sizeof(ModelFileHeader)); | sizeof(ModelFileHeader), size_of_table, model_data_len, model_header_.length + sizeof(ModelFileHeader)); | ||||
| std::vector<ModelPartition> partition_datas = context_.partition_datas_; | std::vector<ModelPartition> partition_datas = context_.partition_datas_; | ||||
| @@ -224,7 +224,7 @@ Status OmFileSaveHelper::SaveModelToFile(const char *output_file, ModelBufferDat | |||||
| ret = FileSaver::SaveToBuffWithFileHeader(model_header_, *partition_table, partition_datas, model); | ret = FileSaver::SaveToBuffWithFileHeader(model_header_, *partition_table, partition_datas, model); | ||||
| } | } | ||||
| if (ret == SUCCESS) { | if (ret == SUCCESS) { | ||||
| GELOGI("Save model success without encrypt."); | |||||
| GELOGD("Save model success without encrypt."); | |||||
| } | } | ||||
| return ret; | return ret; | ||||
| #else | #else | ||||
| @@ -51,7 +51,7 @@ bool KernelStore::Build() { | |||||
| kernel_head.name_len = static_cast<uint32_t>(kernel->GetName().length()); | kernel_head.name_len = static_cast<uint32_t>(kernel->GetName().length()); | ||||
| kernel_head.bin_len = static_cast<uint32_t>(kernel->GetBinDataSize()); | kernel_head.bin_len = static_cast<uint32_t>(kernel->GetBinDataSize()); | ||||
| GELOGI("get kernel bin name %s, addr %p, size %u", | |||||
| GELOGD("get kernel bin name %s, addr %p, size %u", | |||||
| kernel->GetName().c_str(), kernel->GetBinData(), kernel->GetBinDataSize()); | kernel->GetName().c_str(), kernel->GetBinData(), kernel->GetBinDataSize()); | ||||
| mem_ret = memcpy_s(next_buffer, remain_len, &kernel_head, sizeof(kernel_head)); | mem_ret = memcpy_s(next_buffer, remain_len, &kernel_head, sizeof(kernel_head)); | ||||
| GE_CHK_BOOL_EXEC_NOLOG(mem_ret == EOK, return false); | GE_CHK_BOOL_EXEC_NOLOG(mem_ret == EOK, return false); | ||||
| @@ -95,7 +95,7 @@ bool KernelStore::Load(const uint8_t *data, const size_t &len) { | |||||
| std::string name(next_buffer, kernel_head->name_len); | std::string name(next_buffer, kernel_head->name_len); | ||||
| next_buffer += kernel_head->name_len; | next_buffer += kernel_head->name_len; | ||||
| GELOGI("Load kernel from om:%s,%u,%u", name.c_str(), kernel_head->name_len, kernel_head->bin_len); | |||||
| GELOGD("Load kernel from om:%s,%u,%u", name.c_str(), kernel_head->name_len, kernel_head->bin_len); | |||||
| std::vector<char> kernel_bin(next_buffer, next_buffer + kernel_head->bin_len); | std::vector<char> kernel_bin(next_buffer, next_buffer + kernel_head->bin_len); | ||||
| KernelBinPtr teb_kernel_ptr = ge::MakeShared<KernelBin>(name, std::move(kernel_bin)); | KernelBinPtr teb_kernel_ptr = ge::MakeShared<KernelBin>(name, std::move(kernel_bin)); | ||||
| if (teb_kernel_ptr != nullptr) { | if (teb_kernel_ptr != nullptr) { | ||||
| @@ -17,7 +17,6 @@ | |||||
| #include "common/model_parser/base.h" | #include "common/model_parser/base.h" | ||||
| #include "common/helper/model_helper.h" | #include "common/helper/model_helper.h" | ||||
| #include <securec.h> | #include <securec.h> | ||||
| #include <sys/sysinfo.h> | |||||
| #include <fstream> | #include <fstream> | ||||
| #include <memory> | #include <memory> | ||||
| #include <string> | #include <string> | ||||
| @@ -107,7 +106,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::ParseMo | |||||
| model_data = data; | model_data = data; | ||||
| model_len = file_header->length; | model_len = file_header->length; | ||||
| GELOGI("Model_len is %u, model_file_head_len is %zu.", model_len, sizeof(ModelFileHeader)); | |||||
| GELOGD("Model_len is %u, model_file_head_len is %zu.", model_len, sizeof(ModelFileHeader)); | |||||
| } else { | } else { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION, "Invalid model. ModelEncryptType not supported."); | GELOGE(ACL_ERROR_GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION, "Invalid model. ModelEncryptType not supported."); | ||||
| res = ACL_ERROR_GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION; | res = ACL_ERROR_GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION; | ||||
| @@ -16,9 +16,7 @@ | |||||
| #include "common/model_saver.h" | #include "common/model_saver.h" | ||||
| #include <fcntl.h> | |||||
| #include <securec.h> | #include <securec.h> | ||||
| #include <unistd.h> | |||||
| #include <cstdlib> | #include <cstdlib> | ||||
| #include <fstream> | #include <fstream> | ||||
| #include <string> | #include <string> | ||||
| @@ -51,14 +49,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| char real_path[PATH_MAX] = {0}; | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(file_path) >= PATH_MAX, return FAILED, "file path is too long!"); | |||||
| GE_IF_BOOL_EXEC(realpath(file_path, real_path) == nullptr, | |||||
| char real_path[MMPA_MAX_PATH] = {0}; | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(file_path) >= MMPA_MAX_PATH, return FAILED, "file path is too long!"); | |||||
| GE_IF_BOOL_EXEC(mmRealPath(file_path, real_path, MMPA_MAX_PATH) != EN_OK, | |||||
| GELOGI("File %s does not exit, it will be created.", file_path)); | GELOGI("File %s does not exit, it will be created.", file_path)); | ||||
| // Open file | // Open file | ||||
| mode_t mode = S_IRUSR | S_IWUSR; | |||||
| int32_t fd = mmOpen2(real_path, O_RDWR | O_CREAT | O_TRUNC, mode); | |||||
| mmMode_t mode = M_IRUSR | M_IWUSR; | |||||
| int32_t fd = mmOpen2(real_path, M_RDWR | M_CREAT | O_TRUNC, mode); | |||||
| if (fd == EN_ERROR || fd == EN_INVALID_PARAM) { | if (fd == EN_ERROR || fd == EN_INVALID_PARAM) { | ||||
| ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"file", "errmsg"}, {file_path, strerror(errno)}); | ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"file", "errmsg"}, {file_path, strerror(errno)}); | ||||
| GELOGE(FAILED, "Open file[%s] failed. %s", file_path, strerror(errno)); | GELOGE(FAILED, "Open file[%s] failed. %s", file_path, strerror(errno)); | ||||
| @@ -72,7 +70,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi | |||||
| ErrorManager::GetInstance().ATCReportErrMessage( | ErrorManager::GetInstance().ATCReportErrMessage( | ||||
| "E19004", {"file", "errmsg"}, {file_path, strerror(errno)}); | "E19004", {"file", "errmsg"}, {file_path, strerror(errno)}); | ||||
| // Need to both print the error info of mmWrite and mmClose, so return ret after mmClose | // Need to both print the error info of mmWrite and mmClose, so return ret after mmClose | ||||
| GELOGE(FAILED, "Write to file failed. errno = %d, %s", mmpa_ret, strerror(errno)); | |||||
| GELOGE(FAILED, "Write to file failed. errno = %ld, %s", mmpa_ret, strerror(errno)); | |||||
| ret = FAILED; | ret = FAILED; | ||||
| } | } | ||||
| // Close file | // Close file | ||||
| @@ -214,8 +214,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Pa | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::InitFromOptions(const Options &options) { | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::InitFromOptions(const Options &options) { | ||||
| #ifdef DAVINCI_SUPPORT_PROFILING | #ifdef DAVINCI_SUPPORT_PROFILING | ||||
| // enable profiling support two ways: env and front end | // enable profiling support two ways: env and front end | ||||
| const char *profiling_mode = std::getenv("PROFILING_MODE"); | |||||
| const char *prof_options = std::getenv("PROFILING_OPTIONS"); | |||||
| char profiling_mode_temp[MMPA_MAX_PATH] = { 0x00 }; | |||||
| char prof_options_temp[MMPA_MAX_PATH] = { 0x00 }; | |||||
| (void)mmGetEnv("PROFILING_MODE", profiling_mode_temp, MMPA_MAX_PATH); | |||||
| (void)mmGetEnv("PROFILING_OPTIONS", prof_options_temp, MMPA_MAX_PATH ); | |||||
| const char *profiling_mode = profiling_mode_temp; | |||||
| const char *prof_options = prof_options_temp; | |||||
| if ((profiling_mode == nullptr) || (strcmp("true", profiling_mode) != 0) || (prof_options == nullptr)) { | if ((profiling_mode == nullptr) || (strcmp("true", profiling_mode) != 0) || (prof_options == nullptr)) { | ||||
| is_load_profiling_ = false; | is_load_profiling_ = false; | ||||
| is_execute_profiling_ = false; | is_execute_profiling_ = false; | ||||
| @@ -554,7 +558,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr | |||||
| GELOGE(rt_ret, "runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id); | GELOGE(rt_ret, "runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id); | ||||
| return; | return; | ||||
| } | } | ||||
| GELOGI("current logic_device_id:%d", logic_device_id); | |||||
| GELOGD("current logic_device_id:%d", logic_device_id); | |||||
| if (check_device) { | if (check_device) { | ||||
| auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id); | auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id); | ||||
| if (ret == device_id_.end()) { | if (ret == device_id_.end()) { | ||||
| @@ -562,11 +566,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr | |||||
| return; | return; | ||||
| } | } | ||||
| } | } | ||||
| GELOGI("start ProfilingTaskDescInfo."); | |||||
| GELOGD("start ProfilingTaskDescInfo."); | |||||
| ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id); | ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id); | ||||
| GELOGI("start ProfilingGraphDescInfo."); | |||||
| GELOGD("start ProfilingGraphDescInfo."); | |||||
| ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id); | ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id); | ||||
| GELOGI("Report profiling data for GE end."); | |||||
| GELOGD("Report profiling data for GE end."); | |||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -855,7 +859,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt | |||||
| for (int32_t i = 0; i < device_num; i++) { | for (int32_t i = 0; i < device_num; i++) { | ||||
| device_id_ptr[i] = static_cast<uint32_t>(device_list[i]); | device_id_ptr[i] = static_cast<uint32_t>(device_list[i]); | ||||
| } | } | ||||
| GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num); | |||||
| GELOGD("Runtime config param: 0x%llx, device num: %d.", module, device_num); | |||||
| rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get()); | rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get()); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| @@ -874,7 +878,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt | |||||
| GELOGW("Prof start: load model module is invalid."); | GELOGW("Prof start: load model module is invalid."); | ||||
| } | } | ||||
| UpdateDeviceIdModuleMap(kProfStart, module, device_list); | UpdateDeviceIdModuleMap(kProfStart, module, device_list); | ||||
| GELOGI("Prof start profiling success."); | |||||
| GELOGD("Prof start profiling success."); | |||||
| #endif | #endif | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -897,7 +901,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt | |||||
| for (int32_t i = 0; i < device_num; i++) { | for (int32_t i = 0; i < device_num; i++) { | ||||
| device_id_ptr[i] = static_cast<uint32_t>(device_list[i]); | device_id_ptr[i] = static_cast<uint32_t>(device_list[i]); | ||||
| } | } | ||||
| GELOGI("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num); | |||||
| GELOGD("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num); | |||||
| rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); | rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); | GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); | ||||
| @@ -917,7 +921,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt | |||||
| GELOGW("Prof stop: load model module is invalid."); | GELOGW("Prof stop: load model module is invalid."); | ||||
| } | } | ||||
| UpdateDeviceIdModuleMap(kProfStop, module, device_list); | UpdateDeviceIdModuleMap(kProfStop, module, device_list); | ||||
| GELOGI("Prof stop profiling success."); | |||||
| GELOGD("Prof stop profiling success."); | |||||
| #endif | #endif | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -959,14 +963,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin | |||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(rt_ret, "Runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id); | GELOGE(rt_ret, "Runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id); | ||||
| } | } | ||||
| GELOGI("Current logic_device_id:%d", logic_device_id); | |||||
| GELOGD("Current logic_device_id:%d", logic_device_id); | |||||
| bool execute_model_prof_on = false; | bool execute_model_prof_on = false; | ||||
| auto iter = std::find(device_id_.begin(), device_id_.end(), logic_device_id); | auto iter = std::find(device_id_.begin(), device_id_.end(), logic_device_id); | ||||
| if (iter != device_id_.end()) { | if (iter != device_id_.end()) { | ||||
| execute_model_prof_on = true; | execute_model_prof_on = true; | ||||
| } | } | ||||
| GELOGI("Flag is_execute_profiling: %d, execute_model_prof_on: %d", is_execute_profiling_, execute_model_prof_on); | |||||
| GELOGD("Flag is_execute_profiling: %d, execute_model_prof_on: %d", is_execute_profiling_, execute_model_prof_on); | |||||
| return is_execute_profiling_ || execute_model_prof_on; | return is_execute_profiling_ || execute_model_prof_on; | ||||
| } | } | ||||
| @@ -25,13 +25,14 @@ | |||||
| #include "common/dump/dump_properties.h" | #include "common/dump/dump_properties.h" | ||||
| #include "graph/op_desc.h" | #include "graph/op_desc.h" | ||||
| #include "common/ge_compiler_options.h" | |||||
| namespace ge { | namespace ge { | ||||
| // Configuration property management | // Configuration property management | ||||
| static const char *SYSMODE __attribute__((unused)) = "FMK_SYSMODE"; | |||||
| static const char *USE_FUSION __attribute__((unused)) = "FMK_USE_FUSION"; | |||||
| static const char *TIMESTAT_ENABLE __attribute__((unused)) = "DAVINCI_TIMESTAT_ENABLE"; | |||||
| static const char *ANNDROID_DEBUG __attribute__((unused)) = "ANNDROID_DEBUG"; | |||||
| static const char *SYSMODE GE_ATTRIBUTE_UNUSED = "FMK_SYSMODE"; | |||||
| static const char *USE_FUSION GE_ATTRIBUTE_UNUSED = "FMK_USE_FUSION"; | |||||
| static const char *TIMESTAT_ENABLE GE_ATTRIBUTE_UNUSED = "DAVINCI_TIMESTAT_ENABLE"; | |||||
| static const char *ANNDROID_DEBUG GE_ATTRIBUTE_UNUSED = "ANNDROID_DEBUG"; | |||||
| class PropertiesManager { | class PropertiesManager { | ||||
| public: | public: | ||||
| @@ -16,11 +16,12 @@ | |||||
| #include "framework/common/util.h" | #include "framework/common/util.h" | ||||
| #include <fcntl.h> | |||||
| #include <sys/stat.h> | #include <sys/stat.h> | ||||
| #ifdef __GNUC__ | |||||
| #include <regex.h> | #include <regex.h> | ||||
| #include <unistd.h> | |||||
| #else | |||||
| #include <regex> | |||||
| #endif | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <climits> | #include <climits> | ||||
| #include <cstdlib> | #include <cstdlib> | ||||
| @@ -208,29 +209,30 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadBytesFromBinaryFile(co | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int CreateDirectory(const std::string &directory_path) { | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int CreateDirectory(const std::string &directory_path) { | ||||
| GE_CHK_BOOL_EXEC(!directory_path.empty(), return -1, "directory path is empty."); | GE_CHK_BOOL_EXEC(!directory_path.empty(), return -1, "directory path is empty."); | ||||
| auto dir_path_len = directory_path.length(); | auto dir_path_len = directory_path.length(); | ||||
| if (dir_path_len >= PATH_MAX) { | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, | |||||
| {directory_path, std::to_string(PATH_MAX)}); | |||||
| GELOGW("Path[%s] len is too long, it must be less than %d", directory_path.c_str(), PATH_MAX); | |||||
| if (dir_path_len >= MMPA_MAX_PATH) { | |||||
| ErrorManager::GetInstance().ATCReportErrMessage( | |||||
| "E19002", {"filepath", "size"}, {directory_path, std::to_string(MMPA_MAX_PATH)}); | |||||
| GELOGW("Path[%s] len is too long, it must be less than %d", directory_path.c_str(), MMPA_MAX_PATH); | |||||
| return -1; | return -1; | ||||
| } | } | ||||
| char tmp_dir_path[PATH_MAX] = {0}; | |||||
| char tmp_dir_path[MMPA_MAX_PATH] = {0}; | |||||
| for (size_t i = 0; i < dir_path_len; i++) { | for (size_t i = 0; i < dir_path_len; i++) { | ||||
| tmp_dir_path[i] = directory_path[i]; | tmp_dir_path[i] = directory_path[i]; | ||||
| if ((tmp_dir_path[i] == '\\') || (tmp_dir_path[i] == '/')) { | if ((tmp_dir_path[i] == '\\') || (tmp_dir_path[i] == '/')) { | ||||
| if (access(tmp_dir_path, F_OK) != 0) { | |||||
| int32_t ret = mmMkdir(tmp_dir_path, S_IRUSR | S_IWUSR | S_IXUSR); // 700 | |||||
| if (mmAccess2(tmp_dir_path, M_F_OK) != EN_OK) { | |||||
| int32_t ret = mmMkdir(tmp_dir_path, M_IRUSR | M_IWUSR | M_IXUSR); // 700 | |||||
| if (ret != 0) { | if (ret != 0) { | ||||
| if (errno != EEXIST) { | if (errno != EEXIST) { | ||||
| ErrorManager::GetInstance().ATCReportErrMessage("E19006", {"path"}, {directory_path}); | ErrorManager::GetInstance().ATCReportErrMessage("E19006", {"path"}, {directory_path}); | ||||
| GELOGW("Can not create directory %s. Make sure the directory exists and writable.", directory_path.c_str()); | |||||
| GELOGW("Can not create directory %s. Make sure the directory exists and writable.", | |||||
| directory_path.c_str()); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| int32_t ret = mmMkdir(const_cast<char *>(directory_path.c_str()), S_IRUSR | S_IWUSR | S_IXUSR); // 700 | |||||
| int32_t ret = mmMkdir(const_cast<char *>(directory_path.c_str()), M_IRUSR | M_IWUSR | M_IXUSR); // 700 | |||||
| if (ret != 0) { | if (ret != 0) { | ||||
| if (errno != EEXIST) { | if (errno != EEXIST) { | ||||
| ErrorManager::GetInstance().ATCReportErrMessage("E19006", {"path"}, {directory_path}); | ErrorManager::GetInstance().ATCReportErrMessage("E19006", {"path"}, {directory_path}); | ||||
| @@ -305,9 +307,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromMem(const cha | |||||
| } | } | ||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t GetCurrentTimestamp() { | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t GetCurrentTimestamp() { | ||||
| struct timeval tv {}; | |||||
| int ret = gettimeofday(&tv, nullptr); | |||||
| GE_LOGE_IF(ret != 0, "Func gettimeofday may failed: ret=%d", ret); | |||||
| mmTimeval tv {}; | |||||
| int ret = mmGetTimeOfDay(&tv, nullptr); | |||||
| GE_LOGE_IF(ret != EN_OK, "Func gettimeofday may failed: ret=%d", ret); | |||||
| auto total_use_time = tv.tv_usec + tv.tv_sec * 1000000; // 1000000: seconds to microseconds | auto total_use_time = tv.tv_usec + tv.tv_sec * 1000000; // 1000000: seconds to microseconds | ||||
| return static_cast<uint64_t>(total_use_time); | return static_cast<uint64_t>(total_use_time); | ||||
| } | } | ||||
| @@ -347,16 +349,15 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInt64MulOverflow(int6 | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string RealPath(const char *path) { | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string RealPath(const char *path) { | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(path == nullptr, return "", "path pointer is NULL."); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(path == nullptr, return "", "path pointer is NULL."); | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||||
| strlen(path) >= PATH_MAX, | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {path, std::to_string(PATH_MAX)}); | |||||
| return "", "Path[%s] len is too long, it must be less than %d", path, PATH_MAX); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(path) >= MMPA_MAX_PATH, | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {path, std::to_string(MMPA_MAX_PATH)}); | |||||
| return "", "Path[%s] len is too long, it must be less than %d", path, MMPA_MAX_PATH); | |||||
| // Nullptr is returned when the path does not exist or there is no permission | // Nullptr is returned when the path does not exist or there is no permission | ||||
| // Return absolute path when path is accessible | // Return absolute path when path is accessible | ||||
| std::string res; | std::string res; | ||||
| char resolved_path[PATH_MAX] = {0}; | |||||
| if (realpath(path, resolved_path) != nullptr) { | |||||
| char resolved_path[MMPA_MAX_PATH] = {0}; | |||||
| if (mmRealPath(path, resolved_path, MMPA_MAX_PATH) == EN_OK) { | |||||
| res = resolved_path; | res = resolved_path; | ||||
| } | } | ||||
| @@ -383,7 +384,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const | |||||
| // A regular matching expression to verify the validity of the input file path | // A regular matching expression to verify the validity of the input file path | ||||
| // Path section: Support upper and lower case letters, numbers dots(.) chinese and underscores | // Path section: Support upper and lower case letters, numbers dots(.) chinese and underscores | ||||
| // File name section: Support upper and lower case letters, numbers, underscores chinese and dots(.) | // File name section: Support upper and lower case letters, numbers, underscores chinese and dots(.) | ||||
| std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; | |||||
| #ifdef __GNUC__ | |||||
| std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; | |||||
| #else | |||||
| std::string mode = "^[a-zA-Z]:([\\\\/][^\\s\\\\/:*?<>\"|][^\\\\/:*?<>\"|]*)*([/\\\\][^\\s\\\\/:*?<>\"|])?$"; | |||||
| #endif | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | ||||
| !ValidateStr(real_path, mode), | !ValidateStr(real_path, mode), | ||||
| @@ -392,7 +397,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const | |||||
| return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), real_path.c_str(), kPathValidReason); | return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), real_path.c_str(), kPathValidReason); | ||||
| // The absolute path points to a file that is not readable | // The absolute path points to a file that is not readable | ||||
| if (access(real_path.c_str(), R_OK) != 0) { | |||||
| if (mmAccess2(real_path.c_str(), M_R_OK) != EN_OK) { | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E19003", {"file", "errmsg"}, {file_path.c_str(), strerror(errno)}); | ErrorManager::GetInstance().ATCReportErrMessage("E19003", {"file", "errmsg"}, {file_path.c_str(), strerror(errno)}); | ||||
| GELOGW("Read file[%s] failed, errmsg[%s]", file_path.c_str(), strerror(errno)); | GELOGW("Read file[%s] failed, errmsg[%s]", file_path.c_str(), strerror(errno)); | ||||
| return false; | return false; | ||||
| @@ -410,15 +415,19 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const | |||||
| return false; | return false; | ||||
| } | } | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||||
| strlen(file_path.c_str()) >= PATH_MAX, ErrorManager::GetInstance().ATCReportErrMessage( | |||||
| "E19002", {"filepath", "size"}, {file_path, std::to_string(PATH_MAX)}); | |||||
| return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), PATH_MAX); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(file_path.c_str()) >= MMPA_MAX_PATH, | |||||
| ErrorManager::GetInstance().ATCReportErrMessage( | |||||
| "E19002", {"filepath", "size"}, {file_path, std::to_string(MMPA_MAX_PATH)}); | |||||
| return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), MMPA_MAX_PATH); | |||||
| // A regular matching expression to verify the validity of the input file path | // A regular matching expression to verify the validity of the input file path | ||||
| // Path section: Support upper and lower case letters, numbers dots(.) chinese and underscores | // Path section: Support upper and lower case letters, numbers dots(.) chinese and underscores | ||||
| // File name section: Support upper and lower case letters, numbers, underscores chinese and dots(.) | // File name section: Support upper and lower case letters, numbers, underscores chinese and dots(.) | ||||
| std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; | |||||
| #ifdef __GNUC__ | |||||
| std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; | |||||
| #else | |||||
| std::string mode = "^[a-zA-Z]:([\\\\/][^\\s\\\\/:*?<>\"|][^\\\\/:*?<>\"|]*)*([/\\\\][^\\s\\\\/:*?<>\"|])?$"; | |||||
| #endif | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | ||||
| !ValidateStr(file_path, mode), | !ValidateStr(file_path, mode), | ||||
| @@ -430,7 +439,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const | |||||
| // Can get absolute path (file exists) | // Can get absolute path (file exists) | ||||
| if (!real_path.empty()) { | if (!real_path.empty()) { | ||||
| // File is not readable or writable | // File is not readable or writable | ||||
| if (access(real_path.c_str(), W_OK | F_OK) != 0) { | |||||
| if (mmAccess2(real_path.c_str(), M_W_OK | M_F_OK) != EN_OK) { | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E19004", {"file", "errmsg"}, {real_path, strerror(errno)}); | ErrorManager::GetInstance().ATCReportErrMessage("E19004", {"file", "errmsg"}, {real_path, strerror(errno)}); | ||||
| GELOGW("Write file[%s] failed, errmsg[%s]", real_path.c_str(), strerror(errno)); | GELOGW("Write file[%s] failed, errmsg[%s]", real_path.c_str(), strerror(errno)); | ||||
| return false; | return false; | ||||
| @@ -461,6 +470,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const | |||||
| } | } | ||||
| FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::string &mode) { | FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::string &mode) { | ||||
| #ifdef __GNUC__ | |||||
| char ebuff[kMaxBuffSize]; | char ebuff[kMaxBuffSize]; | ||||
| regex_t reg; | regex_t reg; | ||||
| int cflags = REG_EXTENDED | REG_NOSUB; | int cflags = REG_EXTENDED | REG_NOSUB; | ||||
| @@ -482,6 +492,23 @@ FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::str | |||||
| regfree(&reg); | regfree(&reg); | ||||
| return true; | return true; | ||||
| #else | |||||
| std::wstring wstr(str.begin(), str.end()); | |||||
| std::wstring wmode(mode.begin(), mode.end()); | |||||
| std::wsmatch match; | |||||
| bool res = false; | |||||
| try { | |||||
| std::wregex reg(wmode, std::regex::icase); | |||||
| // Matching string part | |||||
| res = regex_match(wstr, match, reg); | |||||
| res = regex_search(str, std::regex("[`!@#$%^&*()|{}';',<>?]")); | |||||
| } catch (std::exception &ex) { | |||||
| GELOGW("The directory %s is invalid, error: %s.", str.c_str(), ex.what()); | |||||
| return false; | |||||
| } | |||||
| return !(res) && (str.size() == match.str().size()); | |||||
| #endif | |||||
| } | } | ||||
| FMK_FUNC_HOST_VISIBILITY bool IsValidFile(const char *file_path) { | FMK_FUNC_HOST_VISIBILITY bool IsValidFile(const char *file_path) { | ||||
| @@ -433,7 +433,7 @@ Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle h | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| const char *file = file_path.data(); | const char *file = file_path.data(); | ||||
| if ((access(file, F_OK)) == -1) { | |||||
| if ((mmAccess2(file, M_F_OK)) != EN_OK) { | |||||
| if (engines_map_.size() != 0) { | if (engines_map_.size() != 0) { | ||||
| GELOGE(FAILED, "The json file %s is not exist, %s", file_path.c_str(), strerror(errno)); | GELOGE(FAILED, "The json file %s is not exist, %s", file_path.c_str(), strerror(errno)); | ||||
| return FAILED; | return FAILED; | ||||
| @@ -588,7 +588,7 @@ Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data, | |||||
| } | } | ||||
| Status GeExecutor::UnloadModel(uint32_t model_id) { | Status GeExecutor::UnloadModel(uint32_t model_id) { | ||||
| GELOGI("unload model %u begin.", model_id); | |||||
| GELOGD("unload model %u begin.", model_id); | |||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | ||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| @@ -630,7 +630,6 @@ Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData | |||||
| // Get input and output descriptor | // Get input and output descriptor | ||||
| Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | ||||
| std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) { | std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) { | ||||
| GELOGI("get model desc info begin."); | |||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | ||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| @@ -665,7 +664,6 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes | |||||
| GetGeTensorDescFromDomiInfo(input_desc, input_desc_infos, input_formats); | GetGeTensorDescFromDomiInfo(input_desc, input_desc_infos, input_formats); | ||||
| GetGeTensorDescFromDomiInfo(output_desc, output_desc_infos, output_formats); | GetGeTensorDescFromDomiInfo(output_desc, output_desc_infos, output_formats); | ||||
| GELOGI("get model desc info end."); | |||||
| return ge::SUCCESS; | return ge::SUCCESS; | ||||
| } | } | ||||
| @@ -679,7 +677,6 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes | |||||
| /// | /// | ||||
| Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | ||||
| int32_t &dynamic_type) { | int32_t &dynamic_type) { | ||||
| GELOGI("Begin to get dynamic batch info."); | |||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | ||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| @@ -690,8 +687,6 @@ Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vecto | |||||
| GELOGE(ret, "GetDynamicBatchInfo failed."); | GELOGE(ret, "GetDynamicBatchInfo failed."); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| GELOGI("Get dynamic batch info succ."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -727,7 +722,6 @@ Status GeExecutor::GetCombinedDynamicDims(uint32_t model_id, vector<vector<int64 | |||||
| /// @return execute result | /// @return execute result | ||||
| /// | /// | ||||
| Status GeExecutor::GetUserDesignateShapeOrder(uint32_t model_id, vector<string> &user_designate_shape_order) { | Status GeExecutor::GetUserDesignateShapeOrder(uint32_t model_id, vector<string> &user_designate_shape_order) { | ||||
| GELOGI("Begin to get user designate shape info."); | |||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | ||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| @@ -739,7 +733,6 @@ Status GeExecutor::GetUserDesignateShapeOrder(uint32_t model_id, vector<string> | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| GELOGI("Get user designate shape order succ."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -782,7 +775,6 @@ Status GeExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType | |||||
| } | } | ||||
| Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info) { | Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info) { | ||||
| GELOGI("Begin to get dynamic batch output shape info"); | |||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | ||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| @@ -792,8 +784,6 @@ Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dyn | |||||
| GELOGE(ret, "Get dynamic batch output shape info failed."); | GELOGE(ret, "Get dynamic batch output shape info failed."); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| GELOGI("Get dynamic batch output shape info succ."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -835,8 +825,6 @@ Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge | |||||
| } | } | ||||
| Status GeExecutor::CommandHandle(const Command &command) { | Status GeExecutor::CommandHandle(const Command &command) { | ||||
| GELOGI("command handle begin."); | |||||
| Status ret = GraphLoader::CommandHandle(command); | Status ret = GraphLoader::CommandHandle(command); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ACL_ERROR_GE_COMMAND_HANDLE, "CommandHandle: Command Handle failed."); | GELOGE(ACL_ERROR_GE_COMMAND_HANDLE, "CommandHandle: Command Handle failed."); | ||||
| @@ -904,7 +892,6 @@ Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_da | |||||
| */ | */ | ||||
| Status GeExecutor::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, size_t mem_size, | Status GeExecutor::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, size_t mem_size, | ||||
| void *weight_ptr, size_t weight_size) { | void *weight_ptr, size_t weight_size) { | ||||
| GELOGI("Load model from data begin."); | |||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | ||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| @@ -945,7 +932,6 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat | |||||
| */ | */ | ||||
| Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, | Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, | ||||
| ge::RunModelData &run_output_data, bool async_mode) { | ge::RunModelData &run_output_data, bool async_mode) { | ||||
| GELOGI("Execute model begin."); | |||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | ||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| @@ -387,12 +387,12 @@ LOCAL_SRC_FILES += $(BUILER_SRC_FILES) | |||||
| LOCAL_SRC_FILES += $(ANALYZER_SRC_FILES) | LOCAL_SRC_FILES += $(ANALYZER_SRC_FILES) | ||||
| LOCAL_STATIC_LIBRARIES := libge_memory \ | LOCAL_STATIC_LIBRARIES := libge_memory \ | ||||
| libmmpa \ | |||||
| LOCAL_SHARED_LIBRARIES := \ | LOCAL_SHARED_LIBRARIES := \ | ||||
| libc_sec \ | libc_sec \ | ||||
| libascend_protobuf \ | libascend_protobuf \ | ||||
| libslog \ | libslog \ | ||||
| libmmpa \ | |||||
| libgraph \ | libgraph \ | ||||
| libregister \ | libregister \ | ||||
| libge_common \ | libge_common \ | ||||
| @@ -451,12 +451,12 @@ LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES) | |||||
| LOCAL_C_INCLUDES += $(ANALYZER_LOCAL_INCLUDES) | LOCAL_C_INCLUDES += $(ANALYZER_LOCAL_INCLUDES) | ||||
| LOCAL_STATIC_LIBRARIES := libge_memory \ | LOCAL_STATIC_LIBRARIES := libge_memory \ | ||||
| libmmpa \ | |||||
| LOCAL_SHARED_LIBRARIES := \ | LOCAL_SHARED_LIBRARIES := \ | ||||
| libc_sec \ | libc_sec \ | ||||
| libascend_protobuf \ | libascend_protobuf \ | ||||
| libslog \ | libslog \ | ||||
| libmmpa \ | |||||
| libgraph \ | libgraph \ | ||||
| libregister \ | libregister \ | ||||
| libresource \ | libresource \ | ||||
| @@ -221,7 +221,7 @@ Status HostCpuEngine::RunInternal(const ge::OpDescPtr &op_desc, | |||||
| Operator op = ge::OpDescUtils::CreateOperatorFromOpDesc(op_desc); | Operator op = ge::OpDescUtils::CreateOperatorFromOpDesc(op_desc); | ||||
| auto ret = op_kernel.Compute(op, named_inputs, named_outputs); | auto ret = op_kernel.Compute(op, named_inputs, named_outputs); | ||||
| if (ret != GRAPH_SUCCESS) { | if (ret != GRAPH_SUCCESS) { | ||||
| GELOGE(FAILED, "Failed to compute host cpu op. node = %s, ret = %u", op_desc->GetName().c_str(), ret); | |||||
| GELOGW("Failed to compute host cpu op. node = %s", op_desc->GetName().c_str()); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| op.BreakConnect(); | op.BreakConnect(); | ||||
| @@ -88,6 +88,25 @@ LOCAL_C_INCLUDES := $(local_lib_inc_path) | |||||
| include ${BUILD_HOST_SHARED_LIBRARY} | include ${BUILD_HOST_SHARED_LIBRARY} | ||||
| #compiler for device libge_local_opskernel_builder.so | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libge_local_opskernel_builder | |||||
| LOCAL_CFLAGS += -Werror | |||||
| LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private | |||||
| LOCAL_LDFLAGS := | |||||
| LOCAL_STATIC_LIBRARIES := | |||||
| LOCAL_SHARED_LIBRARIES := libascend_protobuf \ | |||||
| libc_sec \ | |||||
| libslog \ | |||||
| libregister \ | |||||
| libgraph | |||||
| LOCAL_SRC_FILES := $(ops_kernel_builder_src_files) | |||||
| LOCAL_C_INCLUDES := $(local_lib_inc_path) | |||||
| include ${BUILD_SHARED_LIBRARY} | |||||
| #compiler for libge_local_opskernel_builder.so in atc | #compiler for libge_local_opskernel_builder.so in atc | ||||
| include $(CLEAR_VARS) | include $(CLEAR_VARS) | ||||
| @@ -111,7 +111,7 @@ Status GeLocalOpsKernelBuilder::CalcOpRunningParam(Node &ge_node) { | |||||
| TypeUtils::DataTypeToSerialString(data_type).c_str(), output_mem_size); | TypeUtils::DataTypeToSerialString(data_type).c_str(), output_mem_size); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| GELOGI( | |||||
| GELOGD( | |||||
| "Calc op[%s:%s] out[%zu] mem size is %ld," | "Calc op[%s:%s] out[%zu] mem size is %ld," | ||||
| " format=%s, data_type=%s.", | " format=%s, data_type=%s.", | ||||
| node_name.c_str(), node_type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), | node_name.c_str(), node_type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), | ||||
| @@ -174,7 +174,7 @@ Status GeLocalOpsKernelBuilder::GenerateTask(const Node &node, RunContext &conte | |||||
| GELOGE(ret, "Node:%s(%s) op run failed.", name.c_str(), type.c_str()); | GELOGE(ret, "Node:%s(%s) op run failed.", name.c_str(), type.c_str()); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| GELOGI("Ge local generate task for node:%s(%s) end, tasks.size()=%zu.", name.c_str(), type.c_str(), tasks.size()); | |||||
| GELOGD("Ge local generate task for node:%s(%s) end, tasks.size()=%zu.", name.c_str(), type.c_str(), tasks.size()); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| } // namespace ge_local | } // namespace ge_local | ||||
| @@ -24,7 +24,7 @@ namespace ge_local { | |||||
| NoOp::NoOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} | NoOp::NoOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} | ||||
| Status NoOp::Run() { | Status NoOp::Run() { | ||||
| GELOGI("Node:%s type is %s, no need generate task.", name_.c_str(), type_.c_str()); | |||||
| GELOGD("Node:%s type is %s, no need generate task.", name_.c_str(), type_.c_str()); | |||||
| // Do nothing | // Do nothing | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -372,12 +372,12 @@ LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) | |||||
| LOCAL_STATIC_LIBRARIES := libge_memory \ | LOCAL_STATIC_LIBRARIES := libge_memory \ | ||||
| libadump_server \ | libadump_server \ | ||||
| libmsprofiler \ | libmsprofiler \ | ||||
| libmmpa \ | |||||
| LOCAL_SHARED_LIBRARIES := \ | LOCAL_SHARED_LIBRARIES := \ | ||||
| libc_sec \ | libc_sec \ | ||||
| libascend_protobuf \ | libascend_protobuf \ | ||||
| libslog \ | libslog \ | ||||
| libmmpa \ | |||||
| libgraph \ | libgraph \ | ||||
| libregister \ | libregister \ | ||||
| libge_common \ | libge_common \ | ||||
| @@ -15,83 +15,56 @@ | |||||
| */ | */ | ||||
| #include "ge_runtime/task/hccl_task.h" | #include "ge_runtime/task/hccl_task.h" | ||||
| #include <algorithm> | |||||
| #include "ge_runtime/task/task_factory.h" | #include "ge_runtime/task/task_factory.h" | ||||
| #include "common/opskernel/ops_kernel_info_store.h" | #include "common/opskernel/ops_kernel_info_store.h" | ||||
| #include "common/opskernel/ge_task_info.h" | #include "common/opskernel/ge_task_info.h" | ||||
| namespace ge { | namespace ge { | ||||
| namespace model_runner { | namespace model_runner { | ||||
| std::map<rtModel_t, std::map<uint32_t, std::vector<std::weak_ptr<HcclTask::StreamGuard>>>> | |||||
| HcclTask::model_stream_mapping_; | |||||
| std::mutex HcclTask::model_stream_mapping_mutex_; | |||||
| HcclTask::HcclTask(const ModelContext &model_context, const std::shared_ptr<HcclTaskInfo> &task_info) | HcclTask::HcclTask(const ModelContext &model_context, const std::shared_ptr<HcclTaskInfo> &task_info) | ||||
| : TaskRepeater<HcclTaskInfo>(model_context, task_info), | : TaskRepeater<HcclTaskInfo>(model_context, task_info), | ||||
| task_info_(task_info), | task_info_(task_info), | ||||
| stream_(nullptr), | stream_(nullptr), | ||||
| workspace_mem_(nullptr), | |||||
| rt_model_handle_(nullptr), | rt_model_handle_(nullptr), | ||||
| priority_(0), | priority_(0), | ||||
| slave_stream_list_(), | |||||
| hcom_bind_model_(nullptr), | |||||
| hcom_unbind_model_(nullptr), | |||||
| hcom_distribute_task_(nullptr) { | |||||
| secondary_stream_list_() { | |||||
| if (task_info_ == nullptr) { | if (task_info_ == nullptr) { | ||||
| GELOGW("task_info_ is null!"); | GELOGW("task_info_ is null!"); | ||||
| } | } | ||||
| hcom_bind_model_ = task_info->hcom_bind_model(); | |||||
| hcom_unbind_model_ = task_info->hcom_unbind_model(); | |||||
| priority_ = model_context.priority(); | priority_ = model_context.priority(); | ||||
| rt_model_handle_ = model_context.rt_model_handle(); | rt_model_handle_ = model_context.rt_model_handle(); | ||||
| auto stream_list = model_context.stream_list(); | auto stream_list = model_context.stream_list(); | ||||
| if (hcom_bind_model_ != nullptr) { | |||||
| if (rt_model_handle_list_.insert(rt_model_handle_).second) { | |||||
| for (auto stream : stream_list) { | |||||
| (void)hcom_bind_model_(rt_model_handle_, stream); | |||||
| } | |||||
| } | |||||
| } | |||||
| if (stream_list.size() == 1) { | if (stream_list.size() == 1) { | ||||
| stream_ = stream_list[0]; | stream_ = stream_list[0]; | ||||
| } else if (stream_list.size() > task_info->stream_id()) { | } else if (stream_list.size() > task_info->stream_id()) { | ||||
| stream_ = stream_list[task_info->stream_id()]; | stream_ = stream_list[task_info->stream_id()]; | ||||
| } else { | } else { | ||||
| GELOGW("index: %u >= stream_list.size(): %zu.", task_info->stream_id(), stream_list.size()); | |||||
| GELOGW("Index: %u >= stream_list.size(): %zu.", task_info->stream_id(), stream_list.size()); | |||||
| } | } | ||||
| } | } | ||||
| HcclTask::~HcclTask() { | HcclTask::~HcclTask() { | ||||
| for (size_t i = 0; i < slave_stream_list_.size(); ++i) { | |||||
| rtError_t rt_ret = rtModelUnbindStream(rt_model_handle_, slave_stream_list_[i]); | |||||
| if (workspace_mem_ != nullptr) { | |||||
| rtError_t rt_ret = rtFree(workspace_mem_); | |||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(RT_FAILED, "Unbind stream from model failed! Index: %zu", i); | |||||
| } | |||||
| } | |||||
| for (size_t i = 0; i < slave_stream_list_.size(); ++i) { | |||||
| rtError_t rt_ret = rtStreamDestroy(slave_stream_list_[i]); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Destroy stream failed! Index: %zu", i); | |||||
| } | |||||
| } | |||||
| if (hcom_unbind_model_ != nullptr) { | |||||
| if (rt_model_handle_list_.find(rt_model_handle_) != rt_model_handle_list_.end()) { | |||||
| (void)hcom_unbind_model_(rt_model_handle_); | |||||
| (void)rt_model_handle_list_.erase(rt_model_handle_); | |||||
| GELOGE(RT_FAILED, "rtFree workspace_mem_ failed! ret: 0x%X.", rt_ret); | |||||
| } | } | ||||
| workspace_mem_ = nullptr; | |||||
| } | } | ||||
| } | } | ||||
| bool HcclTask::Distribute() { | bool HcclTask::Distribute() { | ||||
| // No ops kernel info store | |||||
| hcom_distribute_task_ = task_info_->hcom_distribute_task(); | |||||
| if (hcom_distribute_task_ != nullptr) { | |||||
| return hcom_distribute_task_(task_info_, stream_); | |||||
| } | |||||
| // Ops kernel info store | // Ops kernel info store | ||||
| // Get privateDef and opsKernelStorePtr | // Get privateDef and opsKernelStorePtr | ||||
| GELOGI("get custom info in modelTaskDef"); | |||||
| GELOGI("Get custom info in modelTaskDef"); | |||||
| void *ops_kernel_store = task_info_->ops_kernel_store(); | void *ops_kernel_store = task_info_->ops_kernel_store(); | ||||
| OpsKernelInfoStore *ops_kernel_info_store = reinterpret_cast<OpsKernelInfoStore *>(ops_kernel_store); | OpsKernelInfoStore *ops_kernel_info_store = reinterpret_cast<OpsKernelInfoStore *>(ops_kernel_store); | ||||
| if (ops_kernel_store == nullptr) { | if (ops_kernel_store == nullptr) { | ||||
| @@ -101,25 +74,15 @@ bool HcclTask::Distribute() { | |||||
| char *private_def = reinterpret_cast<char *>(const_cast<char unsigned *>(task_info_->private_def().data())); | char *private_def = reinterpret_cast<char *>(const_cast<char unsigned *>(task_info_->private_def().data())); | ||||
| auto private_def_len = static_cast<uint32_t>(task_info_->private_def().size()); | auto private_def_len = static_cast<uint32_t>(task_info_->private_def().size()); | ||||
| GELOGI("the first address of the custom info, privateDef=%p", private_def); | |||||
| GELOGI("hcclStreamNum =%ld", task_info_->hccl_stream_num()); | |||||
| for (int64_t i = 0; i < task_info_->hccl_stream_num(); ++i) { | |||||
| rtStream_t stream = nullptr; | |||||
| rtError_t rt_ret = rtStreamCreateWithFlags(&stream, priority_, RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||||
| return false; | |||||
| } | |||||
| GELOGI("The first address of the custom info, privateDef=%p", private_def); | |||||
| SetSecondaryStream(); | |||||
| rt_ret = rtModelBindStream(rt_model_handle_, stream, RT_HEAD_STREAM); | |||||
| if (task_info_->workspace_size() > 0) { | |||||
| rtError_t rt_ret = rtMalloc(&workspace_mem_, task_info_->workspace_size(), RT_MEMORYINFO_HBM); | |||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
| return false; | return false; | ||||
| } | } | ||||
| GELOGI("hccl_stream addr is=%p", stream); | |||||
| slave_stream_list_.push_back(stream); | |||||
| } | } | ||||
| GELOGI("HcclTaskInfo Distribute Start. begin to call function LoadTask in hccl."); | GELOGI("HcclTaskInfo Distribute Start. begin to call function LoadTask in hccl."); | ||||
| @@ -128,17 +91,22 @@ bool HcclTask::Distribute() { | |||||
| ge_task.type = static_cast<uint16_t>(RT_MODEL_TASK_HCCL); | ge_task.type = static_cast<uint16_t>(RT_MODEL_TASK_HCCL); | ||||
| ge_task.stream = stream_; | ge_task.stream = stream_; | ||||
| ge_task.kernelHcclInfo = std::vector<GETaskKernelHcclInfo>(1); | |||||
| ge_task.kernelHcclInfo[0].hccl_type = task_info_->hccl_type(); | ge_task.kernelHcclInfo[0].hccl_type = task_info_->hccl_type(); | ||||
| ge_task.kernelHcclInfo[0].inputDataAddr = task_info_->input_data_addr(); | ge_task.kernelHcclInfo[0].inputDataAddr = task_info_->input_data_addr(); | ||||
| ge_task.kernelHcclInfo[0].outputDataAddr = task_info_->output_data_addr(); | ge_task.kernelHcclInfo[0].outputDataAddr = task_info_->output_data_addr(); | ||||
| ge_task.kernelHcclInfo[0].workSpaceAddr = task_info_->workspace_addr(); | |||||
| ge_task.kernelHcclInfo[0].workSpaceAddr = workspace_mem_; | |||||
| ge_task.kernelHcclInfo[0].workSpaceMemSize = task_info_->workspace_size(); | ge_task.kernelHcclInfo[0].workSpaceMemSize = task_info_->workspace_size(); | ||||
| ge_task.kernelHcclInfo[0].count = task_info_->count(); | ge_task.kernelHcclInfo[0].count = task_info_->count(); | ||||
| ge_task.kernelHcclInfo[0].dataType = static_cast<int32_t>(task_info_->data_type()); | ge_task.kernelHcclInfo[0].dataType = static_cast<int32_t>(task_info_->data_type()); | ||||
| ge_task.kernelHcclInfo[0].opType = static_cast<int32_t>(task_info_->op_type()); | ge_task.kernelHcclInfo[0].opType = static_cast<int32_t>(task_info_->op_type()); | ||||
| ge_task.kernelHcclInfo[0].rootId = task_info_->root_id(); | ge_task.kernelHcclInfo[0].rootId = task_info_->root_id(); | ||||
| ge_task.kernelHcclInfo[0].hcclStreamList = slave_stream_list_; | |||||
| std::vector<rtStream_t> secondary_stream_list; | |||||
| std::transform(secondary_stream_list_.begin(), secondary_stream_list_.end(), | |||||
| std::back_inserter(secondary_stream_list), | |||||
| [](const std::shared_ptr<StreamGuard> &stream) -> rtStream_t { return stream->GetStream(); }); | |||||
| ge_task.kernelHcclInfo[0].hcclStreamList = secondary_stream_list; | |||||
| ge_task.privateDef = private_def; | ge_task.privateDef = private_def; | ||||
| ge_task.privateDefLen = private_def_len; | ge_task.privateDefLen = private_def_len; | ||||
| @@ -151,10 +119,152 @@ bool HcclTask::Distribute() { | |||||
| return false; | return false; | ||||
| } | } | ||||
| GELOGI("call function LoadTask end."); | |||||
| GELOGI("Call function LoadTask end."); | |||||
| return true; | return true; | ||||
| } | } | ||||
| bool HcclTask::SetSecondaryStream() { | |||||
| const uint32_t master_stream_id = task_info_->stream_id(); | |||||
| const int64_t hccl_secondary_stream_num = task_info_->hccl_stream_num(); | |||||
| Status ret; | |||||
| std::lock_guard<std::mutex> lock(model_stream_mapping_mutex_); | |||||
| if (model_stream_mapping_.find(rt_model_handle_) == model_stream_mapping_.end()) { | |||||
| GELOGI("Need to create map for rt_model_handle_:%p with new mainstream %ld.", rt_model_handle_, master_stream_id); | |||||
| ret = CreateStream(hccl_secondary_stream_num, master_stream_id); | |||||
| if (!ret) { | |||||
| GELOGE(RT_FAILED, "Create hccl stream failed."); | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::map<uint32_t, std::vector<std::weak_ptr<StreamGuard>>> &master_secondary_stream_map = | |||||
| model_stream_mapping_.at(rt_model_handle_); | |||||
| auto iter = master_secondary_stream_map.find(master_stream_id); | |||||
| if (iter != master_secondary_stream_map.end()) { | |||||
| std::vector<std::weak_ptr<StreamGuard>> &secondary_stream_vec = iter->second; | |||||
| auto lock_weak_ptr = [&secondary_stream_vec, this](int64_t index) -> bool { | |||||
| auto stream = secondary_stream_vec[index].lock(); | |||||
| if (stream == nullptr) { | |||||
| rtStream_t new_stream = nullptr; | |||||
| bool ret = CreateStream(rt_model_handle_, &new_stream); | |||||
| if (!ret) { | |||||
| GELOGE(FAILED, "CreateStream failed."); | |||||
| return false; | |||||
| } | |||||
| stream = std::make_shared<HcclTask::StreamGuard>(rt_model_handle_, new_stream); | |||||
| if (stream == nullptr) { | |||||
| GELOGE(FAILED, "MakeShared failed."); | |||||
| return false; | |||||
| } | |||||
| secondary_stream_vec[index] = stream; | |||||
| } | |||||
| secondary_stream_list_.push_back(stream); | |||||
| return true; | |||||
| }; | |||||
| if (static_cast<size_t>(hccl_secondary_stream_num) <= secondary_stream_vec.size()) { | |||||
| GELOGI("Number of secondary stream is enough to be reused."); | |||||
| for (int64_t i = 0; i < hccl_secondary_stream_num; ++i) { | |||||
| if (!lock_weak_ptr(i)) { | |||||
| GELOGE(FAILED, "Lock weak ptr failed."); | |||||
| return false; | |||||
| } | |||||
| } | |||||
| } else { | |||||
| GELOGI("Need to reuse secondary stream and create new secondary stream."); | |||||
| size_t created_stream_num = secondary_stream_vec.size(); | |||||
| for (size_t i = 0; i < secondary_stream_vec.size(); ++i) { | |||||
| if (!lock_weak_ptr(i)) { | |||||
| GELOGE(FAILED, "Lock weak ptr failed."); | |||||
| return false; | |||||
| } | |||||
| } | |||||
| ret = CreateStream(hccl_secondary_stream_num - created_stream_num, master_stream_id); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(RT_FAILED, "Create hccl stream failed."); | |||||
| return false; | |||||
| } | |||||
| } | |||||
| GELOGI("Initialize hccl secondary stream success, hccl_secondary_stream_num =%ld", hccl_secondary_stream_num); | |||||
| } else { | |||||
| GELOGI("Need to create secondary stream for %s with new mainstream %ld.", task_info_->op_name().c_str(), | |||||
| master_stream_id); | |||||
| ret = CreateStream(hccl_secondary_stream_num, master_stream_id); | |||||
| if (!ret) { | |||||
| GELOGE(RT_FAILED, "Create hccl stream failed."); | |||||
| return false; | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool HcclTask::CreateStream(int64_t stream_num, int64_t master_stream_id) { | |||||
| GELOGI("Start to create %ld hccl secondary stream.", stream_num); | |||||
| for (int64_t i = 0; i < stream_num; ++i) { | |||||
| rtStream_t stream = nullptr; | |||||
| bool ret = CreateStream(rt_model_handle_, &stream); | |||||
| if (!ret) { | |||||
| GELOGE(FAILED, "CreateStream failed."); | |||||
| return false; | |||||
| } | |||||
| GELOGD("hccl_stream addr is=%p", stream); | |||||
| auto shared_stream = std::make_shared<StreamGuard>(rt_model_handle_, stream); | |||||
| if (shared_stream == nullptr) { | |||||
| GELOGE(FAILED, "MakeShared failed."); | |||||
| return false; | |||||
| } | |||||
| SaveHcclSecondaryStream(master_stream_id, shared_stream); | |||||
| secondary_stream_list_.push_back(shared_stream); | |||||
| } | |||||
| GELOGI("CreateStream success."); | |||||
| return true; | |||||
| } | |||||
| bool HcclTask::CreateStream(rtModel_t model, rtStream_t *stream) const { | |||||
| if (stream == nullptr) { | |||||
| GELOGE(FAILED, "Output param stream is null."); | |||||
| return false; | |||||
| } | |||||
| rtError_t rt_ret = rtStreamCreateWithFlags(stream, priority_, RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||||
| return false; | |||||
| } | |||||
| // Create secondary stream, inactive by default, activated by hccl | |||||
| rt_ret = rtModelBindStream(model, *stream, RT_MODEL_WAIT_ACTIVE_STREAM); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| void HcclTask::SaveHcclSecondaryStream(int64_t master_stream_id, const std::shared_ptr<StreamGuard> &stream) { | |||||
| if (model_stream_mapping_.find(rt_model_handle_) == model_stream_mapping_.end()) { | |||||
| model_stream_mapping_.emplace(rt_model_handle_, std::map<uint32_t, std::vector<std::weak_ptr<StreamGuard>>>()); | |||||
| } | |||||
| std::map<uint32_t, std::vector<std::weak_ptr<StreamGuard>>> &master_secondary_stream_map = | |||||
| model_stream_mapping_.at(rt_model_handle_); | |||||
| master_secondary_stream_map[master_stream_id].emplace_back(stream); | |||||
| } | |||||
| HcclTask::StreamGuard::~StreamGuard() { | |||||
| rtError_t rt_ret = rtModelUnbindStream(model_, stream_); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Unbind stream from model failed!"); | |||||
| return; | |||||
| } | |||||
| rt_ret = rtStreamDestroy(stream_); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Destroy stream failed!"); | |||||
| return; | |||||
| } | |||||
| } | |||||
| REGISTER_TASK(TaskInfoType::HCCL, HcclTask, HcclTaskInfo); | REGISTER_TASK(TaskInfoType::HCCL, HcclTask, HcclTaskInfo); | ||||
| } // namespace model_runner | } // namespace model_runner | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -19,7 +19,9 @@ | |||||
| #include <memory> | #include <memory> | ||||
| #include <set> | #include <set> | ||||
| #include <map> | |||||
| #include <vector> | #include <vector> | ||||
| #include <mutex> | |||||
| #include "ge_runtime/task/task.h" | #include "ge_runtime/task/task.h" | ||||
| namespace ge { | namespace ge { | ||||
| @@ -33,18 +35,34 @@ class HcclTask : public TaskRepeater<HcclTaskInfo> { | |||||
| bool Distribute() override; | bool Distribute() override; | ||||
| private: | private: | ||||
| class StreamGuard; | |||||
| bool SetSecondaryStream(); | |||||
| bool CreateStream(int64_t stream_num, int64_t master_stream_id); | |||||
| bool CreateStream(rtModel_t model, rtStream_t *stream) const; | |||||
| void SaveHcclSecondaryStream(int64_t master_stream_id, const std::shared_ptr<StreamGuard> &stream); | |||||
| std::shared_ptr<HcclTaskInfo> task_info_; | std::shared_ptr<HcclTaskInfo> task_info_; | ||||
| void *stream_; | void *stream_; | ||||
| void *workspace_mem_; | |||||
| rtModel_t rt_model_handle_; | rtModel_t rt_model_handle_; | ||||
| int32_t priority_; | int32_t priority_; | ||||
| std::vector<void *> slave_stream_list_; | |||||
| std::function<bool(void *, void *)> hcom_bind_model_; | |||||
| std::function<bool(void *)> hcom_unbind_model_; | |||||
| std::function<bool(std::shared_ptr<HcclTaskInfo>, void *)> hcom_distribute_task_; | |||||
| static std::set<rtModel_t> rt_model_handle_list_; | |||||
| std::vector<std::shared_ptr<StreamGuard>> secondary_stream_list_; | |||||
| // map<key: model pointer, value: map<key: primary stream id, value: vector<secondary stream pointer>>> | |||||
| static std::map<rtModel_t, std::map<uint32_t, std::vector<std::weak_ptr<StreamGuard>>>> model_stream_mapping_; | |||||
| static std::mutex model_stream_mapping_mutex_; | |||||
| }; | }; | ||||
| std::set<rtModel_t> HcclTask::rt_model_handle_list_{}; | |||||
| class HcclTask::StreamGuard { | |||||
| public: | |||||
| StreamGuard(rtModel_t model, rtStream_t stream) : model_(model), stream_(stream) {} | |||||
| ~StreamGuard(); | |||||
| rtStream_t GetStream() const { return stream_; } | |||||
| private: | |||||
| rtModel_t model_; | |||||
| rtStream_t stream_; | |||||
| }; | |||||
| } // namespace model_runner | } // namespace model_runner | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -200,7 +200,6 @@ static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, cons | |||||
| } | } | ||||
| static void GetOpsProtoPath(string &opsproto_path) { | static void GetOpsProtoPath(string &opsproto_path) { | ||||
| GELOGI("Start to get ops proto path schedule."); | |||||
| const char *path_env = std::getenv("ASCEND_OPP_PATH"); | const char *path_env = std::getenv("ASCEND_OPP_PATH"); | ||||
| if (path_env != nullptr) { | if (path_env != nullptr) { | ||||
| string path = path_env; | string path = path_env; | ||||
| @@ -383,7 +382,6 @@ bool GeGenerator::Impl::ParseVersion(const std::string &line, std::string &versi | |||||
| } | } | ||||
| version = temp.substr(pos + flag.size()); | version = temp.substr(pos + flag.size()); | ||||
| GELOGI("Version=%s", version.c_str()); | |||||
| return true; | return true; | ||||
| } | } | ||||
| @@ -425,7 +423,6 @@ bool GeGenerator::Impl::SetAtcVersionInfo(AttrHolder &obj) { | |||||
| path_base = path_base.substr(0, path_base.rfind('/') + 1); | path_base = path_base.substr(0, path_base.rfind('/') + 1); | ||||
| std::string version_path = path_base + "version.info"; | std::string version_path = path_base + "version.info"; | ||||
| GELOGI("version_path is %s", version_path.c_str()); | |||||
| std::string version; | std::string version; | ||||
| if (!GetVersionFromPath(version_path, version)) { | if (!GetVersionFromPath(version_path, version)) { | ||||
| GELOGW("Get atc version information failed!"); | GELOGW("Get atc version information failed!"); | ||||
| @@ -436,7 +433,6 @@ bool GeGenerator::Impl::SetAtcVersionInfo(AttrHolder &obj) { | |||||
| GELOGW("Ge model set atc version failed!"); | GELOGW("Ge model set atc version failed!"); | ||||
| return false; | return false; | ||||
| } | } | ||||
| GELOGI("Ge model set atc version information success."); | |||||
| return true; | return true; | ||||
| } | } | ||||
| @@ -449,7 +445,6 @@ bool GeGenerator::Impl::SetOppVersionInfo(AttrHolder &obj) { | |||||
| } | } | ||||
| std::string version_path = path_env; | std::string version_path = path_env; | ||||
| version_path += "/version.info"; | version_path += "/version.info"; | ||||
| GELOGI("version_path is %s", version_path.c_str()); | |||||
| std::string version; | std::string version; | ||||
| if (!GetVersionFromPath(version_path, version)) { | if (!GetVersionFromPath(version_path, version)) { | ||||
| GELOGW("Get opp version information failed!"); | GELOGW("Get opp version information failed!"); | ||||
| @@ -460,7 +455,6 @@ bool GeGenerator::Impl::SetOppVersionInfo(AttrHolder &obj) { | |||||
| GELOGW("Ge model set opp version failed!"); | GELOGW("Ge model set opp version failed!"); | ||||
| return false; | return false; | ||||
| } | } | ||||
| GELOGI("Ge Model set opp version information success."); | |||||
| return true; | return true; | ||||
| } | } | ||||
| @@ -469,7 +463,7 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr | |||||
| rtContext_t ctx = nullptr; | rtContext_t ctx = nullptr; | ||||
| auto rt = rtCtxGetCurrent(&ctx); | auto rt = rtCtxGetCurrent(&ctx); | ||||
| if (rt != RT_ERROR_NONE) { | if (rt != RT_ERROR_NONE) { | ||||
| GELOGW("Current ctx is null."); | |||||
| GELOGD("Current ctx is null."); | |||||
| ctx = nullptr; | ctx = nullptr; | ||||
| } | } | ||||
| @@ -524,7 +518,6 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr | |||||
| (void)rtCtxSetCurrent(ctx); | (void)rtCtxSetCurrent(ctx); | ||||
| } | } | ||||
| GELOGI("GenerateOfflineModel success."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -713,7 +706,6 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> | |||||
| return GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED; | return GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED; | ||||
| } | } | ||||
| GELOGI("Model inputs size is %zu", inputs.size()); | |||||
| graph_manager_.SetOptionsRunGraphFlag(false); | graph_manager_.SetOptionsRunGraphFlag(false); | ||||
| static std::atomic<uint64_t> atomic_session_id(0); | static std::atomic<uint64_t> atomic_session_id(0); | ||||
| @@ -102,7 +102,6 @@ void GraphBuilder::SetOptions(const ge::GraphManagerOptions &options) { | |||||
| } | } | ||||
| Status GraphBuilder::CalcOpParam(const ge::ComputeGraphPtr &graph) { | Status GraphBuilder::CalcOpParam(const ge::ComputeGraphPtr &graph) { | ||||
| GELOGI("Begin to calculate op running param."); | |||||
| GE_CHECK_NOTNULL(graph); | GE_CHECK_NOTNULL(graph); | ||||
| auto instance_ptr = ge::GELib::GetInstance(); | auto instance_ptr = ge::GELib::GetInstance(); | ||||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | ||||
| @@ -140,7 +139,6 @@ Status GraphBuilder::CalcOpParam(const ge::ComputeGraphPtr &graph) { | |||||
| auto parent_node = graph->GetParentNode(); | auto parent_node = graph->GetParentNode(); | ||||
| if (parent_node == nullptr) { | if (parent_node == nullptr) { | ||||
| GELOGI("Graph[%s] do not have parent node, no need update parent node output size.", graph->GetName().c_str()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -189,7 +187,6 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph | |||||
| Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | ||||
| GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { | GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { | ||||
| GELOGI("Start to build model."); | |||||
| if (comp_graph == nullptr) { | if (comp_graph == nullptr) { | ||||
| GELOGE(GE_GRAPH_PARAM_NULLPTR, "Graph build comp_graph is null."); | GELOGE(GE_GRAPH_PARAM_NULLPTR, "Graph build comp_graph is null."); | ||||
| return GE_GRAPH_PARAM_NULLPTR; | return GE_GRAPH_PARAM_NULLPTR; | ||||
| @@ -267,7 +264,7 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::v | |||||
| } | } | ||||
| GE_CHK_STATUS_RET(builder.SaveDataToModel(*model_ptr, *ge_model_ptr), | GE_CHK_STATUS_RET(builder.SaveDataToModel(*model_ptr, *ge_model_ptr), | ||||
| "Graph[%s] builder SaveDataToModel() return fail.", comp_graph->GetName().c_str()); | "Graph[%s] builder SaveDataToModel() return fail.", comp_graph->GetName().c_str()); | ||||
| GELOGI("Success to build graph[%s] model.", comp_graph->GetName().c_str()); | |||||
| GELOGD("Success to build graph[%s] model.", comp_graph->GetName().c_str()); | |||||
| GE_TIMESTAMP_END(BuildSubgraph, "GraphBuilder::Build"); | GE_TIMESTAMP_END(BuildSubgraph, "GraphBuilder::Build"); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -306,7 +303,7 @@ Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeMo | |||||
| } | } | ||||
| GE_CHK_STATUS_RET(builder.SaveDataToModel(*model_ptr, *ge_model_ptr), | GE_CHK_STATUS_RET(builder.SaveDataToModel(*model_ptr, *ge_model_ptr), | ||||
| "Graph[%s] builder SaveDataToModel() return fail.", comp_graph->GetName().c_str()); | "Graph[%s] builder SaveDataToModel() return fail.", comp_graph->GetName().c_str()); | ||||
| GELOGI("Success to build graph[%s] model.", comp_graph->GetName().c_str()); | |||||
| GELOGD("Success to build graph[%s] model.", comp_graph->GetName().c_str()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -542,7 +539,6 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc) | |||||
| } | } | ||||
| Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list) { | Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list) { | ||||
| GELOGI("[SecondPartition] second partition."); | |||||
| GE_TIMESTAMP_START(GraphPartition2); | GE_TIMESTAMP_START(GraphPartition2); | ||||
| auto ret = graph_partitioner_.Partition(comp_graph, GraphPartitioner::kSecondPartitioning); | auto ret = graph_partitioner_.Partition(comp_graph, GraphPartitioner::kSecondPartitioning); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| @@ -33,7 +33,7 @@ Status LabelAllocator::AssignFunctionalLabels() { | |||||
| } | } | ||||
| // Add label task for sub graph. | // Add label task for sub graph. | ||||
| GELOGI("AssignFunctionalLabels start: %s.", compute_graph_->GetName().c_str()); | |||||
| GELOGD("AssignFunctionalLabels start: %s.", compute_graph_->GetName().c_str()); | |||||
| std::set<NodePtr> functional_nodes; | std::set<NodePtr> functional_nodes; | ||||
| for (auto graph : compute_graph_->GetAllSubgraphs()) { | for (auto graph : compute_graph_->GetAllSubgraphs()) { | ||||
| if (!CollectFunctionalNode(graph, functional_nodes)) { | if (!CollectFunctionalNode(graph, functional_nodes)) { | ||||
| @@ -597,10 +597,10 @@ Status LogicalStreamAllocator::DoAssign(const ComputeGraphPtr &graph, const Grap | |||||
| return status; | return status; | ||||
| } | } | ||||
| GELOGI("Subgraphs of graph %s:", graph->GetName().c_str()); | |||||
| GELOGD("Subgraphs of graph %s:", graph->GetName().c_str()); | |||||
| for (const auto &subgraph : subgraphs) { | for (const auto &subgraph : subgraphs) { | ||||
| if (subgraph != nullptr) { | if (subgraph != nullptr) { | ||||
| GELOGI("subgraph: %s", subgraph->name.c_str()); | |||||
| GELOGD("subgraph: %s", subgraph->name.c_str()); | |||||
| } | } | ||||
| } | } | ||||
| @@ -664,9 +664,9 @@ Status LogicalStreamAllocator::RunPasses(const ComputeGraphPtr &graph, const vec | |||||
| Status status = pass->Run(graph, subgraphs, context_); | Status status = pass->Run(graph, subgraphs, context_); | ||||
| if (status == SUCCESS) { | if (status == SUCCESS) { | ||||
| GELOGI("Stream pass %s return SUCCESS.", pass->GetName().c_str()); | |||||
| GELOGD("Stream pass %s return SUCCESS.", pass->GetName().c_str()); | |||||
| } else if (status == NOT_CHANGED) { | } else if (status == NOT_CHANGED) { | ||||
| GELOGI("Stream pass %s return NOT_CHANGED.", pass->GetName().c_str()); | |||||
| GELOGD("Stream pass %s return NOT_CHANGED.", pass->GetName().c_str()); | |||||
| } else { | } else { | ||||
| GELOGE(status, "Stream pass %s failed.", pass->GetName().c_str()); | GELOGE(status, "Stream pass %s failed.", pass->GetName().c_str()); | ||||
| return status; | return status; | ||||
| @@ -76,7 +76,7 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) { | |||||
| auto range_number = static_cast<size_t>( | auto range_number = static_cast<size_t>( | ||||
| ceil(log(all_memory_size.back() / static_cast<double>(all_memory_size.front())) / log(kLogBase))); | ceil(log(all_memory_size.back() / static_cast<double>(all_memory_size.front())) / log(kLogBase))); | ||||
| range_number = (range_number == 0) ? 1 : range_number; | range_number = (range_number == 0) ? 1 : range_number; | ||||
| GELOGI("Range number: %zu", range_number); | |||||
| GELOGD("Range number: %zu", range_number); | |||||
| vector<vector<int64_t>> ranges(range_number); | vector<vector<int64_t>> ranges(range_number); | ||||
| GE_CHK_BOOL_EXEC((range_number != 0), return PARAM_INVALID, "range_number can't be 0."); | GE_CHK_BOOL_EXEC((range_number != 0), return PARAM_INVALID, "range_number can't be 0."); | ||||
| @@ -114,7 +114,7 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) { | |||||
| range_ceils.push_back(range.back()); | range_ceils.push_back(range.back()); | ||||
| } | } | ||||
| } | } | ||||
| GELOGI("Range ceils: %s", ToString(range_ceils).c_str()); | |||||
| GELOGD("Range ceils: %s", ToString(range_ceils).c_str()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -455,12 +455,11 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { | |||||
| GetNodeWorkSpaceSize(n, temp); | GetNodeWorkSpaceSize(n, temp); | ||||
| all_memory_size.insert(all_memory_size.end(), temp.begin(), temp.end()); | all_memory_size.insert(all_memory_size.end(), temp.begin(), temp.end()); | ||||
| } | } | ||||
| GELOGI("The last atomic_addr_clean node id: %ld", atomic_addr_clean_id_); | |||||
| for (const auto &pair : symbol_size_) { | for (const auto &pair : symbol_size_) { | ||||
| all_memory_size.emplace_back(pair.second); | all_memory_size.emplace_back(pair.second); | ||||
| } | } | ||||
| sort(all_memory_size.begin(), all_memory_size.end()); | sort(all_memory_size.begin(), all_memory_size.end()); | ||||
| GELOGI("All memory size: %s", ToString(all_memory_size).c_str()); | |||||
| GELOGD("All memory size: %s", ToString(all_memory_size).c_str()); | |||||
| for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) { | for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) { | ||||
| if (*iter == 0) { | if (*iter == 0) { | ||||
| @@ -495,7 +494,7 @@ size_t GetBlockSize(size_t size, const vector<int64_t> &ranges) { | |||||
| bool IsDirectOutputNode(const NodePtr &node, int idx) { | bool IsDirectOutputNode(const NodePtr &node, int idx) { | ||||
| if ((node != nullptr) && (node->GetOpDesc() != nullptr) && (node->GetOpDesc()->GetType() == NETOUTPUT)) { | if ((node != nullptr) && (node->GetOpDesc() != nullptr) && (node->GetOpDesc()->GetType() == NETOUTPUT)) { | ||||
| GELOGI("This is netoutput node, the input node mem can not be reused"); | |||||
| GELOGD("This is netoutput node, the input node mem can not be reused"); | |||||
| return true; | return true; | ||||
| } | } | ||||
| return false; | return false; | ||||
| @@ -1102,7 +1101,7 @@ bool IsAtomicOutputMemory(const ge::NodePtr &node, uint32_t output_index, bool i | |||||
| if (static_cast<uint32_t>(index) == output_index) { | if (static_cast<uint32_t>(index) == output_index) { | ||||
| if (node->GetOwnerComputeGraph() != nullptr) { | if (node->GetOwnerComputeGraph() != nullptr) { | ||||
| string graph_name = node->GetOwnerComputeGraph()->GetName(); | string graph_name = node->GetOwnerComputeGraph()->GetName(); | ||||
| GELOGD("[IMAS]Atomic no assign %s name[%s] output[%d] streamid[%ld].", graph_name.c_str(), | |||||
| GELOGD("[IMAS]Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(), | |||||
| op_desc->GetName().c_str(), index, op_desc->GetStreamId()); | op_desc->GetName().c_str(), index, op_desc->GetStreamId()); | ||||
| } | } | ||||
| return true; | return true; | ||||
| @@ -1219,7 +1218,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
| int64_t stream_id = op_desc->GetStreamId(); | int64_t stream_id = op_desc->GetStreamId(); | ||||
| vector<int64_t> memorys_type; | vector<int64_t> memorys_type; | ||||
| bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, memorys_type); | bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, memorys_type); | ||||
| GELOGI("Assign memory node[%s], output size[%d], output memory type size[%d]", op_desc->GetName().c_str(), | |||||
| GELOGD("Assign memory node[%s], output size[%zu], output memory type size[%zu]", op_desc->GetName().c_str(), | |||||
| op_desc->GetOutputsSize(), memorys_type.size()); | op_desc->GetOutputsSize(), memorys_type.size()); | ||||
| if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) { | if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) { | ||||
| GELOGE(INTERNAL_ERROR, "fusion: node[%s], output memory size err[outputsize:%zu, memorysize:%zu]", | GELOGE(INTERNAL_ERROR, "fusion: node[%s], output memory size err[outputsize:%zu, memorysize:%zu]", | ||||
| @@ -1257,7 +1256,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
| // fusion: other type's size not means malloc HBM memory | // fusion: other type's size not means malloc HBM memory | ||||
| bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; | bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; | ||||
| if (l1_flag) { | if (l1_flag) { | ||||
| GELOGI("fusion: node[%s], output[%s], output memory type [%d]", | |||||
| GELOGI("fusion: node[%s], output[%s], output memory type [%ld]", | |||||
| op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); | op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); | ||||
| size = 0; | size = 0; | ||||
| } | } | ||||
| @@ -1311,7 +1310,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
| /// | /// | ||||
| void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | ||||
| (void)ge::GetContext().GetOption(OPTION_EXEC_DISABLE_REUSED_MEMORY, ge_disable_reuse_mem_env_); | (void)ge::GetContext().GetOption(OPTION_EXEC_DISABLE_REUSED_MEMORY, ge_disable_reuse_mem_env_); | ||||
| GEEVENT("Reuse memory %s", ge_disable_reuse_mem_env_ == "1" ? "close" : "open"); | |||||
| GELOGD("Reuse memory %s", ge_disable_reuse_mem_env_ == "1" ? "close" : "open"); | |||||
| string op_no_reuse_mem_str; | string op_no_reuse_mem_str; | ||||
| const char *op_no_reuse_mem = std::getenv(OP_NO_REUSE_MEM); | const char *op_no_reuse_mem = std::getenv(OP_NO_REUSE_MEM); | ||||
| GE_IF_BOOL_EXEC(op_no_reuse_mem != nullptr, op_no_reuse_mem_str = string(op_no_reuse_mem); | GE_IF_BOOL_EXEC(op_no_reuse_mem != nullptr, op_no_reuse_mem_str = string(op_no_reuse_mem); | ||||
| @@ -1337,7 +1336,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||||
| vector<bool> workspace_reuse_flag; | vector<bool> workspace_reuse_flag; | ||||
| GE_IF_BOOL_EXEC(!ge::AttrUtils::GetListBool(node_op_desc, kAttrNameWorkspaceReuseFlag, workspace_reuse_flag), | GE_IF_BOOL_EXEC(!ge::AttrUtils::GetListBool(node_op_desc, kAttrNameWorkspaceReuseFlag, workspace_reuse_flag), | ||||
| GELOGD("OP %s get workspace_reuse_flag attr failed", node_op_desc->GetName().c_str())); | GELOGD("OP %s get workspace_reuse_flag attr failed", node_op_desc->GetName().c_str())); | ||||
| GELOGI("Assign memory node[%s], size [temp:%zu, memory type size:%zu]", node_op_desc->GetName().c_str(), | |||||
| GELOGD("Assign memory node[%s], size [temp:%zu, memory type size:%zu]", node_op_desc->GetName().c_str(), | |||||
| temp.size(), tvm_workspace_memory_type.size()); | temp.size(), tvm_workspace_memory_type.size()); | ||||
| if (has_tvm_workspace_mem_type_attr && (temp.size() != tvm_workspace_memory_type.size())) { | if (has_tvm_workspace_mem_type_attr && (temp.size() != tvm_workspace_memory_type.size())) { | ||||
| @@ -1350,7 +1349,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||||
| bool workspace_skip_flag = false; | bool workspace_skip_flag = false; | ||||
| if (has_tvm_workspace_mem_type_attr && tvm_workspace_memory_type[i] == RT_MEMORY_L1) { | if (has_tvm_workspace_mem_type_attr && tvm_workspace_memory_type[i] == RT_MEMORY_L1) { | ||||
| GELOGI( | GELOGI( | ||||
| "fusion: node[%s]workspace index[%d] is not hbm type, add to zero_memory_list, workspace memory type [%ld]", | |||||
| "fusion: node[%s]workspace index[%zu] is not hbm type, add to zero_memory_list, workspace memory type [%ld]", | |||||
| node_op_desc->GetName().c_str(), i, tvm_workspace_memory_type[i]); | node_op_desc->GetName().c_str(), i, tvm_workspace_memory_type[i]); | ||||
| workspace_skip_flag = true; | workspace_skip_flag = true; | ||||
| } | } | ||||
| @@ -1628,7 +1627,7 @@ void BlockMemAssigner::ResizeMemoryBlocks() { | |||||
| memory_block->SetTailOffset(p2p_mem_offset_ - 1); | memory_block->SetTailOffset(p2p_mem_offset_ - 1); | ||||
| } | } | ||||
| } | } | ||||
| GELOGI("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu.", | |||||
| GELOGD("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu.", | |||||
| mem_offset_, p2p_mem_offset_); | mem_offset_, p2p_mem_offset_); | ||||
| } | } | ||||
| @@ -117,7 +117,7 @@ Status GraphMemoryAssigner::AssignMemory() { | |||||
| return ge::FAILED; | return ge::FAILED; | ||||
| } | } | ||||
| int64_t var_size_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) - var_size_before_assign; | int64_t var_size_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) - var_size_before_assign; | ||||
| GELOGI("GraphMemoryAssigner::AssignMemory variable size = %ld", var_size_assign); | |||||
| GELOGD("GraphMemoryAssigner::AssignMemory variable size = %ld", var_size_assign); | |||||
| mem_assigner_ = std::move(mem_assigner); | mem_assigner_ = std::move(mem_assigner); | ||||
| @@ -296,7 +296,6 @@ Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offse | |||||
| mem_offset[RT_MEMORY_HBM] += memory_block->Size(); | mem_offset[RT_MEMORY_HBM] += memory_block->Size(); | ||||
| memory_block->SetTailOffset(mem_offset[RT_MEMORY_HBM] - 1); | memory_block->SetTailOffset(mem_offset[RT_MEMORY_HBM] - 1); | ||||
| } | } | ||||
| GELOGI("mem_offset_ include zero_copy_memory is %zu.", mem_offset[RT_MEMORY_HBM]); | |||||
| // set offset for zero copy nodes | // set offset for zero copy nodes | ||||
| priority_assigner->SetOpMemOffset(true); | priority_assigner->SetOpMemOffset(true); | ||||
| @@ -309,14 +308,13 @@ Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offse | |||||
| } | } | ||||
| iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM]; | iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM]; | ||||
| GELOGI("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset[RT_MEMORY_HBM], mem_offset_tmp, | |||||
| GELOGD("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset[RT_MEMORY_HBM], mem_offset_tmp, | |||||
| zero_mem_copy_size); | zero_mem_copy_size); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | ||||
| GELOGI("Begin to reassign continuous memory"); | |||||
| Status ret; | Status ret; | ||||
| for (auto &node : compute_graph_->GetAllNodes()) { | for (auto &node : compute_graph_->GetAllNodes()) { | ||||
| // Get the continuous input type of the node, default is false | // Get the continuous input type of the node, default is false | ||||
| @@ -387,7 +385,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||||
| } | } | ||||
| } | } | ||||
| for (auto pair : memory_offset_) { | for (auto pair : memory_offset_) { | ||||
| GELOGI("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first, | |||||
| GELOGD("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first, | |||||
| pair.second.mem_offset_); | pair.second.mem_offset_); | ||||
| } | } | ||||
| return ge::SUCCESS; | return ge::SUCCESS; | ||||
| @@ -456,7 +454,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||||
| output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE; | output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE; | ||||
| } | } | ||||
| GELOGI( | GELOGI( | ||||
| "[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " | |||||
| "[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld] size[%u] " | |||||
| "real_size[%u].", | "real_size[%u].", | ||||
| node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), | node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), | ||||
| peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), | peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), | ||||
| @@ -834,7 +832,6 @@ Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map<string, vector<NodePt | |||||
| string max_batch_label; | string max_batch_label; | ||||
| GE_CHK_STATUS_RET(GetMaxBatchLabel(mem_reuse_nodes_map, mem_reuse_model, max_batch_label), | GE_CHK_STATUS_RET(GetMaxBatchLabel(mem_reuse_nodes_map, mem_reuse_model, max_batch_label), | ||||
| "Get max batch label failed."); | "Get max batch label failed."); | ||||
| GELOGI("The batch label of max batch virtual nodes is %s.", max_batch_label.c_str()); | |||||
| PrintMemoryOffset(); | PrintMemoryOffset(); | ||||
| vector<size_t> nodes_mem_offset_list; | vector<size_t> nodes_mem_offset_list; | ||||
| for (auto &i_map : mem_reuse_nodes_map) { | for (auto &i_map : mem_reuse_nodes_map) { | ||||
| @@ -1507,7 +1504,7 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< | |||||
| GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset)); | GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset)); | ||||
| } | } | ||||
| GELOGI("%s node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]", | |||||
| GELOGD("%s node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]", | |||||
| has_mem_type_attr == true ? "Fusion" : "", | has_mem_type_attr == true ? "Fusion" : "", | ||||
| tmp_op_desc->GetName().c_str(), | tmp_op_desc->GetName().c_str(), | ||||
| valid_input_index, | valid_input_index, | ||||
| @@ -62,9 +62,9 @@ Status HybridMemAssigner::Assign() { | |||||
| std::unique_ptr<BlockMemAssigner> priority_assigner; | std::unique_ptr<BlockMemAssigner> priority_assigner; | ||||
| GELOGI("Binary-block memory size:%zu, max-block memory size:%zu", bin_mem_size, max_mem_size); | |||||
| GELOGD("Binary-block memory size:%zu, max-block memory size:%zu", bin_mem_size, max_mem_size); | |||||
| if (bin_mem_size <= max_mem_size) { | if (bin_mem_size <= max_mem_size) { | ||||
| GELOGI("Use binary-block memory assigner method"); | |||||
| GELOGD("Use binary-block memory assigner method"); | |||||
| priority_assigner = std::move(binary_assigner); | priority_assigner = std::move(binary_assigner); | ||||
| } else { | } else { | ||||
| GELOGI("Use max-block memory assigner method"); | GELOGI("Use max-block memory assigner method"); | ||||
| @@ -189,7 +189,6 @@ void ModelBuilder::SetInputIsConst(const ge::NodePtr &n) { | |||||
| GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | ||||
| const auto &src_node = peer_out_anchor->GetOwnerNode(); | const auto &src_node = peer_out_anchor->GetOwnerNode(); | ||||
| if (!NodeUtils::GetConstOpType(src_node, const_type)) { | if (!NodeUtils::GetConstOpType(src_node, const_type)) { | ||||
| GELOGI("Node %s:%zu, sorce node: %s Not Const", n->GetName().c_str(), index, src_node->GetName().c_str()); | |||||
| continue; | continue; | ||||
| } | } | ||||
| @@ -232,7 +231,6 @@ Status ModelBuilder::AdjustConstWeightSize(const ge::NodePtr &node, size_t &mem_ | |||||
| Status ModelBuilder::SetInputOutputDesc() { | Status ModelBuilder::SetInputOutputDesc() { | ||||
| Status ret; | Status ret; | ||||
| GELOGI("Start to SetInputOutputDesc."); | |||||
| for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | ||||
| auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
| @@ -245,7 +243,6 @@ Status ModelBuilder::SetInputOutputDesc() { | |||||
| // final graph. | // final graph. | ||||
| if ((GetLocalOmgContext().format == domi::DOMI_TENSOR_ND) && (!node_op_desc->HasAttr("_is_single_op")) && | if ((GetLocalOmgContext().format == domi::DOMI_TENSOR_ND) && (!node_op_desc->HasAttr("_is_single_op")) && | ||||
| ((node_op_desc->GetType() == DATA_TYPE) || (node_op_desc->GetType() == NETOUTPUT))) { | ((node_op_desc->GetType() == DATA_TYPE) || (node_op_desc->GetType() == NETOUTPUT))) { | ||||
| GELOGI("The node [%s] format should be set ND.", node_op_desc->GetName().c_str()); | |||||
| auto inputDescsPtr = node_op_desc->GetAllInputsDescPtr(); | auto inputDescsPtr = node_op_desc->GetAllInputsDescPtr(); | ||||
| auto outputDescsPtr = node_op_desc->GetAllOutputsDescPtr(); | auto outputDescsPtr = node_op_desc->GetAllOutputsDescPtr(); | ||||
| ge::Format format = ge::FORMAT_ND; | ge::Format format = ge::FORMAT_ND; | ||||
| @@ -290,7 +287,7 @@ void ModelBuilder::AddNodeInputProperty() { | |||||
| vector<int64_t> src_index_list; | vector<int64_t> src_index_list; | ||||
| for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { | for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { | ||||
| auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | ||||
| GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, GELOGW("peer_out_anchor is nullptr!"); continue); | |||||
| GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | |||||
| GE_IF_BOOL_EXEC(node_op_desc->HasAttr(MERGE_PRENODE_FLAG), continue); | GE_IF_BOOL_EXEC(node_op_desc->HasAttr(MERGE_PRENODE_FLAG), continue); | ||||
| ge::NodePtr src_node = peer_out_anchor->GetOwnerNode(); | ge::NodePtr src_node = peer_out_anchor->GetOwnerNode(); | ||||
| @@ -347,7 +344,6 @@ void ModelBuilder::AddNodeInputProperty() { | |||||
| } | } | ||||
| Status ModelBuilder::AdjustInputTensorFlag() { | Status ModelBuilder::AdjustInputTensorFlag() { | ||||
| GELOGI("Start to AdjustInputTensorFlag."); | |||||
| for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | ||||
| if ((n->GetType() == DATA_TYPE) || (n->GetType() == AIPP_DATA_TYPE)) { | if ((n->GetType() == DATA_TYPE) || (n->GetType() == AIPP_DATA_TYPE)) { | ||||
| GELOGD("Data node: %s.", n->GetName().c_str()); | GELOGD("Data node: %s.", n->GetName().c_str()); | ||||
| @@ -441,7 +437,6 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { | |||||
| return FAILED); | return FAILED); | ||||
| const DumpProperties &dump_properties = PropertiesManager::Instance().GetDumpProperties(session_id_); | const DumpProperties &dump_properties = PropertiesManager::Instance().GetDumpProperties(session_id_); | ||||
| bool is_op_debug = dump_properties.IsOpDebugOpen(); | bool is_op_debug = dump_properties.IsOpDebugOpen(); | ||||
| GELOGI("Get op debug:%d", is_op_debug); | |||||
| if (is_op_debug) { | if (is_op_debug) { | ||||
| if (!ge::AttrUtils::SetBool(&model, ATTR_OP_DEBUG_FLAG, is_op_debug)) { | if (!ge::AttrUtils::SetBool(&model, ATTR_OP_DEBUG_FLAG, is_op_debug)) { | ||||
| GELOGE(FAILED, "SetBool of ATTR_OP_DEBUG_FLAG failed."); | GELOGE(FAILED, "SetBool of ATTR_OP_DEBUG_FLAG failed."); | ||||
| @@ -608,7 +603,6 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { | |||||
| } | } | ||||
| tbe_name_set.insert(tbe_kernel->GetName()); | tbe_name_set.insert(tbe_kernel->GetName()); | ||||
| tbe_kernel_store_.AddTBEKernel(tbe_kernel); | tbe_kernel_store_.AddTBEKernel(tbe_kernel); | ||||
| GELOGI("Add tbe kernel bin %s", tbe_kernel->GetName().c_str()); | |||||
| } | } | ||||
| for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | ||||
| @@ -678,7 +672,6 @@ Status ModelBuilder::PreBuildModel() { | |||||
| GELOGE(FAILED, "Graph_ is not valid."); | GELOGE(FAILED, "Graph_ is not valid."); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| GELOGI("BuildModel begin."); | |||||
| GE_CHK_STATUS_RET(SetInputOutputDesc(), "SetInputOutputDesc Failed!"); | GE_CHK_STATUS_RET(SetInputOutputDesc(), "SetInputOutputDesc Failed!"); | ||||
| @@ -140,7 +140,7 @@ void RunContextUtil::DestroyRtModelResources() noexcept { | |||||
| Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &graph, Buffer &buffer, | Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &graph, Buffer &buffer, | ||||
| const uint64_t session_id) { | const uint64_t session_id) { | ||||
| GELOGI("Begin to Create RunContext, session_id = %lu", session_id); | |||||
| GELOGD("Begin to Create RunContext, session_id = %lu", session_id); | |||||
| // check params | // check params | ||||
| if (graph == nullptr) { | if (graph == nullptr) { | ||||
| GELOGE(PARAM_INVALID, "CreateRunContext param graph is null. session_id=%lu", session_id); | GELOGE(PARAM_INVALID, "CreateRunContext param graph is null. session_id=%lu", session_id); | ||||
| @@ -152,21 +152,21 @@ Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &gra | |||||
| GELOGE(INTERNAL_ERROR, "Get stream_num attr from model_def failed. session_id=%lu", session_id); | GELOGE(INTERNAL_ERROR, "Get stream_num attr from model_def failed. session_id=%lu", session_id); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| GELOGI("Stream_num = %u", stream_num); | |||||
| GELOGD("Stream_num = %u", stream_num); | |||||
| uint32_t event_num = 0; | uint32_t event_num = 0; | ||||
| if (!AttrUtils::GetInt(&model, ATTR_MODEL_EVENT_NUM, event_num)) { | if (!AttrUtils::GetInt(&model, ATTR_MODEL_EVENT_NUM, event_num)) { | ||||
| GELOGE(INTERNAL_ERROR, "Get event_num attr from model failed. session_id=%lu", session_id); | GELOGE(INTERNAL_ERROR, "Get event_num attr from model failed. session_id=%lu", session_id); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| GELOGI("Event_num = %u", event_num); | |||||
| GELOGD("Event_num = %u", event_num); | |||||
| uint32_t label_num = 0; | uint32_t label_num = 0; | ||||
| if (!AttrUtils::GetInt(&model, ATTR_MODEL_LABEL_NUM, label_num)) { | if (!AttrUtils::GetInt(&model, ATTR_MODEL_LABEL_NUM, label_num)) { | ||||
| GELOGE(INTERNAL_ERROR, "Get label_num attr from model failed. session_id=%lu", session_id); | GELOGE(INTERNAL_ERROR, "Get label_num attr from model failed. session_id=%lu", session_id); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| GELOGI("Label_num = %u", label_num); | |||||
| GELOGD("Label_num = %u", label_num); | |||||
| Status ret = CreateRtModelResources(stream_num, event_num, label_num); | Status ret = CreateRtModelResources(stream_num, event_num, label_num); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| @@ -198,11 +198,11 @@ Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &gra | |||||
| void RunContextUtil::PrintMemInfo() { | void RunContextUtil::PrintMemInfo() { | ||||
| for (auto iter : mem_type_to_data_mem_base_) { | for (auto iter : mem_type_to_data_mem_base_) { | ||||
| GELOGI("CreateRunContext: memory type = %ld, data memory base = %p", iter.first, iter.second); | |||||
| GELOGD("CreateRunContext: memory type = %ld, data memory base = %p", iter.first, iter.second); | |||||
| } | } | ||||
| for (auto iter : mem_type_to_data_mem_size_) { | for (auto iter : mem_type_to_data_mem_size_) { | ||||
| GELOGI("CreateRunContext: memory type = %ld, data memory size = %lu", iter.first, iter.second); | |||||
| GELOGD("CreateRunContext: memory type = %ld, data memory size = %lu", iter.first, iter.second); | |||||
| } | } | ||||
| } | } | ||||
| @@ -67,11 +67,10 @@ StreamAllocator::StreamAllocator(ComputeGraphPtr whole_graph, const Graph2SubGra | |||||
| } | } | ||||
| enable_single_stream_ = (single_stream_str == kTrueStr) ? true : false; | enable_single_stream_ = (single_stream_str == kTrueStr) ? true : false; | ||||
| GELOGI("Enable single stream: %s.", enable_single_stream_ ? kTrueStr : kFalseStr); | |||||
| GELOGD("Enable single stream: %s.", enable_single_stream_ ? kTrueStr : kFalseStr); | |||||
| } | } | ||||
| Status StreamAllocator::AssignLogicalStreams(const std::map<std::string, int> &max_parallel_num, bool hcom_parallel) { | Status StreamAllocator::AssignLogicalStreams(const std::map<std::string, int> &max_parallel_num, bool hcom_parallel) { | ||||
| GELOGI("Assign logical streams start."); | |||||
| GE_CHECK_NOTNULL(whole_graph_); | GE_CHECK_NOTNULL(whole_graph_); | ||||
| GE_DUMP(whole_graph_, "BeforeAssignedLogicalStreams"); | GE_DUMP(whole_graph_, "BeforeAssignedLogicalStreams"); | ||||
| @@ -92,15 +91,12 @@ Status StreamAllocator::AssignLogicalStreams(const std::map<std::string, int> &m | |||||
| return status; | return status; | ||||
| } | } | ||||
| GE_DUMP(whole_graph_, "AfterAssignedLogicalStreams"); | GE_DUMP(whole_graph_, "AfterAssignedLogicalStreams"); | ||||
| GELOGI("Assign logical streams success."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| // After allocating the logical stream in the graph, refresh the stream in the | // After allocating the logical stream in the graph, refresh the stream in the | ||||
| // graph and insert the synchronization node. | // graph and insert the synchronization node. | ||||
| Status StreamAllocator::RefreshRealStream(int64_t &stream_num, int64_t &event_num) { | Status StreamAllocator::RefreshRealStream(int64_t &stream_num, int64_t &event_num) { | ||||
| GELOGI("RefreshRealStream start."); | |||||
| GE_CHECK_NOTNULL(whole_graph_); | GE_CHECK_NOTNULL(whole_graph_); | ||||
| GE_DUMP(whole_graph_, "BeforeRefreshRealStream"); | GE_DUMP(whole_graph_, "BeforeRefreshRealStream"); | ||||
| @@ -174,8 +170,7 @@ Status StreamAllocator::RefreshRealStream(int64_t &stream_num, int64_t &event_nu | |||||
| GELOGI("None of nodes need to assign stream, stream num is 0, it will cause error, so change it to 1"); | GELOGI("None of nodes need to assign stream, stream num is 0, it will cause error, so change it to 1"); | ||||
| stream_num_ = 1; | stream_num_ = 1; | ||||
| } | } | ||||
| GELOGI("stream num: %ld, event num: %u.", stream_num_, event_num_); | |||||
| GELOGI("RefreshRealStream successfully."); | |||||
| GELOGD("stream num: %ld, event num: %u.", stream_num_, event_num_); | |||||
| stream_num = stream_num_; | stream_num = stream_num_; | ||||
| event_num = static_cast<int64_t>(event_num_); | event_num = static_cast<int64_t>(event_num_); | ||||
| @@ -1241,7 +1236,7 @@ void StreamAllocator::DumpEvents() { | |||||
| for (const auto &one_pair : after_refresh_stream_nodes) { | for (const auto &one_pair : after_refresh_stream_nodes) { | ||||
| int64_t stream_id = one_pair.first; | int64_t stream_id = one_pair.first; | ||||
| GELOGI("After RefreshRealStream: stream %ld.", stream_id); | |||||
| GELOGD("After RefreshRealStream: stream %ld.", stream_id); | |||||
| for (const auto &node : one_pair.second) { | for (const auto &node : one_pair.second) { | ||||
| string send_event_str; | string send_event_str; | ||||
| @@ -1273,7 +1268,7 @@ Status StreamAllocator::GetMaxStreamAndTask(bool huge_stream, uint32_t &max_stre | |||||
| GELOGE(FAILED, "Get max stream and task count by rts failed."); | GELOGE(FAILED, "Get max stream and task count by rts failed."); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| GELOGI("Allowed max stream count: %u, max task count per stream: %u.", max_stream_count, max_task_count); | |||||
| GELOGD("Allowed max stream count: %u, max task count per stream: %u.", max_stream_count, max_task_count); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -30,7 +30,7 @@ StreamGraphOptimizer::~StreamGraphOptimizer() {} | |||||
| void StreamGraphOptimizer::RefreshNodeId(const ComputeGraphPtr &comp_graph, Graph2SubGraphInfoList &subgraph_map) { | void StreamGraphOptimizer::RefreshNodeId(const ComputeGraphPtr &comp_graph, Graph2SubGraphInfoList &subgraph_map) { | ||||
| size_t node_size = comp_graph->GetAllNodesSize(); | size_t node_size = comp_graph->GetAllNodesSize(); | ||||
| GELOGI("Refresh placeholder and end nodeId start from node num: %zu", node_size); | |||||
| GELOGD("Refresh placeholder and end nodeId start from node num: %zu", node_size); | |||||
| for (const auto &subgraph_pair : subgraph_map) { | for (const auto &subgraph_pair : subgraph_map) { | ||||
| for (const auto &subgraph_info : subgraph_pair.second) { | for (const auto &subgraph_info : subgraph_pair.second) { | ||||
| ComputeGraphPtr subgraph = subgraph_info->GetSubGraph(); | ComputeGraphPtr subgraph = subgraph_info->GetSubGraph(); | ||||
| @@ -74,8 +74,6 @@ bool StreamGraphOptimizer::IsSameStreamId(const ComputeGraphPtr &comp_graph) { | |||||
| Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &comp_graph, | Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &comp_graph, | ||||
| Graph2SubGraphInfoList &subgraph_map, | Graph2SubGraphInfoList &subgraph_map, | ||||
| struct RunContext &run_context) { | struct RunContext &run_context) { | ||||
| GELOGI("Optimize streamed subgraph start."); | |||||
| RefreshNodeId(comp_graph, subgraph_map); | RefreshNodeId(comp_graph, subgraph_map); | ||||
| std::shared_ptr<GELib> instance = ge::GELib::GetInstance(); | std::shared_ptr<GELib> instance = ge::GELib::GetInstance(); | ||||
| @@ -86,7 +84,7 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com | |||||
| ComputeGraphPtr subgraph = subgraph_info->GetSubGraph(); | ComputeGraphPtr subgraph = subgraph_info->GetSubGraph(); | ||||
| GE_CHECK_NOTNULL(subgraph); | GE_CHECK_NOTNULL(subgraph); | ||||
| GELOGI("Optimize subgraph %s", subgraph->GetName().c_str()); | |||||
| GELOGD("Optimize subgraph %s", subgraph->GetName().c_str()); | |||||
| std::string engine_name = subgraph_info->GetEngineName(); | std::string engine_name = subgraph_info->GetEngineName(); | ||||
| @@ -128,7 +126,7 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com | |||||
| subgraph->GetName().c_str(), engine_name.c_str(), graph_optimizers.size(), ret); | subgraph->GetName().c_str(), engine_name.c_str(), graph_optimizers.size(), ret); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| GELOGI( | |||||
| GELOGD( | |||||
| "[optimizeStreamedSubGraph]: optimize streamed subgraph success, subgraph: %s, engine_name: %s, graph " | "[optimizeStreamedSubGraph]: optimize streamed subgraph success, subgraph: %s, engine_name: %s, graph " | ||||
| "Optimizer num: %zu!", | "Optimizer num: %zu!", | ||||
| subgraph->GetName().c_str(), engine_name.c_str(), graph_optimizers.size()); | subgraph->GetName().c_str(), engine_name.c_str(), graph_optimizers.size()); | ||||
| @@ -137,7 +135,7 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com | |||||
| } | } | ||||
| } | } | ||||
| GELOGI("Optimize streamed subgraph success."); | |||||
| GELOGD("Optimize streamed subgraph success."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -68,7 +68,7 @@ TaskGenerator::TaskGenerator(uint8_t *var_mem_base, uint64_t var_mem_size) { | |||||
| TaskGenerator::~TaskGenerator() {} | TaskGenerator::~TaskGenerator() {} | ||||
| Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t session_id, RunContext &run_context) { | Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t session_id, RunContext &run_context) { | ||||
| GELOGI("Begin to Get TaskInfo. session_id=%lu", session_id); | |||||
| GELOGD("Begin to Get TaskInfo. session_id=%lu", session_id); | |||||
| // Check params | // Check params | ||||
| if (graph == nullptr) { | if (graph == nullptr) { | ||||
| GELOGE(PARAM_INVALID, "GetTaskInfo param graph is null. session_id=%lu", session_id); | GELOGE(PARAM_INVALID, "GetTaskInfo param graph is null. session_id=%lu", session_id); | ||||
| @@ -120,7 +120,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| GELOGI("Get TaskInfo success. session_id=%lu", session_id); | |||||
| GELOGD("Get TaskInfo success. session_id=%lu", session_id); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -232,7 +232,7 @@ Status TaskGenerator::SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| GELOGI("Fusion: get fusion group numbers [%zu].", fusion_nodes.size()); | |||||
| GELOGD("Fusion: get fusion group numbers [%zu].", fusion_nodes.size()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -575,7 +575,7 @@ Status TaskGenerator::MarkFirstAndLastOps(const vector<OpDescPtr> &ops, bool is_ | |||||
| continuous_op_lists.back().emplace_back(op_desc); | continuous_op_lists.back().emplace_back(op_desc); | ||||
| } | } | ||||
| } | } | ||||
| GELOGI("Number of continuous node lists is %zu.", continuous_op_lists.size()); | |||||
| GELOGD("Number of continuous node lists is %zu.", continuous_op_lists.size()); | |||||
| for (const auto &continuous_ops : continuous_op_lists) { | for (const auto &continuous_ops : continuous_op_lists) { | ||||
| map<string, std::pair<OpDescPtr, OpDescPtr>> first_and_last_ops; | map<string, std::pair<OpDescPtr, OpDescPtr>> first_and_last_ops; | ||||
| @@ -846,13 +846,12 @@ Status TaskGenerator::GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint | |||||
| Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, | Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, | ||||
| vector<uint32_t> &all_reduce_nodes) const { | vector<uint32_t> &all_reduce_nodes) const { | ||||
| GELOGI("Start FindProfilingTaskIndex."); | |||||
| GE_CHECK_NOTNULL(graph); | GE_CHECK_NOTNULL(graph); | ||||
| const char *profiling_mode = std::getenv(kProfilingMode); | const char *profiling_mode = std::getenv(kProfilingMode); | ||||
| bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | ||||
| ProfilingManager::Instance().ProfilingTrainingTraceOn(); | ProfilingManager::Instance().ProfilingTrainingTraceOn(); | ||||
| if (!is_profiling) { | if (!is_profiling) { | ||||
| GELOGW("Profiling is not open."); | |||||
| GELOGD("Profiling is not open."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -115,7 +115,7 @@ Status GraphExecutor::FreeInOutBuffer() { | |||||
| malloc_flag_ = false; | malloc_flag_ = false; | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } else { | } else { | ||||
| GELOGI("[GraphManager] not malloc buffer."); | |||||
| GELOGD("[GraphManager] not malloc buffer."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| } | } | ||||
| @@ -286,7 +286,7 @@ Status GraphLoader::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asyn | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| GELOGI("Execute model success, model_id:%u.", model_id); | |||||
| GELOGD("Execute model success, model_id:%u.", model_id); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -131,7 +131,7 @@ Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const v | |||||
| for (const auto &virtual_args_addr : virtual_args_addrs) { | for (const auto &virtual_args_addr : virtual_args_addrs) { | ||||
| for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) { | for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) { | ||||
| src_addrs.push_back(mbuf_list.at(index)); | src_addrs.push_back(mbuf_list.at(index)); | ||||
| dst_addrs.push_back(reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i)))); | |||||
| dst_addrs.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i)))); | |||||
| } | } | ||||
| } | } | ||||
| index++; | index++; | ||||
| @@ -159,7 +159,6 @@ void DataDumper::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_ | |||||
| } | } | ||||
| void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) { | void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) { | ||||
| GELOGI("Start to save data %s message", node->GetName().c_str()); | |||||
| if (node != nullptr) { | if (node != nullptr) { | ||||
| auto input_op_desc = node->GetOpDesc(); | auto input_op_desc = node->GetOpDesc(); | ||||
| if (input_op_desc == nullptr) { | if (input_op_desc == nullptr) { | ||||
| @@ -180,7 +179,6 @@ void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) { | |||||
| {op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}}); | {op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}}); | ||||
| } | } | ||||
| } | } | ||||
| GELOGI("Save data message successfully"); | |||||
| } | } | ||||
| } | } | ||||
| @@ -218,7 +216,7 @@ void DataDumper::SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr | |||||
| GELOGW("Get input size failed"); | GELOGW("Get input size failed"); | ||||
| return; | return; | ||||
| } | } | ||||
| GELOGI("Save dump op info, the input size is %ld", input_size); | |||||
| GELOGD("Save dump op info, the input size is %ld", input_size); | |||||
| op_desc_info.input_size.emplace_back(input_size); | op_desc_info.input_size.emplace_back(input_size); | ||||
| } | } | ||||
| for (size_t j = 0; j < op->GetOutputsSize(); ++j) { | for (size_t j = 0; j < op->GetOutputsSize(); ++j) { | ||||
| @@ -234,7 +232,7 @@ void DataDumper::SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr | |||||
| GELOGW("Get input size failed"); | GELOGW("Get input size failed"); | ||||
| return; | return; | ||||
| } | } | ||||
| GELOGI("Save dump op info, the output size is %ld", output_size); | |||||
| GELOGD("Save dump op info, the output size is %ld", output_size); | |||||
| op_desc_info.output_size.emplace_back(output_size); | op_desc_info.output_size.emplace_back(output_size); | ||||
| } | } | ||||
| op_desc_info.input_addrs = ModelUtils::GetInputDataAddrs(model_param, op); | op_desc_info.input_addrs = ModelUtils::GetInputDataAddrs(model_param, op); | ||||
| @@ -301,22 +299,16 @@ static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uin | |||||
| if (step_id != 0) { | if (step_id != 0) { | ||||
| GELOGI("step_id exists."); | GELOGI("step_id exists."); | ||||
| op_mapping_info.set_step_id_addr(static_cast<uint64_t>(step_id)); | op_mapping_info.set_step_id_addr(static_cast<uint64_t>(step_id)); | ||||
| } else { | |||||
| GELOGI("step_id is null."); | |||||
| } | } | ||||
| if (loop_per_iter != 0) { | if (loop_per_iter != 0) { | ||||
| GELOGI("loop_per_iter exists."); | GELOGI("loop_per_iter exists."); | ||||
| op_mapping_info.set_iterations_per_loop_addr(static_cast<uint64_t>(loop_per_iter)); | op_mapping_info.set_iterations_per_loop_addr(static_cast<uint64_t>(loop_per_iter)); | ||||
| } else { | |||||
| GELOGI("loop_per_iter is null."); | |||||
| } | } | ||||
| if (loop_cond != 0) { | if (loop_cond != 0) { | ||||
| GELOGI("loop_cond exists."); | GELOGI("loop_cond exists."); | ||||
| op_mapping_info.set_loop_cond_addr(static_cast<uint64_t>(loop_cond)); | op_mapping_info.set_loop_cond_addr(static_cast<uint64_t>(loop_cond)); | ||||
| } else { | |||||
| GELOGI("loop_cond is null."); | |||||
| } | } | ||||
| } | } | ||||
| @@ -672,7 +664,7 @@ Status DataDumper::LoadDumpInfo() { | |||||
| PrintCheckLog(dump_list_key); | PrintCheckLog(dump_list_key); | ||||
| if (op_list_.empty()) { | if (op_list_.empty()) { | ||||
| GELOGW("op_list_ is empty"); | |||||
| GELOGD("op_list_ is empty"); | |||||
| } | } | ||||
| aicpu::dump::OpMappingInfo op_mapping_info; | aicpu::dump::OpMappingInfo op_mapping_info; | ||||
| @@ -684,8 +676,6 @@ Status DataDumper::LoadDumpInfo() { | |||||
| op_mapping_info.set_flag(kAicpuLoadFlag); | op_mapping_info.set_flag(kAicpuLoadFlag); | ||||
| op_mapping_info.set_dump_step(dump_properties_.GetDumpStep()); | op_mapping_info.set_dump_step(dump_properties_.GetDumpStep()); | ||||
| SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); | SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); | ||||
| GELOGI("Dump step is %s and dump path is %s dump model is %s in load dump info", | |||||
| dump_properties_.GetDumpStep().c_str(), dump_path.c_str(), dump_list_key.c_str()); | |||||
| auto ret = BuildTaskInfo(op_mapping_info); | auto ret = BuildTaskInfo(op_mapping_info); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Build task info failed"); | GELOGE(ret, "Build task info failed"); | ||||
| @@ -812,7 +802,6 @@ void DataDumper::SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void | |||||
| Status DataDumper::UnloadDumpInfo() { | Status DataDumper::UnloadDumpInfo() { | ||||
| if (!load_flag_) { | if (!load_flag_) { | ||||
| GELOGI("No need to UnloadDumpInfo."); | |||||
| load_flag_ = false; | load_flag_ = false; | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -838,7 +827,6 @@ Status DataDumper::UnloadDumpInfo() { | |||||
| void DataDumper::PrintCheckLog(string &dump_list_key) { | void DataDumper::PrintCheckLog(string &dump_list_key) { | ||||
| std::set<std::string> model_list = dump_properties_.GetAllDumpModel(); | std::set<std::string> model_list = dump_properties_.GetAllDumpModel(); | ||||
| if (model_list.empty()) { | if (model_list.empty()) { | ||||
| GELOGI("No model need dump."); | |||||
| return; | return; | ||||
| } | } | ||||
| @@ -17,11 +17,7 @@ | |||||
| #include "graph/load/new_model_manager/davinci_model.h" | #include "graph/load/new_model_manager/davinci_model.h" | ||||
| #include <cce/dnn.h> | #include <cce/dnn.h> | ||||
| #include <dlfcn.h> | |||||
| #include <graph/utils/node_utils.h> | #include <graph/utils/node_utils.h> | ||||
| #include <pthread.h> | |||||
| #include <sched.h> | |||||
| #include <sys/prctl.h> | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <map> | #include <map> | ||||
| #include <utility> | #include <utility> | ||||
| @@ -206,7 +202,6 @@ DavinciModel::~DavinciModel() { | |||||
| OpDebugUnRegister(); | OpDebugUnRegister(); | ||||
| GELOGI("do ReleaseTask"); | |||||
| ReleaseTask(); | ReleaseTask(); | ||||
| CleanTbeHandle(); | CleanTbeHandle(); | ||||
| @@ -337,7 +332,6 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p | |||||
| GELOGI("[IMAS]InitModelMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, | GELOGI("[IMAS]InitModelMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, | ||||
| weights_mem_base_, weights_size); | weights_mem_base_, weights_size); | ||||
| GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE)); | GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE)); | ||||
| GELOGI("copy weights data to device"); | |||||
| } | } | ||||
| GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed."); | GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed."); | ||||
| @@ -488,7 +482,7 @@ Status DavinciModel::SetTSDevice() { | |||||
| int64_t value = 0; | int64_t value = 0; | ||||
| bool ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_CORE_TYPE, value); | bool ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_CORE_TYPE, value); | ||||
| uint32_t core_type = ret ? static_cast<uint32_t>(value) : 0; | uint32_t core_type = ret ? static_cast<uint32_t>(value) : 0; | ||||
| GELOGI("SetTSDevice: %u", core_type); | |||||
| GELOGD("SetTSDevice: %u", core_type); | |||||
| rtError_t rt_ret = rtSetTSDevice(core_type); | rtError_t rt_ret = rtSetTSDevice(core_type); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(RT_FAILED, "SetTSDevice failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "SetTSDevice failed, ret: 0x%X", rt_ret); | ||||
| @@ -543,7 +537,6 @@ Status DavinciModel::OpDebugRegister() { | |||||
| } | } | ||||
| void DavinciModel::OpDebugUnRegister() { | void DavinciModel::OpDebugUnRegister() { | ||||
| GELOGI("OpDebugUnRegister, is_op_debug_reg_ = %d", is_op_debug_reg_); | |||||
| if (is_op_debug_reg_) { | if (is_op_debug_reg_) { | ||||
| debug_reg_mutex_.unlock(); | debug_reg_mutex_.unlock(); | ||||
| rtError_t rt_ret = RT_ERROR_NONE; | rtError_t rt_ret = RT_ERROR_NONE; | ||||
| @@ -648,7 +641,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
| GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(compute_graph, session_id_, device_id_), "copy var data failed."); | GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(compute_graph, session_id_, device_id_), "copy var data failed."); | ||||
| GE_TIMESTAMP_START(InitModelMem); | GE_TIMESTAMP_START(InitModelMem); | ||||
| GELOGI("Known node is %d", known_node_); | |||||
| GELOGD("Known node is %d", known_node_); | |||||
| if (!known_node_) { | if (!known_node_) { | ||||
| GE_CHK_STATUS_RET_NOLOG(InitModelMem(dev_ptr, mem_size, weight_ptr, weight_size)); | GE_CHK_STATUS_RET_NOLOG(InitModelMem(dev_ptr, mem_size, weight_ptr, weight_size)); | ||||
| data_inputer_ = new (std::nothrow) DataInputer(); | data_inputer_ = new (std::nothrow) DataInputer(); | ||||
| @@ -708,7 +701,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
| } | } | ||||
| Shrink(); | Shrink(); | ||||
| GELOGI("Davinci model init success."); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -968,18 +960,21 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma | |||||
| const vector<int64_t> output_size_list = ModelUtils::GetOutputSize(op_desc); | const vector<int64_t> output_size_list = ModelUtils::GetOutputSize(op_desc); | ||||
| const vector<void *> virtual_addr_list = ModelUtils::GetOutputDataAddrs(runtime_param_, op_desc); | const vector<void *> virtual_addr_list = ModelUtils::GetOutputDataAddrs(runtime_param_, op_desc); | ||||
| const vector<int64_t> output_offset_list = op_desc->GetOutputOffset(); | const vector<int64_t> output_offset_list = op_desc->GetOutputOffset(); | ||||
| if (output_offset_list.size() != virtual_addr_list.size()) { | |||||
| GELOGE(PARAM_INVALID, "virtual_addr size:%zu should be equal to offset size:%zu.", virtual_addr_list.size(), | |||||
| output_offset_list.size()); | |||||
| if (output_size_list.empty() || virtual_addr_list.empty() || (output_size_list.size() != virtual_addr_list.size()) || | |||||
| (output_offset_list.size() != virtual_addr_list.size())) { | |||||
| GELOGE(PARAM_INVALID, "Data[%s] init failed: output size is %zu, virtual_addr size is %zu, offset size is %zu.", | |||||
| op_desc->GetName().c_str(), output_size_list.size(), virtual_addr_list.size(), output_offset_list.size()); | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| auto data_index = data_op_index; | auto data_index = data_op_index; | ||||
| if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { | if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { | ||||
| GELOGI("ge_train: get new index %u, old %u", data_index, data_op_index); | |||||
| GELOGD("ge_train: get new index %u, old %u", data_index, data_op_index); | |||||
| } | } | ||||
| bool fusion_flag = false; | bool fusion_flag = false; | ||||
| ZeroCopyOffset zero_copy_offset; | ZeroCopyOffset zero_copy_offset; | ||||
| Status ret = zero_copy_offset.InitInputDataInfo(output_size_list, virtual_addr_list, op_desc, fusion_flag); | |||||
| int64_t data_size = output_size_list[kDataIndex]; | |||||
| void *virtual_addr = virtual_addr_list[kDataIndex]; | |||||
| Status ret = zero_copy_offset.InitInputDataInfo(data_size, virtual_addr, op_desc, fusion_flag); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str()); | GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str()); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| @@ -996,7 +991,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma | |||||
| new_input_outside_addrs_[addr] = zero_copy_offset; | new_input_outside_addrs_[addr] = zero_copy_offset; | ||||
| } | } | ||||
| GELOGI("SetInputOutsideAddr success."); | |||||
| data_op_index++; | data_op_index++; | ||||
| if (InitInputZeroCopyNodes(node) != SUCCESS) { | if (InitInputZeroCopyNodes(node) != SUCCESS) { | ||||
| GELOGE(PARAM_INVALID, "Input zero copy nodes init failed!"); | GELOGE(PARAM_INVALID, "Input zero copy nodes init failed!"); | ||||
| @@ -1131,7 +1125,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { | |||||
| DisableZeroCopy(real_addr); | DisableZeroCopy(real_addr); | ||||
| real_virtual_addrs_.insert(real_addr); | real_virtual_addrs_.insert(real_addr); | ||||
| } | } | ||||
| GELOGI("SetOutputOutsideAddr success."); | |||||
| } | } | ||||
| GE_IF_BOOL_EXEC(InitOutputZeroCopyNodes(node) != SUCCESS, | GE_IF_BOOL_EXEC(InitOutputZeroCopyNodes(node) != SUCCESS, | ||||
| @@ -1147,8 +1140,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { | |||||
| GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS, | GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS, | ||||
| GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;); | GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;); | ||||
| } | } | ||||
| GELOGI("DavinciModel::InitNetoutput success."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -1459,7 +1450,7 @@ Status DavinciModel::InitLabelSet(const OpDescPtr &op_desc) { | |||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| if (label_index >= LabelNum()) { | if (label_index >= LabelNum()) { | ||||
| GELOGE(INTERNAL_ERROR, "InitLabelSet: label index: %u >= label size: %zu.", label_index, LabelNum()); | |||||
| GELOGE(INTERNAL_ERROR, "InitLabelSet: label index: %u >= label size: %u.", label_index, LabelNum()); | |||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| if (label_id_indication_.count(label_index) > 0) { | if (label_id_indication_.count(label_index) > 0) { | ||||
| @@ -1968,7 +1959,6 @@ void DavinciModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_i | |||||
| if (op->GetType() != NETOUTPUT) { | if (op->GetType() != NETOUTPUT) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| GELOGI("Start to get dynamic output dims attr"); | |||||
| if (!AttrUtils::GetListStr(op, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) { | if (!AttrUtils::GetListStr(op, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) { | ||||
| GELOGD("Can not get dynamic output dims attr"); | GELOGD("Can not get dynamic output dims attr"); | ||||
| } | } | ||||
| @@ -2124,7 +2114,7 @@ void DavinciModel::CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputD | |||||
| } | } | ||||
| Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats) { | Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats) { | ||||
| GELOGI("Output node size: %zu", output_op_list_.size()); | |||||
| GELOGD("Output node size: %zu", output_op_list_.size()); | |||||
| for (size_t i = 0; i < output_op_list_.size(); i++) { | for (size_t i = 0; i < output_op_list_.size(); i++) { | ||||
| auto &op_desc = output_op_list_[i]; | auto &op_desc = output_op_list_[i]; | ||||
| uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize()); | uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize()); | ||||
| @@ -2187,7 +2177,7 @@ Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data | |||||
| void *mem_addr = data.second.GetBasicAddr(); | void *mem_addr = data.second.GetBasicAddr(); | ||||
| void *data_buf_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(data_buf.data)); | void *data_buf_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(data_buf.data)); | ||||
| uint64_t data_buf_length = data_buf.length; | uint64_t data_buf_length = data_buf.length; | ||||
| GELOGI("[IMAS]CopyPlainData memcpy graph_%lu type[F] input[%lu] dst[%p] src[%p] mem_size[%lu] datasize[%lu]", | |||||
| GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] input[%u] dst[%p] src[%p] mem_size[%lu] datasize[%lu]", | |||||
| runtime_param_.graph_id, data.first, mem_addr, data_buf_addr, data_size, data_buf_length); | runtime_param_.graph_id, data.first, mem_addr, data_buf_addr, data_size, data_buf_length); | ||||
| GE_CHK_RT_RET(rtMemcpy(mem_addr, data_size, data_buf_addr, data_buf_length, kind)); | GE_CHK_RT_RET(rtMemcpy(mem_addr, data_size, data_buf_addr, data_buf_length, kind)); | ||||
| } | } | ||||
| @@ -2235,8 +2225,6 @@ Status DavinciModel::SinkModelProfile() { | |||||
| Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); | Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); | ||||
| GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return SUCCESS); | GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return SUCCESS); | ||||
| GELOGI("Start collect model load profiling data."); | |||||
| Msprof::Engine::ReporterData reporter_data{}; | Msprof::Engine::ReporterData reporter_data{}; | ||||
| // report model data tag name | // report model data tag name | ||||
| std::string tag_name; | std::string tag_name; | ||||
| @@ -2294,7 +2282,6 @@ Status DavinciModel::SinkModelProfile() { | |||||
| uint32_t op_num = fusion_op_info->original_op_names.size(); | uint32_t op_num = fusion_op_info->original_op_names.size(); | ||||
| uint32_t task_id = task->GetTaskID(); | uint32_t task_id = task->GetTaskID(); | ||||
| if (op_num > 0) { | if (op_num > 0) { | ||||
| GELOGI("task.id = %u, opNum = %u", task_id, op_num); | |||||
| op_id_map.insert(std::make_pair(fusion_op_info->op_index, task_id)); | op_id_map.insert(std::make_pair(fusion_op_info->op_index, task_id)); | ||||
| } | } | ||||
| } | } | ||||
| @@ -2552,21 +2539,23 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r | |||||
| if (is_dynamic_) { | if (is_dynamic_) { | ||||
| GELOGI("No need to check output data size."); | GELOGI("No need to check output data size."); | ||||
| } else if (buffer.length < mem_size) { | } else if (buffer.length < mem_size) { | ||||
| GELOGE(FAILED, "Tensor data size=%lu, buffer size=%u", mem_size, buffer.length); | |||||
| GELOGE(FAILED, "Tensor data size=%lu, buffer size=%lu", mem_size, buffer.length); | |||||
| return FAILED; | return FAILED; | ||||
| } else if (buffer.length > mem_size) { | } else if (buffer.length > mem_size) { | ||||
| GELOGW("Tensor data size=%lu, buffer size=%u", mem_size, buffer.length); | |||||
| GELOGW("Tensor data size=%lu, buffer size=%lu", mem_size, buffer.length); | |||||
| } | } | ||||
| int64_t data_size = output.second.GetDataSize(); | int64_t data_size = output.second.GetDataSize(); | ||||
| if (is_online_infer_dynamic_) { | if (is_online_infer_dynamic_) { | ||||
| auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[idx]; | |||||
| data_size = gear_and_real_out_size_info[cur_dynamic_dims_]; | |||||
| if (merge_nodes_gear_and_real_out_size_info_.find(idx) != merge_nodes_gear_and_real_out_size_info_.end()) { | |||||
| auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[idx]; | |||||
| data_size = gear_and_real_out_size_info[cur_dynamic_dims_]; | |||||
| } | |||||
| } | } | ||||
| uint64_t buffer_length = buffer.length; | uint64_t buffer_length = buffer.length; | ||||
| void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data)); | void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data)); | ||||
| GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%ld] datasize[%u]", | |||||
| GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]", | |||||
| runtime_param_.graph_id, output.first, output.second.GetBasicAddr(), data_size, buffer_length); | runtime_param_.graph_id, output.first, output.second.GetBasicAddr(), data_size, buffer_length); | ||||
| GE_CHK_RT_RET(rtMemcpy(buffer_addr, buffer_length, output.second.GetBasicAddr(), data_size, kind)); | GE_CHK_RT_RET(rtMemcpy(buffer_addr, buffer_length, output.second.GetBasicAddr(), data_size, kind)); | ||||
| idx++; | idx++; | ||||
| @@ -2598,11 +2587,13 @@ Status DavinciModel::GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data | |||||
| return ret); | return ret); | ||||
| std::vector<int64_t> output_shape = input_desc->GetShape().GetDims(); | std::vector<int64_t> output_shape = input_desc->GetShape().GetDims(); | ||||
| if (is_online_infer_dynamic_) { | if (is_online_infer_dynamic_) { | ||||
| auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[i]; | |||||
| size = gear_and_real_out_size_info[cur_dynamic_dims_]; | |||||
| auto gear_and_real_out_shape_info = merge_nodes_gear_and_real_out_shape_info_[i]; | |||||
| output_shape = gear_and_real_out_shape_info[cur_dynamic_dims_]; | |||||
| is_dynamic_ = true; | |||||
| if (merge_nodes_gear_and_real_out_size_info_.find(i) != merge_nodes_gear_and_real_out_size_info_.end()) { | |||||
| auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[i]; | |||||
| size = gear_and_real_out_size_info[cur_dynamic_dims_]; | |||||
| auto gear_and_real_out_shape_info = merge_nodes_gear_and_real_out_shape_info_[i]; | |||||
| output_shape = gear_and_real_out_shape_info[cur_dynamic_dims_]; | |||||
| is_dynamic_ = true; | |||||
| } | |||||
| } | } | ||||
| GELOGI("Output size is %ld, output shape is %s.", size, formats::JoinToString(output_shape).c_str()); | GELOGI("Output size is %ld, output shape is %s.", size, formats::JoinToString(output_shape).c_str()); | ||||
| out_buffer_size_vec.push_back(size); | out_buffer_size_vec.push_back(size); | ||||
| @@ -2759,16 +2750,6 @@ void *DavinciModel::Run(DavinciModel *model) { | |||||
| InputData current_data = data_wrapper->GetInput(); | InputData current_data = data_wrapper->GetInput(); | ||||
| GELOGI("Model thread Run begin, model id:%u, data index:%u.", model_id, current_data.index); | GELOGI("Model thread Run begin, model id:%u, data index:%u.", model_id, current_data.index); | ||||
| if (model->is_online_infer_dynamic_ && !model->is_getnext_sink_dynamic_) { | |||||
| model->cur_dynamic_dims_.clear(); | |||||
| GE_IF_BOOL_EXEC(current_data.blobs.empty(), break); | |||||
| auto shape_data_buffer_data = current_data.blobs.back().data; | |||||
| auto shape_data_buffer_length = current_data.blobs.back().length; | |||||
| model->cur_dynamic_dims_.assign(reinterpret_cast<int64_t *>(shape_data_buffer_data), | |||||
| reinterpret_cast<int64_t *>(shape_data_buffer_data) + | |||||
| shape_data_buffer_length / sizeof(int64_t)); | |||||
| GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str()); | |||||
| } | |||||
| GE_TIMESTAMP_START(Model_SyncVarData); | GE_TIMESTAMP_START(Model_SyncVarData); | ||||
| ret = model->SyncVarData(); | ret = model->SyncVarData(); | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | ||||
| @@ -2785,6 +2766,18 @@ void *DavinciModel::Run(DavinciModel *model) { | |||||
| ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); | ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); | ||||
| CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | ||||
| continue, "Copy input data to model failed."); // [No need to check value] | continue, "Copy input data to model failed."); // [No need to check value] | ||||
| if (model->is_online_infer_dynamic_ && !model->is_getnext_sink_dynamic_) { | |||||
| model->cur_dynamic_dims_.clear(); | |||||
| GE_IF_BOOL_EXEC(current_data.blobs.empty(), break); | |||||
| auto shape_data_buffer_data = current_data.blobs.back().data; | |||||
| auto shape_data_buffer_length = current_data.blobs.back().length; | |||||
| model->cur_dynamic_dims_.assign(reinterpret_cast<int64_t *>(shape_data_buffer_data), | |||||
| reinterpret_cast<int64_t *>(shape_data_buffer_data) + | |||||
| shape_data_buffer_length / sizeof(int64_t)); | |||||
| GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str()); | |||||
| delete[] (int64_t *)current_data.blobs.back().data; | |||||
| current_data.blobs.pop_back(); | |||||
| } | |||||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); | GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); | ||||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_START)); | GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_START)); | ||||
| if (ProfilingManager::Instance().ProfilingOpTraceOn()) { | if (ProfilingManager::Instance().ProfilingOpTraceOn()) { | ||||
| @@ -2982,7 +2975,7 @@ void DavinciModel::UnbindTaskSinkStream() { | |||||
| Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs) { | Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs) { | ||||
| GELOGI("DavinciModel::CreateKnownZeroCopyMap in."); | GELOGI("DavinciModel::CreateKnownZeroCopyMap in."); | ||||
| if (inputs.size() > data_op_list_.size()) { | if (inputs.size() > data_op_list_.size()) { | ||||
| GELOGE(FAILED, "input data addr %u should less than input op number %u.", inputs.size(), data_op_list_.size()); | |||||
| GELOGE(FAILED, "input data addr %zu should less than input op number %zu.", inputs.size(), data_op_list_.size()); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| // remove zero copy addr in last iteration | // remove zero copy addr in last iteration | ||||
| @@ -2991,16 +2984,16 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const | |||||
| for (size_t i = 0; i < inputs.size(); ++i) { | for (size_t i = 0; i < inputs.size(); ++i) { | ||||
| const vector<void *> addr_list = ModelUtils::GetOutputDataAddrs(runtime_param_, data_op_list_[i]); | const vector<void *> addr_list = ModelUtils::GetOutputDataAddrs(runtime_param_, data_op_list_[i]); | ||||
| knonw_input_data_info_[addr_list[kDataIndex]] = inputs[i]; | knonw_input_data_info_[addr_list[kDataIndex]] = inputs[i]; | ||||
| GELOGI("DavinciModel::CreateKnownZeroCopyMap input %d,v addr %p,p addr %p .", i, addr_list[kDataIndex], inputs[i]); | |||||
| GELOGI("DavinciModel::CreateKnownZeroCopyMap input %zu,v addr %p,p addr %p .", i, addr_list[kDataIndex], inputs[i]); | |||||
| } | } | ||||
| if (output_op_list_.size() < kOutputNum) { | if (output_op_list_.size() < kOutputNum) { | ||||
| GELOGW("output op num in graph is %u.", output_op_list_.size()); | |||||
| GELOGW("output op num in graph is %zu.", output_op_list_.size()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| const vector<void *> addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, output_op_list_[kDataIndex]); | const vector<void *> addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, output_op_list_[kDataIndex]); | ||||
| for (size_t i = 0; i < addr_list.size() && i < outputs.size(); ++i) { | for (size_t i = 0; i < addr_list.size() && i < outputs.size(); ++i) { | ||||
| knonw_output_data_info_[addr_list[i]] = outputs[i]; | knonw_output_data_info_[addr_list[i]] = outputs[i]; | ||||
| GELOGI("DavinciModel::CreateKnownZeroCopyMap output %d,v addr %p,p addr %p .", i, addr_list[i], outputs[i]); | |||||
| GELOGI("DavinciModel::CreateKnownZeroCopyMap output %zu,v addr %p,p addr %p .", i, addr_list[i], outputs[i]); | |||||
| } | } | ||||
| GELOGI("DavinciModel::CreateKnownZeroCopyMap success."); | GELOGI("DavinciModel::CreateKnownZeroCopyMap success."); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -3010,13 +3003,13 @@ Status DavinciModel::UpdateKnownZeroCopyAddr() { | |||||
| for (size_t i = 0; i < total_io_addrs_.size(); ++i) { | for (size_t i = 0; i < total_io_addrs_.size(); ++i) { | ||||
| auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]); | auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]); | ||||
| if (it_in != knonw_input_data_info_.end()) { | if (it_in != knonw_input_data_info_.end()) { | ||||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %d,v addr %p,p addr %p .", i, total_io_addrs_[i], | |||||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs_[i], | |||||
| knonw_input_data_info_.at(total_io_addrs_[i])); | knonw_input_data_info_.at(total_io_addrs_[i])); | ||||
| total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]); | total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]); | ||||
| } | } | ||||
| auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]); | auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]); | ||||
| if (it_out != knonw_output_data_info_.end()) { | if (it_out != knonw_output_data_info_.end()) { | ||||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %d,v addr %p,p addr %p .", i, total_io_addrs_[i], | |||||
| GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs_[i], | |||||
| knonw_output_data_info_.at(total_io_addrs_[i])); | knonw_output_data_info_.at(total_io_addrs_[i])); | ||||
| total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]); | total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]); | ||||
| } | } | ||||
| @@ -3037,7 +3030,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec | |||||
| if (task != nullptr) { | if (task != nullptr) { | ||||
| Status ret = task->UpdateArgs(); | Status ret = task->UpdateArgs(); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(FAILED, "task %d created by davinci model is nullptr.", task_index); | |||||
| GELOGE(FAILED, "task %zu created by davinci model is nullptr.", task_index); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| } | } | ||||
| @@ -3066,7 +3059,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec | |||||
| } | } | ||||
| Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) { | Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) { | ||||
| GELOGI("InitTaskInfo in, task size %zu", model_task_def.task().size()); | |||||
| GELOGI("InitTaskInfo in, task size %d", model_task_def.task().size()); | |||||
| task_list_.resize(model_task_def.task_size()); | task_list_.resize(model_task_def.task_size()); | ||||
| for (int i = 0; i < model_task_def.task_size(); ++i) { | for (int i = 0; i < model_task_def.task_size(); ++i) { | ||||
| // dynamic shape will create task_list_ before | // dynamic shape will create task_list_ before | ||||
| @@ -3142,14 +3135,14 @@ Status DavinciModel::DistributeTask() { | |||||
| task_desc_info_.clear(); | task_desc_info_.clear(); | ||||
| bool flag = GetL1FusionEnableOption(); | bool flag = GetL1FusionEnableOption(); | ||||
| char *skt_enable_env = std::getenv("SKT_ENABLE"); | |||||
| int64_t env_flag = (skt_enable_env != nullptr) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; | |||||
| char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
| INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); | |||||
| int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; | |||||
| if (env_flag != 0) { | if (env_flag != 0) { | ||||
| flag = true; | flag = true; | ||||
| } | } | ||||
| const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); | const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); | ||||
| GELOGI("there are %zu task need to save.", task_list_.size()); | |||||
| for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { | for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { | ||||
| auto &task = task_list_.at(task_index); | auto &task = task_list_.at(task_index); | ||||
| GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); | GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); | ||||
| @@ -3331,7 +3324,7 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 | |||||
| if (input_size > op_size) { | if (input_size > op_size) { | ||||
| GELOGW( | GELOGW( | ||||
| "Input size [%u] is bigger than om size need [%u], " | |||||
| "Input size [%ld] is bigger than om size need [%ld], " | |||||
| "MAY cause inference result ERROR, please check model input", | "MAY cause inference result ERROR, please check model input", | ||||
| input_size, op_size); | input_size, op_size); | ||||
| } | } | ||||
| @@ -3413,7 +3406,7 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> & | |||||
| for (const auto &data : data_info) { | for (const auto &data : data_info) { | ||||
| if (data.first >= blobs.size()) { // check data index. | if (data.first >= blobs.size()) { // check data index. | ||||
| GELOGE(FAILED, "Verify %s data num failed: can not find No.%zu data, because user only feeds %zu", | |||||
| GELOGE(FAILED, "Verify %s data num failed: can not find No.%u data, because user only feeds %zu", | |||||
| input_or_output.c_str(), data.first, blobs.size()); | input_or_output.c_str(), data.first, blobs.size()); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -3522,7 +3515,7 @@ Status DavinciModel::InitConstant(const OpDescPtr &op_desc) { | |||||
| GeTensor *tensor = const_cast<GeTensor *>(v_weights[0].get()); | GeTensor *tensor = const_cast<GeTensor *>(v_weights[0].get()); | ||||
| GE_IF_BOOL_EXEC(static_cast<size_t>(v_output_size[0]) < tensor->GetData().size(), | GE_IF_BOOL_EXEC(static_cast<size_t>(v_output_size[0]) < tensor->GetData().size(), | ||||
| GELOGE(PARAM_INVALID, "output size:%u less than weight data size:%zu", v_output_size[0], | |||||
| GELOGE(PARAM_INVALID, "output size:%ld less than weight data size:%zu", v_output_size[0], | |||||
| tensor->GetData().size()); | tensor->GetData().size()); | ||||
| return PARAM_INVALID;); | return PARAM_INVALID;); | ||||
| @@ -3546,12 +3539,12 @@ Status DavinciModel::InitConstant(const OpDescPtr &op_desc) { | |||||
| uint64_t offset = static_cast<uint64_t>(elem_num * kBytes); | uint64_t offset = static_cast<uint64_t>(elem_num * kBytes); | ||||
| uint64_t hbm_raw_data_base_addr = | uint64_t hbm_raw_data_base_addr = | ||||
| reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(v_output_addr[0])) + offset; | |||||
| static_cast<uint64_t>(reinterpret_cast<uintptr_t>(v_output_addr[0])) + offset; | |||||
| for (int64_t i = elem_num - 1; i >= 0; --i) { | for (int64_t i = elem_num - 1; i >= 0; --i) { | ||||
| buff[i] = hbm_raw_data_base_addr + (buff[i] - buff[0]); | buff[i] = hbm_raw_data_base_addr + (buff[i] - buff[0]); | ||||
| } | } | ||||
| } | } | ||||
| GELOGI("[IMAS]InitConstant memcpy graph_%u type[V] name[%s] output[%d] memaddr[%p] mem_size[%u] datasize[%zu]", | |||||
| GELOGI("[IMAS]InitConstant memcpy graph_%u type[V] name[%s] output[%d] memaddr[%p] mem_size[%lu] datasize[%zu]", | |||||
| runtime_param_.graph_id, op_desc->GetName().c_str(), 0, v_output_addr[0], v_output_size[0], | runtime_param_.graph_id, op_desc->GetName().c_str(), 0, v_output_addr[0], v_output_size[0], | ||||
| tensor->GetData().size()); | tensor->GetData().size()); | ||||
| GE_CHK_RT_RET(rtMemcpy(v_output_addr[0], v_output_size[0], tensor->GetData().data(), tensor->GetData().size(), | GE_CHK_RT_RET(rtMemcpy(v_output_addr[0], v_output_size[0], tensor->GetData().data(), tensor->GetData().size(), | ||||
| @@ -3582,12 +3575,12 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { | |||||
| if (rtQueryFunctionRegistered(bin_file_key) != RT_ERROR_NONE) { | if (rtQueryFunctionRegistered(bin_file_key) != RT_ERROR_NONE) { | ||||
| void *bin_handle = nullptr; | void *bin_handle = nullptr; | ||||
| if (!kernel_store.FindTBEHandle(bin_file_key, bin_handle)) { | if (!kernel_store.FindTBEHandle(bin_file_key, bin_handle)) { | ||||
| GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", bin_file_key); | |||||
| GELOGD("TBE: can't find the kernel_name[%s] in HandleMap", bin_file_key); | |||||
| rtDevBinary_t binary; | rtDevBinary_t binary; | ||||
| std::string json_string; | std::string json_string; | ||||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc, TVM_ATTR_NAME_MAGIC, json_string), | GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc, TVM_ATTR_NAME_MAGIC, json_string), | ||||
| GELOGI("Get original type of session_graph_id.")); | |||||
| GELOGD("Get original type of session_graph_id.")); | |||||
| if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { | if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { | ||||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; | binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; | ||||
| } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") { | } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") { | ||||
| @@ -3603,13 +3596,13 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { | |||||
| binary.data = tbe_kernel->GetBinData(); | binary.data = tbe_kernel->GetBinData(); | ||||
| binary.length = tbe_kernel->GetBinDataSize(); | binary.length = tbe_kernel->GetBinDataSize(); | ||||
| GELOGI("TBE: binary.length: %lu", binary.length); | |||||
| GELOGD("TBE: binary.length: %lu", binary.length); | |||||
| GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle)); | GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle)); | ||||
| std::string meta_data; | std::string meta_data; | ||||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc, TVM_ATTR_NAME_METADATA, meta_data), | GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc, TVM_ATTR_NAME_METADATA, meta_data), | ||||
| GELOGI("Get original type of json_string")); | GELOGI("Get original type of json_string")); | ||||
| GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str()); | |||||
| GELOGD("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str()); | |||||
| GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); | GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); | ||||
| kernel_store.StoreTBEHandle(bin_file_key, bin_handle, tbe_kernel); | kernel_store.StoreTBEHandle(bin_file_key, bin_handle, tbe_kernel); | ||||
| @@ -3620,8 +3613,7 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { | |||||
| std::string kernel_name; | std::string kernel_name; | ||||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name), | GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name), | ||||
| GELOGI("Get original type of kernel_name")); | |||||
| GELOGI("TBE: binfile_key=%s, kernel_name=%s", bin_file_key, kernel_name.c_str()); | |||||
| GELOGD("Get original type of kernel_name")); | |||||
| GE_CHK_RT_RET(rtFunctionRegister(bin_handle, bin_file_key, bin_file_key, kernel_name.c_str(), 0)); | GE_CHK_RT_RET(rtFunctionRegister(bin_handle, bin_file_key, bin_file_key, kernel_name.c_str(), 0)); | ||||
| used_tbe_handle_map_[bin_file_key] = 1; // Init used num to 1. | used_tbe_handle_map_[bin_file_key] = 1; // Init used num to 1. | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -3816,7 +3808,7 @@ Status DavinciModel::InitModelStream(rtStream_t stream) { | |||||
| Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputData &input_data, | Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputData &input_data, | ||||
| OutputData &output_data) { | OutputData &output_data) { | ||||
| is_async_mode_ = async_mode; | is_async_mode_ = async_mode; | ||||
| GELOGI("Model Run begin, model id:%u, data index:%u, flag:%d.", model_id_, input_data.index, is_async_mode_); | |||||
| GELOGD("Model Run begin, model id:%u, data index:%u, flag:%d.", model_id_, input_data.index, is_async_mode_); | |||||
| GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed."); | GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed."); | ||||
| is_dynamic_ = input_data.is_dynamic_batch; | is_dynamic_ = input_data.is_dynamic_batch; | ||||
| if (!is_dynamic_) { | if (!is_dynamic_) { | ||||
| @@ -3828,7 +3820,7 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy input data to model failed. model id: %u", | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy input data to model failed. model id: %u", | ||||
| model_id_); | model_id_); | ||||
| GELOGI("current_data.index=%u", input_data.index); | |||||
| GELOGD("current_data.index=%u", input_data.index); | |||||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END)); | GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END)); | ||||
| if (!task_list_.empty()) { | if (!task_list_.empty()) { | ||||
| @@ -3837,7 +3829,7 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||||
| rtError_t rt_ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0); | rtError_t rt_ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0); | ||||
| GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret)); | GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret)); | ||||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END)); | GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END)); | ||||
| GELOGI("rtModelExecute end"); | |||||
| GELOGD("rtModelExecute end"); | |||||
| } | } | ||||
| if (!is_async_mode_) { | if (!is_async_mode_) { | ||||
| @@ -3849,7 +3841,7 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||||
| // report model time data | // report model time data | ||||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)SinkTimeProfile(input_data)); | GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)SinkTimeProfile(input_data)); | ||||
| GELOGI("Model run end, model id:%u", model_id_); | |||||
| GELOGD("Model run end, model id:%u", model_id_); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -3906,7 +3898,9 @@ Status DavinciModel::InitEntryTask() { | |||||
| uint8_t *DavinciModel::MallocFeatureMapMem(size_t data_size) { | uint8_t *DavinciModel::MallocFeatureMapMem(size_t data_size) { | ||||
| uint8_t *mem_base = nullptr; | uint8_t *mem_base = nullptr; | ||||
| const string purpose("feature map,used for op input and output."); | const string purpose("feature map,used for op input and output."); | ||||
| if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { | |||||
| char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
| INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); | |||||
| if (res == EN_OK) { | |||||
| data_size = static_cast<size_t>(VarManager::Instance(session_id_)->GetGraphMemoryMaxSize()); | data_size = static_cast<size_t>(VarManager::Instance(session_id_)->GetGraphMemoryMaxSize()); | ||||
| string memory_key = std::to_string(0) + "_f"; | string memory_key = std::to_string(0) + "_f"; | ||||
| mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, memory_key, data_size, GetDeviceId()); | mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, memory_key, data_size, GetDeviceId()); | ||||
| @@ -3936,7 +3930,9 @@ uint8_t *DavinciModel::MallocP2PMem(size_t p2p_data_size) { | |||||
| uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) { | uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) { | ||||
| uint8_t *weights_mem_base = nullptr; | uint8_t *weights_mem_base = nullptr; | ||||
| const string purpose("weights memory in inference network."); | const string purpose("weights memory in inference network."); | ||||
| if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { | |||||
| char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
| INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); | |||||
| if (res == EN_OK) { | |||||
| string weight_memory_key = std::to_string(0) + "_w"; | string weight_memory_key = std::to_string(0) + "_w"; | ||||
| weights_mem_base = | weights_mem_base = | ||||
| MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId()); | MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId()); | ||||
| @@ -3947,7 +3943,9 @@ uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) { | |||||
| } | } | ||||
| void DavinciModel::FreeFeatureMapMem() { | void DavinciModel::FreeFeatureMapMem() { | ||||
| if (std::getenv(kEnvGeuseStaticMemory) != nullptr && is_inner_mem_base_) { | |||||
| char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
| INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); | |||||
| if (res == EN_OK && is_inner_mem_base_) { | |||||
| string weight_memory_key = std::to_string(0) + "_f"; | string weight_memory_key = std::to_string(0) + "_f"; | ||||
| if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(weight_memory_key) != nullptr) { | if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(weight_memory_key) != nullptr) { | ||||
| GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weight_memory_key, GetDeviceId()), | GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weight_memory_key, GetDeviceId()), | ||||
| @@ -3979,7 +3977,9 @@ void DavinciModel::FreeP2PMem() { | |||||
| } | } | ||||
| void DavinciModel::FreeWeightsMem() { | void DavinciModel::FreeWeightsMem() { | ||||
| if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { | |||||
| char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
| INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); | |||||
| if (res == EN_OK) { | |||||
| string memory_key = std::to_string(0) + "_w"; | string memory_key = std::to_string(0) + "_w"; | ||||
| if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(memory_key) != nullptr) { | if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(memory_key) != nullptr) { | ||||
| GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(memory_key, GetDeviceId()), | GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(memory_key, GetDeviceId()), | ||||
| @@ -3995,7 +3995,6 @@ void DavinciModel::FreeWeightsMem() { | |||||
| } | } | ||||
| Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) { | Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) { | ||||
| GELOGI("TransAllVarData start: session_id:%lu, graph_id: %u.", session_id_, graph_id); | |||||
| rtContext_t ctx = nullptr; | rtContext_t ctx = nullptr; | ||||
| rtError_t rt_ret = rtCtxGetCurrent(&ctx); | rtError_t rt_ret = rtCtxGetCurrent(&ctx); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| @@ -4016,13 +4015,10 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) | |||||
| GE_CHK_STATUS_RET_NOLOG( | GE_CHK_STATUS_RET_NOLOG( | ||||
| TransVarDataUtils::TransAllVarData(variable_node_list, session_id_, ctx, graph_id, kThreadNum)); | TransVarDataUtils::TransAllVarData(variable_node_list, session_id_, ctx, graph_id, kThreadNum)); | ||||
| GELOGI("TransAllVarData success."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &compute_graph) { | void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &compute_graph) { | ||||
| GELOGI("set data dumper args, name: %s, id: %u.", name_.c_str(), model_id_); | |||||
| data_dumper_.SetModelName(name_); | data_dumper_.SetModelName(name_); | ||||
| data_dumper_.SetModelId(model_id_); | data_dumper_.SetModelId(model_id_); | ||||
| data_dumper_.SetOmName(om_name_); | data_dumper_.SetOmName(om_name_); | ||||
| @@ -4048,15 +4044,13 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &compute_graph) { | |||||
| } | } | ||||
| return v_output_addr[0]; | return v_output_addr[0]; | ||||
| } | } | ||||
| GELOGW("op is null."); | |||||
| GELOGD("op is null."); | |||||
| return nullptr; | return nullptr; | ||||
| }; | }; | ||||
| data_dumper_.SetLoopAddr(get_var_addr(GetVariableOp(NODE_NAME_GLOBAL_STEP), runtime_param_), | data_dumper_.SetLoopAddr(get_var_addr(GetVariableOp(NODE_NAME_GLOBAL_STEP), runtime_param_), | ||||
| get_var_addr(GetVariableOp(NODE_NAME_FLOWCTRL_LOOP_PER_ITER), runtime_param_), | get_var_addr(GetVariableOp(NODE_NAME_FLOWCTRL_LOOP_PER_ITER), runtime_param_), | ||||
| get_var_addr(GetVariableOp(NODE_NAME_FLOWCTRL_LOOP_COND), runtime_param_)); | get_var_addr(GetVariableOp(NODE_NAME_FLOWCTRL_LOOP_COND), runtime_param_)); | ||||
| GELOGI("SetDataDumperArgs end."); | |||||
| } | } | ||||
| uint32_t DavinciModel::GetFlowctrlIndex(uint32_t op_index) { | uint32_t DavinciModel::GetFlowctrlIndex(uint32_t op_index) { | ||||
| @@ -4075,7 +4069,6 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea | |||||
| } | } | ||||
| Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info) { | Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info) { | ||||
| GELOGI("GetComputeGraphInfo start."); | |||||
| auto &all_op_desc = data_dumper_.GetAllOpDescInfo(); | auto &all_op_desc = data_dumper_.GetAllOpDescInfo(); | ||||
| for (auto &op_desc : all_op_desc) { | for (auto &op_desc : all_op_desc) { | ||||
| ComputeGraphDescInfo compute_graph_info; | ComputeGraphDescInfo compute_graph_info; | ||||
| @@ -4095,7 +4088,6 @@ Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_des | |||||
| graph_desc_info.emplace_back(compute_graph_info); | graph_desc_info.emplace_back(compute_graph_info); | ||||
| } | } | ||||
| GELOGI("GetComputeGraphInfo end."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -4160,7 +4152,7 @@ Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vector<Input | |||||
| vector<std::string> inputs; | vector<std::string> inputs; | ||||
| if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { | if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { | ||||
| GELOGI("GetAllAippInputOutputDims: Data: %s has %u related aippInfo.", data_op->GetName().c_str(), inputs.size()); | |||||
| GELOGI("GetAllAippInputOutputDims: Data: %s has %zu related aippInfo.", data_op->GetName().c_str(), inputs.size()); | |||||
| for (auto it : inputs) { | for (auto it : inputs) { | ||||
| InputOutputDims input_info; | InputOutputDims input_info; | ||||
| ParseAIPPInfo(it, input_info); | ParseAIPPInfo(it, input_info); | ||||
| @@ -4171,7 +4163,7 @@ Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vector<Input | |||||
| int64_t data_input_size; | int64_t data_input_size; | ||||
| (void)TensorUtils::GetSize(*(data_op->GetInputDescPtr(kDataIndex)), data_input_size); | (void)TensorUtils::GetSize(*(data_op->GetInputDescPtr(kDataIndex)), data_input_size); | ||||
| GELOGD( | GELOGD( | ||||
| "GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %u, tensor_size: %zu, format: " | |||||
| "GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %zu, tensor_size: %zu, format: " | |||||
| "%s, data_type: %s, shape: %s .", | "%s, data_type: %s, shape: %s .", | ||||
| index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, | index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, | ||||
| TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), | TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), | ||||
| @@ -202,7 +202,6 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) { | |||||
| } | } | ||||
| ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | ||||
| GELOGI("Destroy aicpu session for infer, model id is %u.", model_id); | |||||
| std::lock_guard<std::mutex> lock(map_mutex_); | std::lock_guard<std::mutex> lock(map_mutex_); | ||||
| auto it = model_map_.find(model_id); | auto it = model_map_.find(model_id); | ||||
| if (it == model_map_.end()) { | if (it == model_map_.end()) { | ||||
| @@ -210,7 +209,6 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | |||||
| return GE_EXEC_MODEL_ID_INVALID; | return GE_EXEC_MODEL_ID_INVALID; | ||||
| } | } | ||||
| uint64_t session_id = it->second->GetSessionId(); | uint64_t session_id = it->second->GetSessionId(); | ||||
| GELOGI("Destroy aicpu session for infer, session id is %lu.", session_id); | |||||
| DestroyAicpuSession(session_id); | DestroyAicpuSession(session_id); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -407,10 +405,6 @@ Status ModelManager::Unload(uint32_t model_id) { | |||||
| } | } | ||||
| std::lock_guard<std::mutex> lock(exeception_infos_mutex_); | std::lock_guard<std::mutex> lock(exeception_infos_mutex_); | ||||
| exception_infos_.clear(); | exception_infos_.clear(); | ||||
| for (auto addr : shape_data_addrs_[model_id]) { | |||||
| delete[] addr; | |||||
| } | |||||
| shape_data_addrs_.erase(model_id); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -475,6 +469,19 @@ Status ModelManager::GetCurDynamicDims(const vector<vector<int64_t>> &user_real_ | |||||
| } | } | ||||
| } | } | ||||
| GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims).c_str()); | GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims).c_str()); | ||||
| bool cur_dynamic_dims_valid = false; | |||||
| std::vector<std::string> shape_strs = ge::StringUtils::Split(GetLocalOmgContext().dynamic_dims, ';'); | |||||
| for (auto dynamic_dim : shape_strs) { | |||||
| if (dynamic_dim == formats::JoinToString(cur_dynamic_dims)) { | |||||
| cur_dynamic_dims_valid = true; | |||||
| break; | |||||
| } | |||||
| } | |||||
| if (!cur_dynamic_dims_valid) { | |||||
| GELOGE(INTERNAL_ERROR, "Cur dynamic dims is %s, not exist in options.", | |||||
| formats::JoinToString(cur_dynamic_dims).c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -517,7 +524,6 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT | |||||
| "Failed to memcpy data."); | "Failed to memcpy data."); | ||||
| data.length = length; | data.length = length; | ||||
| input_data.blobs.push_back(data); | input_data.blobs.push_back(data); | ||||
| shape_data_addrs_[model_id].emplace_back(reinterpret_cast<int64_t *>(data.data)); | |||||
| } | } | ||||
| } | } | ||||
| @@ -1019,8 +1025,8 @@ Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippTyp | |||||
| Status ModelManager::GenSessionId(uint64_t &session_id) { | Status ModelManager::GenSessionId(uint64_t &session_id) { | ||||
| std::lock_guard<std::mutex> lock(session_id_create_mutex_); | std::lock_guard<std::mutex> lock(session_id_create_mutex_); | ||||
| struct timeval tv; | |||||
| if (gettimeofday(&tv, nullptr) != 0) { | |||||
| mmTimeval tv; | |||||
| if (mmGetTimeOfDay(&tv, nullptr) != 0) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to get current time."); | GELOGE(INTERNAL_ERROR, "Failed to get current time."); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| @@ -1037,8 +1043,8 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { | |||||
| Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener, | Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener, | ||||
| void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { | void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { | ||||
| GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, | |||||
| ACL_ERROR_GE_EXEC_MODEL_KEY_PATH_INVALID, | |||||
| GE_CHK_BOOL_RET_STATUS(model.key.empty() || mmAccess2(model.key.c_str(), M_F_OK) == EN_OK, | |||||
| ACL_ERROR_GE_EXEC_MODEL_KEY_PATH_INVALID, | |||||
| "input key file path %s is invalid, %s", model.key.c_str(), strerror(errno)); | "input key file path %s is invalid, %s", model.key.c_str(), strerror(errno)); | ||||
| GenModelId(&model_id); | GenModelId(&model_id); | ||||
| @@ -1123,7 +1129,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||||
| Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, | Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, | ||||
| const std::vector<uint32_t> &input_queue_ids, | const std::vector<uint32_t> &input_queue_ids, | ||||
| const std::vector<uint32_t> &output_queue_ids) { | const std::vector<uint32_t> &output_queue_ids) { | ||||
| GE_CHK_BOOL_RET_STATUS(model_data.key.empty() || access(model_data.key.c_str(), F_OK) == 0, | |||||
| GE_CHK_BOOL_RET_STATUS(model_data.key.empty() || mmAccess2(model_data.key.c_str(), M_F_OK) == EN_OK, | |||||
| ACL_ERROR_GE_EXEC_MODEL_KEY_PATH_INVALID, "input key file path %s is not valid, %s", | ACL_ERROR_GE_EXEC_MODEL_KEY_PATH_INVALID, "input key file path %s is not valid, %s", | ||||
| model_data.key.c_str(), strerror(errno)); | model_data.key.c_str(), strerror(errno)); | ||||
| @@ -1205,7 +1211,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy | |||||
| Status status = davinci_model->NnExecute(stream, async_mode, input_data, output_data); | Status status = davinci_model->NnExecute(stream, async_mode, input_data, output_data); | ||||
| if (status == SUCCESS) { | if (status == SUCCESS) { | ||||
| GELOGI("Execute model %u success.", model_id); | |||||
| GELOGD("Execute model %u success.", model_id); | |||||
| } | } | ||||
| return status; | return status; | ||||
| @@ -1262,7 +1268,6 @@ Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_ | |||||
| } | } | ||||
| Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { | Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { | ||||
| GELOGI("LaunchCustAucpuSo in, kernel name %s", kernel_name.c_str()); | |||||
| std::lock_guard<std::mutex> lock(cust_aicpu_mutex_); | std::lock_guard<std::mutex> lock(cust_aicpu_mutex_); | ||||
| if (cust_aicpu_so_.size() == 0) return SUCCESS; | if (cust_aicpu_so_.size() == 0) return SUCCESS; | ||||
| // get current context | // get current context | ||||
| @@ -18,7 +18,6 @@ | |||||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_MODEL_MANAGER_H_ | #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_MODEL_MANAGER_H_ | ||||
| #include <model/ge_root_model.h> | #include <model/ge_root_model.h> | ||||
| #include <pthread.h> | |||||
| #include <stdint.h> | #include <stdint.h> | ||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <map> | #include <map> | ||||
| @@ -364,7 +363,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
| std::map<uintptr_t, std::map<std::string, CustAICPUKernelPtr>> cust_aicpu_so_; | std::map<uintptr_t, std::map<std::string, CustAICPUKernelPtr>> cust_aicpu_so_; | ||||
| static DumpProperties dump_properties_; | static DumpProperties dump_properties_; | ||||
| std::map<uint32_t, std::vector<int64_t *>> shape_data_addrs_; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -337,9 +337,7 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||||
| continue; | continue; | ||||
| } | } | ||||
| GE_IF_BOOL_EXEC(non_const_index >= v_input_offset.size(), | |||||
| GELOGW("offsets=%zu, inputs=%zu, index=%zu.", v_input_offset.size(), inputs_size, non_const_index); | |||||
| break); | |||||
| GE_IF_BOOL_EXEC(non_const_index >= v_input_offset.size(), break); | |||||
| int64_t input_offset = v_input_offset[non_const_index]; | int64_t input_offset = v_input_offset[non_const_index]; | ||||
| non_const_index++; | non_const_index++; | ||||
| @@ -356,7 +354,7 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||||
| // feature maps | // feature maps | ||||
| void *mem_addr = nullptr; | void *mem_addr = nullptr; | ||||
| if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { // fusion | if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { // fusion | ||||
| mem_addr = reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(input_offset)); | |||||
| mem_addr = reinterpret_cast<uint8_t *>(static_cast<intptr_t>(input_offset)); | |||||
| v_input_data_addr.push_back(mem_addr); | v_input_data_addr.push_back(mem_addr); | ||||
| } else if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_TS_4G) { | } else if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_TS_4G) { | ||||
| int64_t tensor_size = 0; | int64_t tensor_size = 0; | ||||
| @@ -424,7 +422,7 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C | |||||
| // feature maps | // feature maps | ||||
| void *mem_addr = nullptr; | void *mem_addr = nullptr; | ||||
| if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { // fusion | if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { // fusion | ||||
| mem_addr = reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_output_offset[i])); | |||||
| mem_addr = reinterpret_cast<uint8_t *>(static_cast<intptr_t>(v_output_offset[i])); | |||||
| v_output_data_addr.push_back(mem_addr); | v_output_data_addr.push_back(mem_addr); | ||||
| } else if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_TS_4G) { | } else if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_TS_4G) { | ||||
| const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); | const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); | ||||
| @@ -500,7 +498,7 @@ vector<void *> ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param | |||||
| continue; | continue; | ||||
| } | } | ||||
| if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { | if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { | ||||
| v_workspace_data_addr.push_back(reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_workspace_offset[i]))); | |||||
| v_workspace_data_addr.push_back(reinterpret_cast<uint8_t *>(static_cast<intptr_t>(v_workspace_offset[i]))); | |||||
| GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[L1] name[%s], mem_addr[workspace index %zu]:0x%lx", | GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[L1] name[%s], mem_addr[workspace index %zu]:0x%lx", | ||||
| model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i]); | model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i]); | ||||
| } else if (v_workspace_bytes[i] == 0) { | } else if (v_workspace_bytes[i] == 0) { | ||||
| @@ -149,7 +149,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| uint64_t workspace_base_addr = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(workspace_data_addrs[0])); | |||||
| uint64_t workspace_base_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(workspace_data_addrs[0])); | |||||
| const vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc); | const vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc); | ||||
| const vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc); | const vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc); | ||||
| vector<void *> io_addrs; | vector<void *> io_addrs; | ||||
| @@ -287,7 +287,7 @@ Status KernelExTaskInfo::CopyTaskInfo(const domi::KernelExDef &kernel_def, const | |||||
| } | } | ||||
| if (workspace_data_sizes[0] < static_cast<int64_t>(kernel_def.task_info_size())) { | if (workspace_data_sizes[0] < static_cast<int64_t>(kernel_def.task_info_size())) { | ||||
| GELOGE(FAILED, "Node:%s workspace size is %zu, task info size is %zu.", op_desc->GetName().c_str(), | |||||
| GELOGE(FAILED, "Node:%s workspace size is %ld, task info size is %d.", op_desc->GetName().c_str(), | |||||
| workspace_data_sizes[0], kernel_def.task_info_size()); | workspace_data_sizes[0], kernel_def.task_info_size()); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -31,6 +31,7 @@ | |||||
| #include "runtime/kernel.h" | #include "runtime/kernel.h" | ||||
| #include "super_kernel/super_kernel.h" | #include "super_kernel/super_kernel.h" | ||||
| #include "super_kernel/super_kernel_factory.h" | #include "super_kernel/super_kernel_factory.h" | ||||
| #include "cce/aicpu_engine_struct.h" | |||||
| namespace { | namespace { | ||||
| const uint8_t kL2LoadToDdr = 1; | const uint8_t kL2LoadToDdr = 1; | ||||
| @@ -73,7 +74,8 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci | |||||
| GELOGD("node[%s] is_n_batch_spilt %d", op_desc_->GetName().c_str(), is_n_batch_spilt_); | GELOGD("node[%s] is_n_batch_spilt %d", op_desc_->GetName().c_str(), is_n_batch_spilt_); | ||||
| (void)AttrUtils::GetInt(*op_desc_, ATTR_NAME_FUSION_GROUP_KEY, group_key_); | (void)AttrUtils::GetInt(*op_desc_, ATTR_NAME_FUSION_GROUP_KEY, group_key_); | ||||
| has_group_key_ = (group_key_ != kInvalidGroupKey); | has_group_key_ = (group_key_ != kInvalidGroupKey); | ||||
| GELOGD("node[%s] has_group_key_ %ld, group key is [%ld]", op_desc_->GetName().c_str(), has_group_key_, group_key_); | |||||
| GELOGD("node[%s] has_group_key_ %d, group key is [%ld]", op_desc_->GetName().c_str(), has_group_key_, group_key_); | |||||
| // fusion_op_info | // fusion_op_info | ||||
| vector<std::string> original_op_names; | vector<std::string> original_op_names; | ||||
| bool result = AttrUtils::GetListStr(op_desc_, ge::ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, original_op_names); | bool result = AttrUtils::GetListStr(op_desc_, ge::ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, original_op_names); | ||||
| @@ -176,7 +178,7 @@ void KernelTaskInfo::UpdateTaskId() { | |||||
| } | } | ||||
| task_id_ = task_id; | task_id_ = task_id; | ||||
| stream_id_ = stream_id; | stream_id_ = stream_id; | ||||
| GELOGI("UpdateTaskId:UpdateTaskId [%u], stream id [%u]:", task_id, stream_id); | |||||
| GELOGD("UpdateTaskId:UpdateTaskId [%u], stream id [%u]:", task_id, stream_id); | |||||
| } | } | ||||
| } | } | ||||
| @@ -216,7 +218,7 @@ Status KernelTaskInfo::SuperKernelLaunch() { | |||||
| rtError_t rt_ret; | rtError_t rt_ret; | ||||
| auto &skt_kernel_list = skt_info_.kernel_list; | auto &skt_kernel_list = skt_info_.kernel_list; | ||||
| auto &skt_arg_list = skt_info_.arg_list; | auto &skt_arg_list = skt_info_.arg_list; | ||||
| GELOGI("SuperKernelLaunch: Skt_kernel_list size[%d] skt_arg_list[%d]", skt_kernel_list.size(), skt_arg_list.size()); | |||||
| GELOGI("SuperKernelLaunch: Skt_kernel_list size[%zu] skt_arg_list[%zu]", skt_kernel_list.size(), skt_arg_list.size()); | |||||
| if (skt_kernel_list.size() == kSKTSingleSize && skt_arg_list.size() == kSKTSingleSize) { | if (skt_kernel_list.size() == kSKTSingleSize && skt_arg_list.size() == kSKTSingleSize) { | ||||
| rt_ret = rtKernelLaunchWithFlag(skt_info_.kernel_list[0], static_cast<uint32_t>(skt_info_.last_block_dim), | rt_ret = rtKernelLaunchWithFlag(skt_info_.kernel_list[0], static_cast<uint32_t>(skt_info_.last_block_dim), | ||||
| skt_info_.arg_list[0], skt_info_.last_args_size, | skt_info_.arg_list[0], skt_info_.last_args_size, | ||||
| @@ -367,8 +369,9 @@ Status KernelTaskInfo::Distribute() { | |||||
| GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_); | GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_); | ||||
| } | } | ||||
| rtError_t rt_ret = RT_ERROR_NONE; | rtError_t rt_ret = RT_ERROR_NONE; | ||||
| char *skt_enable_env = getenv("SKT_ENABLE"); | |||||
| int64_t env_flag = (skt_enable_env != nullptr) ? strtol(skt_enable_env, nullptr, 10) : 0; | |||||
| char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
| INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); | |||||
| int64_t env_flag = (res == EN_OK) ? strtol(skt_enable_env, nullptr, 10) : 0; | |||||
| bool call_skt = ((env_flag != 0) || is_l1_fusion_enable_); | bool call_skt = ((env_flag != 0) || is_l1_fusion_enable_); | ||||
| if (kernel_type_ == cce::ccKernelType::AI_CPU || kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { | if (kernel_type_ == cce::ccKernelType::AI_CPU || kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { | ||||
| GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_); | GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_); | ||||
| @@ -379,7 +382,7 @@ Status KernelTaskInfo::Distribute() { | |||||
| call_save_dump_ = true; | call_save_dump_ = true; | ||||
| } else { | } else { | ||||
| /* default: not skt launch */ | /* default: not skt launch */ | ||||
| GELOGI( | |||||
| GELOGD( | |||||
| "KernelTaskInfo Distribute Start, sktenable:%d taskid:%u sktid:%u last_sktid:%u stubfunc_name:%s " | "KernelTaskInfo Distribute Start, sktenable:%d taskid:%u sktid:%u last_sktid:%u stubfunc_name:%s " | ||||
| "stubfunc:%p blockdim:%u stream:%p", | "stubfunc:%p blockdim:%u stream:%p", | ||||
| call_skt, task_id_, skt_id_, skt_info_.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); | call_skt, task_id_, skt_id_, skt_info_.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); | ||||
| @@ -406,7 +409,7 @@ Status KernelTaskInfo::Distribute() { | |||||
| } | } | ||||
| // set for task_id_ | // set for task_id_ | ||||
| UpdateTaskId(); | UpdateTaskId(); | ||||
| GELOGI( | |||||
| GELOGD( | |||||
| "KernelTaskInfo Distribute Success. sktenable:%d taskid:%d sktid:%d stubfunc_name:%s stubfunc:%p " | "KernelTaskInfo Distribute Success. sktenable:%d taskid:%d sktid:%d stubfunc_name:%s stubfunc:%p " | ||||
| "blockdim:%d stream:%p", | "blockdim:%d stream:%p", | ||||
| call_skt, task_id_, skt_id_, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); | call_skt, task_id_, skt_id_, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); | ||||
| @@ -747,15 +750,15 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel | |||||
| } | } | ||||
| } | } | ||||
| *(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[0])) = | *(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[0])) = | ||||
| reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.input_descs)); // arg 0 | |||||
| static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.input_descs)); // arg 0 | |||||
| *(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[1])) = | *(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[1])) = | ||||
| reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.input_addrs)); // arg 1 | |||||
| static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.input_addrs)); // arg 1 | |||||
| *(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[2])) = | *(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[2])) = | ||||
| reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.output_descs)); // arg 2 | |||||
| static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.output_descs)); // arg 2 | |||||
| *(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[3])) = | *(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[3])) = | ||||
| reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.output_addrs)); // arg 3 | |||||
| static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.output_addrs)); // arg 3 | |||||
| *(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[4])) = | *(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[4])) = | ||||
| reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.attr_handle)); // arg 4 | |||||
| static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.attr_handle)); // arg 4 | |||||
| rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| @@ -913,7 +916,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
| op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); | op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); | ||||
| aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_); | aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_); | ||||
| aicpu_param_head->extInfoLength = reinterpret_cast<uintptr_t>(ext_info.size()); | |||||
| aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size()); | |||||
| // malloc device memory for args | // malloc device memory for args | ||||
| rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM); | ||||
| @@ -956,12 +959,40 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { | |||||
| if (ext_info.empty()) { | if (ext_info.empty()) { | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| std::unique_ptr<uint8_t[]> copy_ext_info; | |||||
| copy_ext_info.reset(new(std::nothrow)uint8_t[ext_info.size()]); | |||||
| GE_CHECK_NOTNULL(copy_ext_info); | |||||
| auto sec_ret = memcpy_s(copy_ext_info.get(), ext_info.size(), ext_info.c_str(), ext_info.size()); | |||||
| if (sec_ret != EOK) { | |||||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||||
| return FAILED; | |||||
| } | |||||
| auto ext_info_data = copy_ext_info.get(); | |||||
| size_t offset = 0; | |||||
| while (offset + sizeof(aicpu::FWKAdapter::ExtInfo) <= ext_info.size()) { | |||||
| auto aicpu_ext_info = reinterpret_cast<aicpu::FWKAdapter::ExtInfo *>(ext_info_data + offset); | |||||
| GELOGD("Ext infoType=%d, infoLen=%u.", aicpu_ext_info->infoType, aicpu_ext_info->infoLen); | |||||
| if (aicpu_ext_info->infoType == aicpu::FWKAdapter::FWK_ADPT_EXT_SESSION_INFO) { | |||||
| GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(SessionInfo), PARAM_INVALID, | |||||
| "Parse ext session info failed as infoLen must be %zu but %u.", | |||||
| sizeof(SessionInfo), aicpu_ext_info->infoLen); | |||||
| SessionInfo *session_info = reinterpret_cast<SessionInfo *>(aicpu_ext_info->infoMsg); | |||||
| session_info->sessionId = davinci_model_->GetSessionId(); | |||||
| session_info->sessFlag = true; | |||||
| GELOGD("Update aicpu_task ext_info session_info session_id is %lu", session_info->sessionId); | |||||
| } | |||||
| offset += sizeof(aicpu::FWKAdapter::ExtInfo); | |||||
| offset += aicpu_ext_info->infoLen; | |||||
| } | |||||
| auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); | auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | ||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
| } | } | ||||
| rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||||
| rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info_data, ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | ||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
| @@ -1122,18 +1153,24 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u | |||||
| } | } | ||||
| GELOGI("FileName:%s, Path:%s.", file_name.c_str(), canonicalPath.c_str()); | GELOGI("FileName:%s, Path:%s.", file_name.c_str(), canonicalPath.c_str()); | ||||
| auto handle = dlopen(canonicalPath.c_str(), RTLD_NOW | RTLD_GLOBAL); | |||||
| auto handle = mmDlopen(canonicalPath.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL); | |||||
| const char *error = ""; | |||||
| if (handle == nullptr) { | if (handle == nullptr) { | ||||
| GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! ", dlerror()); | |||||
| error = mmDlerror(); | |||||
| GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||||
| GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! ", error); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| cce::ccStatus_t cc_ret; | cce::ccStatus_t cc_ret; | ||||
| std::string update_kernel_args = "ccUpdateKernelArgs"; | |||||
| auto cceUpdateKernelArgs = (cce::ccStatus_t(*)(cce::ccOpContext &, uint64_t, uint64_t, uint64_t, void *, uint64_t, | auto cceUpdateKernelArgs = (cce::ccStatus_t(*)(cce::ccOpContext &, uint64_t, uint64_t, uint64_t, void *, uint64_t, | ||||
| void *))dlsym(handle, "ccUpdateKernelArgs"); | |||||
| void *))mmDlsym(handle, const_cast<char *>(update_kernel_args.c_str())); | |||||
| if (cceUpdateKernelArgs == nullptr) { | if (cceUpdateKernelArgs == nullptr) { | ||||
| GELOGE(FAILED, "Failed to invoke function ccUpdateKernelArgs"); | GELOGE(FAILED, "Failed to invoke function ccUpdateKernelArgs"); | ||||
| if (dlclose(handle) != 0) { | |||||
| GELOGW("Failed to close handle %s", dlerror()); | |||||
| if (mmDlclose(handle) != 0) { | |||||
| error = mmDlerror(); | |||||
| GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||||
| GELOGW("Failed to close handle %s", error); | |||||
| } | } | ||||
| return FAILED; | return FAILED; | ||||
| } else { | } else { | ||||
| @@ -1146,8 +1183,10 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u | |||||
| const_cast<char *>(kernel_def.args().data()), args_size_, sm_contrl); | const_cast<char *>(kernel_def.args().data()), args_size_, sm_contrl); | ||||
| } | } | ||||
| } | } | ||||
| if (dlclose(handle) != 0) { | |||||
| GELOGW("Failed to close handle %s", dlerror()); | |||||
| if (mmDlclose(handle) != 0) { | |||||
| error = mmDlerror(); | |||||
| GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||||
| GELOGW("Failed to close handle %s", error); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| if (cc_ret != cce::CC_STATUS_SUCCESS) { | if (cc_ret != cce::CC_STATUS_SUCCESS) { | ||||
| @@ -1188,7 +1227,7 @@ Status KernelTaskInfo::SetFlowtable(std::string &flowtable, const domi::KernelDe | |||||
| *(reinterpret_cast<uint64_t *>( | *(reinterpret_cast<uint64_t *>( | ||||
| args + (reinterpret_cast<uint16_t *>(const_cast<char *>(context.args_offset().data())))[0])) = | args + (reinterpret_cast<uint16_t *>(const_cast<char *>(context.args_offset().data())))[0])) = | ||||
| reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(flowtable_)); | |||||
| static_cast<uint64_t>(reinterpret_cast<uintptr_t>(flowtable_)); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -23,7 +23,7 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) { | |||||
| const void *func_stub_ = this->GetFuncStub(); | const void *func_stub_ = this->GetFuncStub(); | ||||
| const void *args[] = {this->GetNavTablePtr(), | const void *args[] = {this->GetNavTablePtr(), | ||||
| reinterpret_cast<const void *>(reinterpret_cast<uintptr_t>(this->GetNavTableSize()))}; | |||||
| reinterpret_cast<const void *>(static_cast<uintptr_t>(this->GetNavTableSize()))}; | |||||
| rtError_t rt_ret = rtMalloc((void **)&(device_args_addr_), sizeof(args), RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc((void **)&(device_args_addr_), sizeof(args), RT_MEMORY_HBM); | ||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); return | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); return | ||||
| @@ -27,7 +27,7 @@ SuperKernelFactory &SuperKernelFactory::GetInstance() { | |||||
| Status SuperKernelFactory::Init() { | Status SuperKernelFactory::Init() { | ||||
| if (!is_init_) { | if (!is_init_) { | ||||
| std::string skt_bin = "libcce_aicore.so"; | std::string skt_bin = "libcce_aicore.so"; | ||||
| handle_ = dlopen(skt_bin.c_str(), RTLD_NOW | RTLD_GLOBAL); | |||||
| handle_ = mmDlopen(skt_bin.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL); | |||||
| if (handle_ == nullptr) { | if (handle_ == nullptr) { | ||||
| GELOGE(FAILED, "SKT: open skt lib failed, please check LD_LIBRARY_PATH."); | GELOGE(FAILED, "SKT: open skt lib failed, please check LD_LIBRARY_PATH."); | ||||
| } | } | ||||
| @@ -85,8 +85,10 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list | |||||
| "equal to 2"); | "equal to 2"); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| GELOGI("SKT: superkernel start fuse, superkernel size %d.", stub_func_list.size()); | |||||
| uint64_t nav_table[2 * stub_func_list.size()]; | |||||
| GELOGI("SKT: superkernel start fuse, superkernel size %zu.", stub_func_list.size()); | |||||
| const size_t nav_table_len = 2 * stub_func_list.size(); | |||||
| std::unique_ptr<uint64_t[]> nav_table(new(std::nothrow) uint64_t[nav_table_len]); | |||||
| GE_CHECK_NOTNULL(nav_table); | |||||
| uint64_t nav_table_size = 2 * stub_func_list.size() * sizeof(int64_t); | uint64_t nav_table_size = 2 * stub_func_list.size() * sizeof(int64_t); | ||||
| rtError_t rt_ret; | rtError_t rt_ret; | ||||
| @@ -99,16 +101,16 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list | |||||
| GELOGD("SKT: fuseKernels subFunc %p, device func address %p", stub_func_list[i], sub_device_func); | GELOGD("SKT: fuseKernels subFunc %p, device func address %p", stub_func_list[i], sub_device_func); | ||||
| // store two uint64_t address | // store two uint64_t address | ||||
| // address divided by 4 because of 32bits encoding, call offset will *4 when calculating | // address divided by 4 because of 32bits encoding, call offset will *4 when calculating | ||||
| nav_table[i * 2] = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(sub_device_func)) / 4; | |||||
| nav_table[i * 2] = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(sub_device_func)) / 4; | |||||
| GELOGD("SKT: CALL offet %lu", nav_table[i * 2]); | GELOGD("SKT: CALL offet %lu", nav_table[i * 2]); | ||||
| nav_table[i * 2 + 1] = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(args_addr_list[i])); | |||||
| nav_table[i * 2 + 1] = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args_addr_list[i])); | |||||
| GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]); | GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]); | ||||
| } | } | ||||
| rt_ret = rtMalloc((void **)&hbm_nav_table_addr, nav_table_size, RT_MEMORY_HBM); | rt_ret = rtMalloc((void **)&hbm_nav_table_addr, nav_table_size, RT_MEMORY_HBM); | ||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failed. error: 0x%X", rt_ret); | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failed. error: 0x%X", rt_ret); | ||||
| return RT_ERROR_TO_GE_STATUS(rt_ret);) | return RT_ERROR_TO_GE_STATUS(rt_ret);) | ||||
| rt_ret = | rt_ret = | ||||
| rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); | |||||
| rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table.get(), nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failed. error: 0x%X", rt_ret); | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failed. error: 0x%X", rt_ret); | ||||
| GE_CHK_RT(rtFree(hbm_nav_table_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);) | GE_CHK_RT(rtFree(hbm_nav_table_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);) | ||||
| // Create the necessary metadata for the super kernel | // Create the necessary metadata for the super kernel | ||||
| @@ -34,8 +34,10 @@ class SuperKernelFactory { | |||||
| ~SuperKernelFactory() { | ~SuperKernelFactory() { | ||||
| if (handle_ != nullptr) { | if (handle_ != nullptr) { | ||||
| GELOGI("SKT: SKT LIB PATH release."); | GELOGI("SKT: SKT LIB PATH release."); | ||||
| if (dlclose(handle_) != 0) { | |||||
| GELOGW("failed to close handle, message: %s", dlerror()); | |||||
| if (mmDlclose(handle_) != 0) { | |||||
| const char *error = mmDlerror(); | |||||
| GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||||
| GELOGW("failed to close handle, message: %s", error); | |||||
| } | } | ||||
| } | } | ||||
| }; | }; | ||||
| @@ -30,49 +30,37 @@ ZeroCopyOffset::ZeroCopyOffset() {} | |||||
| ZeroCopyOffset::~ZeroCopyOffset() {} | ZeroCopyOffset::~ZeroCopyOffset() {} | ||||
| Status ZeroCopyOffset::InitInputDataInfo(const vector<int64_t> &output_size_list, | |||||
| const vector<void *> &virtual_addr_list, const OpDescPtr &op_desc, | |||||
| Status ZeroCopyOffset::InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, | |||||
| bool &fusion_flag) { | bool &fusion_flag) { | ||||
| GELOGI("[ZCPY] Start to InitInputDataInfo of %s, total_data_size is %ld, virtual_addr is %p", | GELOGI("[ZCPY] Start to InitInputDataInfo of %s, total_data_size is %ld, virtual_addr is %p", | ||||
| op_desc->GetName().c_str(), output_size_list[kDataIndex], virtual_addr_list[kDataIndex]); | |||||
| if (output_size_list.empty() || virtual_addr_list.empty() || (output_size_list.size() != virtual_addr_list.size())) { | |||||
| GELOGE(PARAM_INVALID, "Data[%s] init failed: Output size is %zu, Output addr is %zu", op_desc->GetName().c_str(), | |||||
| output_size_list.size(), virtual_addr_list.size()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| basic_addr_ = virtual_addr_list[kDataIndex]; | |||||
| op_desc->GetName().c_str(), output_size, virtual_addr); | |||||
| basic_addr_ = virtual_addr; | |||||
| (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset_); | (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset_); | ||||
| (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset_); | (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset_); | ||||
| GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(), return PARAM_INVALID, | GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(), return PARAM_INVALID, | ||||
| "basic_offset_size should be equal to relative_offset_size"); | "basic_offset_size should be equal to relative_offset_size"); | ||||
| GELOGI("[ZCPY] zero_copy_basic_offset size is %zu", zero_copy_basic_offset_.size()); | |||||
| GELOGD("[ZCPY] zero_copy_basic_offset size is %zu", zero_copy_basic_offset_.size()); | |||||
| int64_t virtual_addr_offset = op_desc->GetOutputOffset().at(kDataIndex); | int64_t virtual_addr_offset = op_desc->GetOutputOffset().at(kDataIndex); | ||||
| GELOGI("virtual_addr_offset is %ld.", virtual_addr_offset); | |||||
| IsL2Fusion(zero_copy_basic_offset_, virtual_addr_offset, fusion_flag); | IsL2Fusion(zero_copy_basic_offset_, virtual_addr_offset, fusion_flag); | ||||
| uint32_t out_count = 0; | uint32_t out_count = 0; | ||||
| data_size_ = output_size_list[kDataIndex]; | |||||
| data_size_ = output_size; | |||||
| if (!fusion_flag) { | if (!fusion_flag) { | ||||
| GELOGI("[ZCPY] %s not set l2_fusion.", op_desc->GetName().c_str()); | |||||
| out_count++; | out_count++; | ||||
| data_info_.emplace_back(output_size_list[kDataIndex], virtual_addr_list[kDataIndex]); | |||||
| data_info_.emplace_back(output_size, virtual_addr); | |||||
| relative_offset_.emplace_back(0); | relative_offset_.emplace_back(0); | ||||
| GELOGI("[ZCPY] %s size is %ld, virtual_addr is %p.", op_desc->GetName().c_str(), output_size_list[kDataIndex], | |||||
| virtual_addr_list[kDataIndex]); | |||||
| GELOGD("[ZCPY] %s size is %ld, virtual_addr is %p.", op_desc->GetName().c_str(), output_size, virtual_addr); | |||||
| } else { | } else { | ||||
| GELOGI("[ZCPY] set l2_fusion for %s.", op_desc->GetName().c_str()); | GELOGI("[ZCPY] set l2_fusion for %s.", op_desc->GetName().c_str()); | ||||
| for (size_t index = 0; index < zero_copy_basic_offset_.size(); ++index) { | for (size_t index = 0; index < zero_copy_basic_offset_.size(); ++index) { | ||||
| if (zero_copy_basic_offset_.at(index) == virtual_addr_offset) { | if (zero_copy_basic_offset_.at(index) == virtual_addr_offset) { | ||||
| out_count++; | out_count++; | ||||
| uint64_t out_offset = | |||||
| reinterpret_cast<uint64_t>(virtual_addr_list[kDataIndex]) + zero_copy_relative_offset_.at(index); | |||||
| int64_t real_data_size = ModelUtils::GetOutputSize(op_desc).at(kDataIndex); | |||||
| data_info_.emplace_back(real_data_size, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(out_offset))); | |||||
| uint64_t out_offset = reinterpret_cast<uint64_t>(virtual_addr) + zero_copy_relative_offset_.at(index); | |||||
| data_info_.emplace_back(output_size, reinterpret_cast<void *>(static_cast<uintptr_t>(out_offset))); | |||||
| relative_offset_.emplace_back(zero_copy_relative_offset_.at(index)); | relative_offset_.emplace_back(zero_copy_relative_offset_.at(index)); | ||||
| GELOGI("[ZCPY] virtual_addr: %p has been l2-fusion to %lu, need copy data_size is %ld.", basic_addr_, | GELOGI("[ZCPY] virtual_addr: %p has been l2-fusion to %lu, need copy data_size is %ld.", basic_addr_, | ||||
| out_offset, real_data_size); | |||||
| out_offset, output_size); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -83,7 +71,6 @@ Status ZeroCopyOffset::InitInputDataInfo(const vector<int64_t> &output_size_list | |||||
| Status ZeroCopyOffset::InitOutputDataInfo(const vector<int64_t> &input_size_list, | Status ZeroCopyOffset::InitOutputDataInfo(const vector<int64_t> &input_size_list, | ||||
| const vector<void *> &virtual_addr_list, const OpDescPtr &op_desc, | const vector<void *> &virtual_addr_list, const OpDescPtr &op_desc, | ||||
| const size_t &idx, bool &fusion_flag) { | const size_t &idx, bool &fusion_flag) { | ||||
| GELOGI("[ZCPY] Start to InitOutputDataInfo of %s.", op_desc->GetName().c_str()); | |||||
| int64_t size = input_size_list[idx]; | int64_t size = input_size_list[idx]; | ||||
| auto tensor_desc = op_desc->GetInputDescPtr(idx); | auto tensor_desc = op_desc->GetInputDescPtr(idx); | ||||
| GE_CHECK_NOTNULL(tensor_desc); | GE_CHECK_NOTNULL(tensor_desc); | ||||
| @@ -92,7 +79,7 @@ Status ZeroCopyOffset::InitOutputDataInfo(const vector<int64_t> &input_size_list | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| GELOGI("Tensor data size: GetSize=%ld, GetTensorSizeInBytes=%ld", input_size_list[idx], size); | |||||
| GELOGD("Tensor data size: GetSize=%ld, GetTensorSizeInBytes=%ld", input_size_list[idx], size); | |||||
| basic_addr_ = virtual_addr_list[idx]; | basic_addr_ = virtual_addr_list[idx]; | ||||
| (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset_); | (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset_); | ||||
| @@ -100,13 +87,11 @@ Status ZeroCopyOffset::InitOutputDataInfo(const vector<int64_t> &input_size_list | |||||
| GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(), return PARAM_INVALID, | GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(), return PARAM_INVALID, | ||||
| "basic_offset_size should be equal to relative_offset_size"); | "basic_offset_size should be equal to relative_offset_size"); | ||||
| int64_t virtual_addr_offset = op_desc->GetInputOffset().at(idx); | int64_t virtual_addr_offset = op_desc->GetInputOffset().at(idx); | ||||
| GELOGI("virtual_addr_offset is %ld.", virtual_addr_offset); | |||||
| IsL2Fusion(zero_copy_basic_offset_, virtual_addr_offset, fusion_flag); | IsL2Fusion(zero_copy_basic_offset_, virtual_addr_offset, fusion_flag); | ||||
| uint32_t in_count = 0; | uint32_t in_count = 0; | ||||
| data_size_ = size; | data_size_ = size; | ||||
| if (!fusion_flag) { | if (!fusion_flag) { | ||||
| GELOGI("[ZCPY] %s not set l2-fusion.", op_desc->GetName().c_str()); | |||||
| in_count++; | in_count++; | ||||
| data_info_.emplace_back(size, virtual_addr_list[idx]); | data_info_.emplace_back(size, virtual_addr_list[idx]); | ||||
| // op_desc not set l2fusion when fusion_flag is false | // op_desc not set l2fusion when fusion_flag is false | ||||
| @@ -119,7 +104,7 @@ Status ZeroCopyOffset::InitOutputDataInfo(const vector<int64_t> &input_size_list | |||||
| in_count++; | in_count++; | ||||
| uint64_t in_offset = reinterpret_cast<uint64_t>(virtual_addr_list[idx]) + zero_copy_relative_offset_.at(index); | uint64_t in_offset = reinterpret_cast<uint64_t>(virtual_addr_list[idx]) + zero_copy_relative_offset_.at(index); | ||||
| int64_t real_data_size = ModelUtils::GetInputSize(op_desc).at(idx); | int64_t real_data_size = ModelUtils::GetInputSize(op_desc).at(idx); | ||||
| data_info_.emplace_back(real_data_size, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(in_offset))); | |||||
| data_info_.emplace_back(real_data_size, reinterpret_cast<void *>(static_cast<uintptr_t>(in_offset))); | |||||
| relative_offset_.emplace_back(zero_copy_relative_offset_.at(index)); | relative_offset_.emplace_back(zero_copy_relative_offset_.at(index)); | ||||
| GELOGI("[ZCPY] virtual_addr: %p has been l2-fusion from %lu, need copy data_size is %ld.", basic_addr_, | GELOGI("[ZCPY] virtual_addr: %p has been l2-fusion from %lu, need copy data_size is %ld.", basic_addr_, | ||||
| in_offset, real_data_size); | in_offset, real_data_size); | ||||
| @@ -142,10 +127,8 @@ void ZeroCopyOffset::IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const | |||||
| void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index, | void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index, | ||||
| bool fusion_flag, std::set<const void *> &real_virtual_addrs) { | bool fusion_flag, std::set<const void *> &real_virtual_addrs) { | ||||
| GELOGI("[ZCPY] Start to SetInputOutsideAddrs for virtual_addr %p.", addr); | |||||
| uint32_t out_count = 0; | uint32_t out_count = 0; | ||||
| if (!fusion_flag) { | if (!fusion_flag) { | ||||
| GELOGI("[ZCPY] not set l2-fusion for virtual_adr %p.", addr); | |||||
| out_count++; | out_count++; | ||||
| std::map<const void *, std::vector<void *>> addr_mapping; | std::map<const void *, std::vector<void *>> addr_mapping; | ||||
| addr_mapping[addr] = {}; | addr_mapping[addr] = {}; | ||||
| @@ -175,7 +158,6 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo | |||||
| GELOGI("[ZCPY] Start to SetOutputOutsideAddrs for virtual_addr %p.", addr); | GELOGI("[ZCPY] Start to SetOutputOutsideAddrs for virtual_addr %p.", addr); | ||||
| uint32_t out_count = 0; | uint32_t out_count = 0; | ||||
| if (!fusion_flag) { | if (!fusion_flag) { | ||||
| GELOGI("[ZCPY] not set l2-fusion for virtual_addr %p.", addr); | |||||
| out_count++; | out_count++; | ||||
| std::map<const void *, std::vector<void *>> addr_mapping; | std::map<const void *, std::vector<void *>> addr_mapping; | ||||
| addr_mapping[addr] = {}; | addr_mapping[addr] = {}; | ||||
| @@ -209,7 +191,7 @@ bool ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *ou | |||||
| GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset), "Input args invalid."); | GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset), "Input args invalid."); | ||||
| void *args_val = static_cast<uint8_t *>(args) + offset; | void *args_val = static_cast<uint8_t *>(args) + offset; | ||||
| args_addrs->second.push_back(args_val); | args_addrs->second.push_back(args_val); | ||||
| GELOGI("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val, | |||||
| GELOGD("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val, | |||||
| args, offset); | args, offset); | ||||
| set_batch_label_flag = true; | set_batch_label_flag = true; | ||||
| } | } | ||||
| @@ -42,8 +42,7 @@ class ZeroCopyOffset { | |||||
| ZeroCopyOffset(); | ZeroCopyOffset(); | ||||
| ~ZeroCopyOffset(); | ~ZeroCopyOffset(); | ||||
| Status InitInputDataInfo(const vector<int64_t> &output_size_list, const vector<void *> &virtual_addr_list, | |||||
| const OpDescPtr &op_desc, bool &fusion_flag); | |||||
| Status InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, bool &fusion_flag); | |||||
| void SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index, | void SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index, | ||||
| bool fusion_flag, std::set<const void *> &real_virtual_addrs); | bool fusion_flag, std::set<const void *> &real_virtual_addrs); | ||||
| @@ -19,6 +19,7 @@ | |||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "framework/common/util.h" | #include "framework/common/util.h" | ||||
| #include "graph/load/new_model_manager/model_utils.h" | #include "graph/load/new_model_manager/model_utils.h" | ||||
| #include "common/ge_compiler_options.h" | |||||
| namespace ge { | namespace ge { | ||||
| const char *const kDefaultBatchLable = "Batch_default"; | const char *const kDefaultBatchLable = "Batch_default"; | ||||
| @@ -48,7 +49,7 @@ Status ZeroCopyTask::SetTaskArgsOffset(uintptr_t addr, size_t offset) { | |||||
| it->second.insert(offset); | it->second.insert(offset); | ||||
| } | } | ||||
| GELOGI("[ZCPY] %s set task, virtual_addr: 0x%lx, args_addr: %p, size: %zu, offset: %zu", name_.c_str(), addr, | |||||
| GELOGD("[ZCPY] %s set task, virtual_addr: 0x%lx, args_addr: %p, size: %zu, offset: %zu", name_.c_str(), addr, | |||||
| args_addr_, args_size_, offset); | args_addr_, args_size_, offset); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -157,7 +158,7 @@ Status ZeroCopyTask::DistributeParam(bool async_mode, rtStream_t stream) { | |||||
| rt_err = rtMemcpyAsync(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE_EX, | rt_err = rtMemcpyAsync(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE_EX, | ||||
| stream); | stream); | ||||
| } else { | } else { | ||||
| __builtin_prefetch(args_addr_); | |||||
| GE_BUILTIN_PREFETCH(args_addr_); | |||||
| rt_err = rtMemcpy(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE); | rt_err = rtMemcpy(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE); | ||||
| } | } | ||||
| @@ -166,7 +167,7 @@ Status ZeroCopyTask::DistributeParam(bool async_mode, rtStream_t stream) { | |||||
| return RT_ERROR_TO_GE_STATUS(rt_err); | return RT_ERROR_TO_GE_STATUS(rt_err); | ||||
| } | } | ||||
| GELOGI("[ZCPY] %s refresh task args success, args_addr: %p, size: %zu, args_info_: %p, length: %zu", name_.c_str(), | |||||
| GELOGD("[ZCPY] %s refresh task args success, args_addr: %p, size: %zu, args_info_: %p, length: %zu", name_.c_str(), | |||||
| args_addr_, args_size_, args_info_.data(), args_info_.size()); | args_addr_, args_size_, args_info_.data(), args_info_.size()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -363,7 +363,7 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||||
| for (auto &subgraph : compute_graph->GetAllSubgraphs()) { | for (auto &subgraph : compute_graph->GetAllSubgraphs()) { | ||||
| (void)AttrUtils::SetStr(*subgraph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); | (void)AttrUtils::SetStr(*subgraph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); | ||||
| } | } | ||||
| GELOGW("Get graph session_graph_id attr failed, set session id to default value: [0]"); | |||||
| GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0]"); | |||||
| } | } | ||||
| GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id); | GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id); | ||||
| @@ -396,8 +396,6 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||||
| stages.builder.SetOptions(options_); | stages.builder.SetOptions(options_); | ||||
| var_acc_ctrl_.AddGraph(graph_id, compute_graph); | var_acc_ctrl_.AddGraph(graph_id, compute_graph); | ||||
| GELOGI("[GraphManager] add graph success, graph_id = %u.", graph_id); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -435,7 +433,7 @@ Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &grap | |||||
| for (auto &subgraph : new_compute_graph->GetAllSubgraphs()) { | for (auto &subgraph : new_compute_graph->GetAllSubgraphs()) { | ||||
| (void)AttrUtils::SetStr(*subgraph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); | (void)AttrUtils::SetStr(*subgraph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); | ||||
| } | } | ||||
| GELOGW("Get graph session_graph_id attr failed, set session id to default value: [0]"); | |||||
| GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0]"); | |||||
| } | } | ||||
| GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id); | GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id); | ||||
| @@ -468,8 +466,6 @@ Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &grap | |||||
| stages.builder.SetOptions(options_); | stages.builder.SetOptions(options_); | ||||
| var_acc_ctrl_.AddGraph(graph_id, new_compute_graph); | var_acc_ctrl_.AddGraph(graph_id, new_compute_graph); | ||||
| GELOGI("[GraphManager] add graph success, graph_id = %u.", graph_id); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -546,7 +542,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr | |||||
| const auto &root_subgraph_list = sub_graph_map[compute_graph]; | const auto &root_subgraph_list = sub_graph_map[compute_graph]; | ||||
| std::string op_compile_strategy; | std::string op_compile_strategy; | ||||
| (void)AttrUtils::GetStr(compute_graph, ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); | (void)AttrUtils::GetStr(compute_graph, ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); | ||||
| GELOGI("OptimizeSubGraphWithMultiThreads Process op_compile_strategy:%s", op_compile_strategy.c_str()); | |||||
| GELOGD("OptimizeSubGraphWithMultiThreads Process op_compile_strategy:%s", op_compile_strategy.c_str()); | |||||
| for (const auto &subgraph : root_subgraph_list) { | for (const auto &subgraph : root_subgraph_list) { | ||||
| if (!op_compile_strategy.empty()) { | if (!op_compile_strategy.empty()) { | ||||
| (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); | (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); | ||||
| @@ -576,7 +572,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr | |||||
| vector_future.emplace_back(std::move(f)); | vector_future.emplace_back(std::move(f)); | ||||
| } | } | ||||
| } | } | ||||
| GELOGI("All sub graph num is %zu", vector_future.size()); | |||||
| GELOGD("All sub graph num is %zu", vector_future.size()); | |||||
| for (size_t i = 0; i < vector_future.size(); ++i) { | for (size_t i = 0; i < vector_future.size(); ++i) { | ||||
| Status ret_status = vector_future[i].get(); | Status ret_status = vector_future[i].get(); | ||||
| if (ret_status != SUCCESS) { | if (ret_status != SUCCESS) { | ||||
| @@ -700,7 +696,7 @@ Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_gr | |||||
| /// Multiply optimize subgraph: | /// Multiply optimize subgraph: | ||||
| /// 1. run lx buffer while build_mode is normal and buffer_optimize is empty or "off_optimize"; | /// 1. run lx buffer while build_mode is normal and buffer_optimize is empty or "off_optimize"; | ||||
| /// 2. run lx fusion or buffer according build_mode and build_step in fe. | /// 2. run lx fusion or buffer according build_mode and build_step in fe. | ||||
| GELOGI("Directly optimize subgraph with build mode:%s, and step:%s, buffer_optimize:%s.", | |||||
| GELOGD("Directly optimize subgraph with build mode:%s, and step:%s, buffer_optimize:%s.", | |||||
| options_.build_mode.c_str(), | options_.build_mode.c_str(), | ||||
| options_.build_step.c_str(), | options_.build_step.c_str(), | ||||
| buffer_optimize.c_str()); | buffer_optimize.c_str()); | ||||
| @@ -747,7 +743,7 @@ Status GraphManager::PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, | |||||
| GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); | GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); | ||||
| GE_CHK_STATUS_RET(stages.optimizer.IdentifyReference(compute_graph), "Identify reference failed."); | GE_CHK_STATUS_RET(stages.optimizer.IdentifyReference(compute_graph), "Identify reference failed."); | ||||
| GELOGI("PreRun:PreRunOptimizeOriginalGraph success."); | |||||
| GELOGD("PreRun:PreRunOptimizeOriginalGraph success."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -762,10 +758,10 @@ Status GraphManager::PreRunOptimizeSubGraph(const GraphNodePtr &graph_node, | |||||
| if (options_.build_mode == BUILD_MODE_TUNING && options_.build_step == BUILD_STEP_AFTER_UB_MATCH) { | if (options_.build_mode == BUILD_MODE_TUNING && options_.build_step == BUILD_STEP_AFTER_UB_MATCH) { | ||||
| std::string tuning_path; | std::string tuning_path; | ||||
| (void) GetContext().GetOption(TUNING_PATH, tuning_path); | (void) GetContext().GetOption(TUNING_PATH, tuning_path); | ||||
| GELOGI("Dump path:%s.", tuning_path.c_str()); | |||||
| GELOGD("Dump path:%s.", tuning_path.c_str()); | |||||
| GraphUtils::DumpGEGraph(compute_graph, "", true, tuning_path); | GraphUtils::DumpGEGraph(compute_graph, "", true, tuning_path); | ||||
| } | } | ||||
| GELOGI("PreRun:PreRunOptimizeSubGraph success."); | |||||
| GELOGD("PreRun:PreRunOptimizeSubGraph success."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -785,12 +781,12 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, | |||||
| } | } | ||||
| GM_RUN_AND_DUMP_PERF("Build", Build, graph_node, compute_graph, ge_root_model, session_id); | GM_RUN_AND_DUMP_PERF("Build", Build, graph_node, compute_graph, ge_root_model, session_id); | ||||
| GELOGI("PreRun:PreRunAfterOptimizeSubGraph success."); | |||||
| GELOGD("PreRun:PreRunAfterOptimizeSubGraph success."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint64_t session_id, uint32_t graph_id) { | Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint64_t session_id, uint32_t graph_id) { | ||||
| GELOGI("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.", session_id, graph_id, | |||||
| GELOGD("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.", session_id, graph_id, | |||||
| static_cast<int>(mode), ge::GetContext().DeviceId()); | static_cast<int>(mode), ge::GetContext().DeviceId()); | ||||
| rtError_t rt_ret = rtCtxCreate(&rt_context, mode, ge::GetContext().DeviceId()); | rtError_t rt_ret = rtCtxCreate(&rt_context, mode, ge::GetContext().DeviceId()); | ||||
| @@ -1251,7 +1247,7 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const | |||||
| Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs, | Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs, | ||||
| GeRootModelPtr &ge_root_model, uint64_t session_id, bool async) { | GeRootModelPtr &ge_root_model, uint64_t session_id, bool async) { | ||||
| GELOGI("[BuildGraph] start to build graph, graph_id=%u.", graph_id); | |||||
| GELOGD("[BuildGraph] start to build graph, graph_id=%u.", graph_id); | |||||
| if (inputs.empty()) { | if (inputs.empty()) { | ||||
| GELOGW("[BuildGraph] BuildGraph warning: empty GeTensor inputs"); | GELOGW("[BuildGraph] BuildGraph warning: empty GeTensor inputs"); | ||||
| } | } | ||||
| @@ -1531,7 +1527,6 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti | |||||
| // Set save_original_model flag (ge.save_original_model) | // Set save_original_model flag (ge.save_original_model) | ||||
| ParseOption(options, SAVE_ORIGINAL_MODEL, options_.save_original_model); | ParseOption(options, SAVE_ORIGINAL_MODEL, options_.save_original_model); | ||||
| GELOGI("Set save original model flag %s", options_.save_original_model.c_str()); | |||||
| // Original model file name | // Original model file name | ||||
| ParseOption(options, ORIGINAL_MODEL_FILE, options_.original_model_file); | ParseOption(options, ORIGINAL_MODEL_FILE, options_.original_model_file); | ||||
| @@ -1540,16 +1535,6 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti | |||||
| ParseOption(options, DYNAMIC_NODE_TYPE, options_.dynamic_node_type); | ParseOption(options, DYNAMIC_NODE_TYPE, options_.dynamic_node_type); | ||||
| GELOGD("Dynamic dims params: input shape is %s, dynamic dims is %s, dynamic node type is %d.", | GELOGD("Dynamic dims params: input shape is %s, dynamic dims is %s, dynamic node type is %d.", | ||||
| options_.input_shape.c_str(), options_.dynamic_dims.c_str(), options_.dynamic_node_type); | options_.input_shape.c_str(), options_.dynamic_dims.c_str(), options_.dynamic_node_type); | ||||
| if ((!options_.input_shape.empty() && options_.dynamic_dims.empty()) || | |||||
| (options_.input_shape.empty() && !options_.dynamic_dims.empty())) { | |||||
| GELOGE(GRAPH_PARAM_INVALID, "Should set input shape and dynamic dims at the same time"); | |||||
| return GRAPH_PARAM_INVALID; | |||||
| } | |||||
| if ((!options_.input_shape.empty() && options_.dynamic_node_type == kInvalidDynaimcDimsType) || | |||||
| (!options_.dynamic_dims.empty() && options_.dynamic_node_type == kInvalidDynaimcDimsType)) { | |||||
| GELOGE(GRAPH_PARAM_INVALID, "Should set valid dynamic node type"); | |||||
| return GRAPH_PARAM_INVALID; | |||||
| } | |||||
| // Set Build model and step | // Set Build model and step | ||||
| ParseOption(options, BUILD_MODE, options_.build_mode); | ParseOption(options, BUILD_MODE, options_.build_mode); | ||||
| @@ -2252,7 +2237,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||||
| } | } | ||||
| Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | ||||
| GELOGI("Start optimize after merge sub graph."); | |||||
| GELOGD("Start optimize after merge sub graph."); | |||||
| PassManager after_merge_passes; | PassManager after_merge_passes; | ||||
| GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::AfterMergePasses::LinkGenMaskNodesPass", | GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::AfterMergePasses::LinkGenMaskNodesPass", | ||||
| @@ -2502,7 +2487,7 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager | |||||
| ComputeGraphPtr compute_graph_tmp = sub_graph_info_ptr->GetSubGraph(); | ComputeGraphPtr compute_graph_tmp = sub_graph_info_ptr->GetSubGraph(); | ||||
| const std::string &engine_name = sub_graph_info_ptr->GetEngineName(); | const std::string &engine_name = sub_graph_info_ptr->GetEngineName(); | ||||
| GELOGI("ProcessSubGraphWithMultiThreads start, graph name is %s, engine_name is %s, thread id is %lu", | |||||
| GELOGD("ProcessSubGraphWithMultiThreads start, graph name is %s, engine_name is %s, thread id is %lu", | |||||
| compute_graph_tmp != nullptr ? compute_graph_tmp->GetName().c_str() : "", engine_name.c_str(), | compute_graph_tmp != nullptr ? compute_graph_tmp->GetName().c_str() : "", engine_name.c_str(), | ||||
| pthread_self()); | pthread_self()); | ||||
| GE_DUMP(compute_graph_tmp, "OptimizeSubGraphBefore"); | GE_DUMP(compute_graph_tmp, "OptimizeSubGraphBefore"); | ||||
| @@ -2514,11 +2499,11 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager | |||||
| GELOGE(ret, "SubGraph optimize Failed %s", engine_name.c_str()); | GELOGE(ret, "SubGraph optimize Failed %s", engine_name.c_str()); | ||||
| return ret; | return ret; | ||||
| } else { | } else { | ||||
| GELOGI("SubGraph optimize success %s", engine_name.c_str()); | |||||
| GELOGD("SubGraph optimize success %s", engine_name.c_str()); | |||||
| } | } | ||||
| GE_DUMP(compute_graph_tmp, "OptimizeSubGraphAfter"); | GE_DUMP(compute_graph_tmp, "OptimizeSubGraphAfter"); | ||||
| sub_graph_info_ptr->SetSubGraph(compute_graph_tmp); | sub_graph_info_ptr->SetSubGraph(compute_graph_tmp); | ||||
| GELOGI("ProcessSubGraphWithMultiThreads end, graph name is %s, engine_name is %s, thread id is %lu", | |||||
| GELOGD("ProcessSubGraphWithMultiThreads end, graph name is %s, engine_name is %s, thread id is %lu", | |||||
| compute_graph_tmp != nullptr ? compute_graph_tmp->GetName().c_str() : "", engine_name.c_str(), | compute_graph_tmp != nullptr ? compute_graph_tmp->GetName().c_str() : "", engine_name.c_str(), | ||||
| pthread_self()); | pthread_self()); | ||||
| } else { | } else { | ||||
| @@ -2849,13 +2834,15 @@ void GraphManager::RunThread(GraphManager *graph_manager) { | |||||
| if (args.graph_node->graph_run_async_listener_ != nullptr) { | if (args.graph_node->graph_run_async_listener_ != nullptr) { | ||||
| args.graph_node->graph_run_async_listener_->SetCallback(args.callback); | args.graph_node->graph_run_async_listener_->SetCallback(args.callback); | ||||
| } | } | ||||
| Status ret; | |||||
| // parse inputs.dims to vector<vector<uint64_t>> dynamic_dims | // parse inputs.dims to vector<vector<uint64_t>> dynamic_dims | ||||
| if (graph_manager->ParseInputsDims(args.input_tensor) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "Parse input dims failed."); | |||||
| ret = graph_manager->ParseInputsDims(args.input_tensor); | |||||
| if (ret != SUCCESS) { | |||||
| ReturnError(graph_manager, args.callback, ret, "ParseInputsDims failed, thread exit."); | |||||
| args.graph_node->Unlock(); | |||||
| return; | return; | ||||
| } | } | ||||
| Status ret; | |||||
| if (!args.graph_node->GetLoadFlag()) { | if (!args.graph_node->GetLoadFlag()) { | ||||
| ret = graph_manager->LoadGraphAsync(args.ge_root_model, args.graph_node); | ret = graph_manager->LoadGraphAsync(args.ge_root_model, args.graph_node); | ||||
| if (ret != SUCCESS || args.ge_root_model == nullptr) { | if (ret != SUCCESS || args.ge_root_model == nullptr) { | ||||
| @@ -2880,12 +2867,12 @@ void GraphManager::RunThread(GraphManager *graph_manager) { | |||||
| ret = graph_manager->graph_executor_.ExecuteGraphAsync(args.graph_id, args.graph_node->GetGeRootModel(), | ret = graph_manager->graph_executor_.ExecuteGraphAsync(args.graph_id, args.graph_node->GetGeRootModel(), | ||||
| args.input_tensor); | args.input_tensor); | ||||
| args.graph_node->SetRunFlag(false); | args.graph_node->SetRunFlag(false); | ||||
| args.graph_node->Unlock(); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "[GraphManager] Run graph async failed, graph_id=%u.", args.graph_id); | |||||
| StopQueue(graph_manager); | |||||
| ReturnError(graph_manager, args.callback, ret, "ExecuteGraphAsync failed, thread exit."); | |||||
| args.graph_node->Unlock(); | |||||
| return; | return; | ||||
| } | } | ||||
| args.graph_node->Unlock(); | |||||
| GELOGI("[GraphManager] Run graph async success, graph_id=%u.", args.graph_id); | GELOGI("[GraphManager] Run graph async success, graph_id=%u.", args.graph_id); | ||||
| } | } | ||||
| } | } | ||||
| @@ -92,13 +92,13 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen | |||||
| GELOGD("VarResource::SaveVarAddr, var_key = %s", var_key.c_str()); | GELOGD("VarResource::SaveVarAddr, var_key = %s", var_key.c_str()); | ||||
| if (var_addr_mgr_map_.count(var_key) == 0) { | if (var_addr_mgr_map_.count(var_key) == 0) { | ||||
| uint64_t logic_address = VarManager::Instance(session_id_)->GetVarMemLogicBase() + | uint64_t logic_address = VarManager::Instance(session_id_)->GetVarMemLogicBase() + | ||||
| reinterpret_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address)); | |||||
| static_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address)); | |||||
| GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(), | GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(), | ||||
| TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(), | TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(), | ||||
| TypeUtils::DataTypeToSerialString(tensor_desc.GetDataType()).c_str()); | TypeUtils::DataTypeToSerialString(tensor_desc.GetDataType()).c_str()); | ||||
| VarAddrMgr var_addr_mgr; | VarAddrMgr var_addr_mgr; | ||||
| var_addr_mgr.address = reinterpret_cast<uint8_t *>(reinterpret_cast<std::uintptr_t>(logic_address)); | |||||
| var_addr_mgr.offset = reinterpret_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address)); | |||||
| var_addr_mgr.address = reinterpret_cast<uint8_t *>(static_cast<std::uintptr_t>(logic_address)); | |||||
| var_addr_mgr.offset = static_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address)); | |||||
| var_addr_mgr.tensor_desc = tensor_desc; | var_addr_mgr.tensor_desc = tensor_desc; | ||||
| var_addr_mgr.memory_type = memory_type; | var_addr_mgr.memory_type = memory_type; | ||||
| var_addr_mgr_map_[var_key] = var_addr_mgr; | var_addr_mgr_map_[var_key] = var_addr_mgr; | ||||
| @@ -510,7 +510,7 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen | |||||
| } | } | ||||
| result = var_resource_->SaveVarAddr( | result = var_resource_->SaveVarAddr( | ||||
| var_name, tensor_desc, reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(mem_offset)), memory_type); | |||||
| var_name, tensor_desc, reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(mem_offset)), memory_type); | |||||
| if (result != SUCCESS) { | if (result != SUCCESS) { | ||||
| GELOGE(ge::INTERNAL_ERROR, "AssignVarMem by offset failed."); | GELOGE(ge::INTERNAL_ERROR, "AssignVarMem by offset failed."); | ||||
| return ge::INTERNAL_ERROR; | return ge::INTERNAL_ERROR; | ||||
| @@ -527,7 +527,7 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen | |||||
| result = var_resource_->GetCurVarDesc(var_name, cur_tensor_desc); | result = var_resource_->GetCurVarDesc(var_name, cur_tensor_desc); | ||||
| if (result != SUCCESS) { | if (result != SUCCESS) { | ||||
| var_resource_->SetVarAddr(var_name, tensor_desc, | var_resource_->SetVarAddr(var_name, tensor_desc, | ||||
| reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(mem_offset)), memory_type); | |||||
| reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(mem_offset)), memory_type); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -542,7 +542,7 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen | |||||
| ge::TypeUtils::FormatToSerialString(cur_tensor_desc.GetFormat()).c_str(), | ge::TypeUtils::FormatToSerialString(cur_tensor_desc.GetFormat()).c_str(), | ||||
| cur_tensor_desc.GetShape().GetDims().size()); | cur_tensor_desc.GetShape().GetDims().size()); | ||||
| var_resource_->SetVarAddr(var_name, tensor_desc, | var_resource_->SetVarAddr(var_name, tensor_desc, | ||||
| reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(mem_offset)), memory_type); | |||||
| reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(mem_offset)), memory_type); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -642,7 +642,7 @@ ge::Status VarManager::SyncBroadCastData2Var(uint32_t graph_id, const std::strin | |||||
| bool VarManager::IsVarAddr(const int64_t &offset) { | bool VarManager::IsVarAddr(const int64_t &offset) { | ||||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | std::lock_guard<std::recursive_mutex> lock(mutex_); | ||||
| if (var_resource_ == nullptr) { | if (var_resource_ == nullptr) { | ||||
| GELOGW("VarManager has not been init."); | |||||
| GELOGD("VarManager has not been init."); | |||||
| return false; | return false; | ||||
| } | } | ||||
| return var_resource_->IsVarAddr(offset); | return var_resource_->IsVarAddr(offset); | ||||
| @@ -374,7 +374,7 @@ Status TransVarDataUtils::SyncVarData2BroadCast(const string &var_name, const ge | |||||
| GE_MAKE_GUARD_RTMEM(src_host_addr); | GE_MAKE_GUARD_RTMEM(src_host_addr); | ||||
| GE_CHK_STATUS_RET(SyncTensorToHost(var_name, src_tensor_desc, &src_host_addr, src_addr_size, session_id)); | GE_CHK_STATUS_RET(SyncTensorToHost(var_name, src_tensor_desc, &src_host_addr, src_addr_size, session_id)); | ||||
| GELOGI("src_addr_size: %u, dst_addr_size: %u", src_addr_size, dst_addr_size); | |||||
| GELOGI("src_addr_size: %ld, dst_addr_size: %ld", src_addr_size, dst_addr_size); | |||||
| GE_CHK_BOOL_RET_STATUS(src_addr_size == dst_addr_size, FAILED, "var data size is not equal broadcast "); | GE_CHK_BOOL_RET_STATUS(src_addr_size == dst_addr_size, FAILED, "var data size is not equal broadcast "); | ||||
| GE_CHK_RT_RET(rtMemcpy(dst_addr, dst_addr_size, src_host_addr, src_addr_size, RT_MEMCPY_HOST_TO_DEVICE)); | GE_CHK_RT_RET(rtMemcpy(dst_addr, dst_addr_size, src_host_addr, src_addr_size, RT_MEMCPY_HOST_TO_DEVICE)); | ||||
| @@ -403,7 +403,7 @@ Status TransVarDataUtils::SyncTensorToHost(const string &var_name, const ge::GeT | |||||
| GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, src_tensor_desc, &src_addr)); | GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, src_tensor_desc, &src_addr)); | ||||
| uint8_t *mem_addr = | uint8_t *mem_addr = | ||||
| src_addr - | src_addr - | ||||
| static_cast<int64_t>(reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) + | |||||
| static_cast<int64_t>(static_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) + | |||||
| static_cast<int64_t>( | static_cast<int64_t>( | ||||
| reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); | reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); | ||||
| GE_CHK_RT_RET(rtMallocHost(reinterpret_cast<void **>(host_addr), src_tensor_size)); | GE_CHK_RT_RET(rtMallocHost(reinterpret_cast<void **>(host_addr), src_tensor_size)); | ||||
| @@ -420,7 +420,7 @@ Status TransVarDataUtils::SyncTensorToDevice(const string &var_name, const uint8 | |||||
| GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, dst_tensor_desc, &dst_addr)); | GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, dst_tensor_desc, &dst_addr)); | ||||
| uint8_t *mem_addr = | uint8_t *mem_addr = | ||||
| dst_addr - | dst_addr - | ||||
| static_cast<int64_t>(reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) + | |||||
| static_cast<int64_t>(static_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) + | |||||
| static_cast<int64_t>( | static_cast<int64_t>( | ||||
| reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); | reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); | ||||
| GE_CHK_RT_RET(rtMemcpy(mem_addr, addr_size, host_addr, addr_size, RT_MEMCPY_HOST_TO_DEVICE)); | GE_CHK_RT_RET(rtMemcpy(mem_addr, addr_size, host_addr, addr_size, RT_MEMCPY_HOST_TO_DEVICE)); | ||||
| @@ -501,7 +501,7 @@ Status TransVarDataUtils::TransAllVarData(const vector<NodePtr> &variable_nodes, | |||||
| } | } | ||||
| Status TransVarDataUtils::CopyVarData(const ComputeGraphPtr &compute_graph, uint64_t session_id, uint32_t device_id) { | Status TransVarDataUtils::CopyVarData(const ComputeGraphPtr &compute_graph, uint64_t session_id, uint32_t device_id) { | ||||
| GELOGI("CopyVarData start: session_id:%lu.", session_id); | |||||
| GELOGD("CopyVarData start: session_id:%lu.", session_id); | |||||
| if (compute_graph == nullptr) { | if (compute_graph == nullptr) { | ||||
| GELOGE(FAILED, "compute_graph is nullptr"); | GELOGE(FAILED, "compute_graph is nullptr"); | ||||
| return FAILED; | return FAILED; | ||||
| @@ -32,7 +32,7 @@ Debug::~Debug() = default; | |||||
| void Debug::DumpProto(const Message &proto, const char *file) { | void Debug::DumpProto(const Message &proto, const char *file) { | ||||
| std::string file_path = RealPath(file); | std::string file_path = RealPath(file); | ||||
| int fd = open(file_path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); | |||||
| int fd = mmOpen2(file_path.c_str(), M_WRONLY | M_CREAT | O_TRUNC, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD | M_UMASK_OTHREAD); | |||||
| if (fd == -1) { | if (fd == -1) { | ||||
| GELOGW("Write %s failed", file_path.c_str()); | GELOGW("Write %s failed", file_path.c_str()); | ||||
| return; | return; | ||||
| @@ -40,7 +40,7 @@ void Debug::DumpProto(const Message &proto, const char *file) { | |||||
| auto output = ge::MakeShared<FileOutputStream>(fd); | auto output = ge::MakeShared<FileOutputStream>(fd); | ||||
| if (output == nullptr) { | if (output == nullptr) { | ||||
| GELOGW("create output failed."); | GELOGW("create output failed."); | ||||
| if (close(fd) != 0) { | |||||
| if (mmClose(fd) != 0) { | |||||
| GELOGW("close fd failed."); | GELOGW("close fd failed."); | ||||
| } | } | ||||
| return; | return; | ||||
| @@ -49,7 +49,7 @@ void Debug::DumpProto(const Message &proto, const char *file) { | |||||
| if (!ret) { | if (!ret) { | ||||
| GELOGW("dump proto failed."); | GELOGW("dump proto failed."); | ||||
| } | } | ||||
| if (close(fd) != 0) { | |||||
| if (mmClose(fd) != 0) { | |||||
| GELOGW("close fd failed."); | GELOGW("close fd failed."); | ||||
| } | } | ||||
| } | } | ||||
| @@ -17,7 +17,6 @@ | |||||
| #ifndef GE_GRAPH_MANAGER_UTIL_DEBUG_H_ | #ifndef GE_GRAPH_MANAGER_UTIL_DEBUG_H_ | ||||
| #define GE_GRAPH_MANAGER_UTIL_DEBUG_H_ | #define GE_GRAPH_MANAGER_UTIL_DEBUG_H_ | ||||
| #include <fcntl.h> | |||||
| #include <sys/stat.h> | #include <sys/stat.h> | ||||
| #include <sys/types.h> | #include <sys/types.h> | ||||
| #include <time.h> | #include <time.h> | ||||
| @@ -25,7 +24,6 @@ | |||||
| #include <google/protobuf/io/coded_stream.h> | #include <google/protobuf/io/coded_stream.h> | ||||
| #include <google/protobuf/io/zero_copy_stream_impl.h> | #include <google/protobuf/io/zero_copy_stream_impl.h> | ||||
| #include <google/protobuf/text_format.h> | #include <google/protobuf/text_format.h> | ||||
| #include <unistd.h> | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <fstream> | #include <fstream> | ||||
| #include <iosfwd> | #include <iosfwd> | ||||
| @@ -58,8 +58,7 @@ void AddNodeInputProperty(ComputeGraphPtr &compute_graph) { | |||||
| for (auto &in_data_anchor : node->GetAllInDataAnchors()) { | for (auto &in_data_anchor : node->GetAllInDataAnchors()) { | ||||
| auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | ||||
| GE_IF_BOOL_EXEC( | |||||
| peer_out_anchor == nullptr, GELOGW("peer_out_anchor is nullptr! node: %s", node->GetName().c_str()); continue); | |||||
| GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | |||||
| ge::NodePtr src_node = peer_out_anchor->GetOwnerNode(); | ge::NodePtr src_node = peer_out_anchor->GetOwnerNode(); | ||||
| src_index_list = node_op_desc->GetSrcIndex(); | src_index_list = node_op_desc->GetSrcIndex(); | ||||
| @@ -242,11 +241,11 @@ Status GraphOptimize::OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_gr | |||||
| } | } | ||||
| auto graph_optimizer = instance_ptr->OpsKernelManagerObj().GetAllGraphOptimizerObjsByPriority(); | auto graph_optimizer = instance_ptr->OpsKernelManagerObj().GetAllGraphOptimizerObjsByPriority(); | ||||
| GELOGI("optimize by opskernel in graph optimize before build phase. num of graph_optimizer is %zu.", | |||||
| GELOGD("optimize by opskernel in graph optimize before build phase. num of graph_optimizer is %zu.", | |||||
| graph_optimizer.size()); | graph_optimizer.size()); | ||||
| Status ret = SUCCESS; | Status ret = SUCCESS; | ||||
| string exclude_core_Type = (core_type_ == kVectorCore) ? kAicoreEngine : kVectorEngine; | string exclude_core_Type = (core_type_ == kVectorCore) ? kAicoreEngine : kVectorEngine; | ||||
| GELOGI("[OptimizeGraphBeforeBuildForRts]: engine type will exclude: %s, core_type_: %s", | |||||
| GELOGD("[OptimizeGraphBeforeBuildForRts]: engine type will exclude: %s, core_type_: %s", | |||||
| exclude_core_Type.c_str(), core_type_.c_str()); | exclude_core_Type.c_str(), core_type_.c_str()); | ||||
| if (graph_optimizer.size() != 0) { | if (graph_optimizer.size() != 0) { | ||||
| for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) { | for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) { | ||||
| @@ -1,397 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "graph/optimize/optimizer/allreduce_fusion_pass.h" | |||||
| #include <string> | |||||
| #include "common/debug/log.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "common/types.h" | |||||
| #include "common/util.h" | |||||
| #include "graph/anchor.h" | |||||
| #include "graph/node.h" | |||||
| #include "graph/op_desc.h" | |||||
| #include "graph/utils/attr_utils.h" | |||||
| #include "graph/utils/graph_utils.h" | |||||
| #include "graph/utils/tensor_utils.h" | |||||
| #include "graph/debug/ge_attr_define.h" | |||||
| #include "hccl/base.h" | |||||
| #include "hccl/hcom.h" | |||||
| namespace ge { | |||||
| Status AllReducePass::Run(ge::ComputeGraphPtr graph) { | |||||
| GELOGI("FusionAllReducePass: start"); | |||||
| std::vector<NodePtr> fusionOps; | |||||
| std::vector<float> inputGradientSize; | |||||
| std::vector<float> inputGradientTime; | |||||
| static const float inputGradientSizeTemp = 0.0; | |||||
| static const float inputGradientTimeTemp = 0.0; | |||||
| // Get all nodes | |||||
| for (auto nodePtr : graph->GetDirectNode()) { | |||||
| GE_IF_BOOL_EXEC(nullptr == nodePtr, GELOGW("FusionAllReducePass: null node exists"); continue;); | |||||
| ge::OpDescPtr opDescPtr = nodePtr->GetOpDesc(); | |||||
| GE_IF_BOOL_EXEC(nullptr == opDescPtr, | |||||
| GELOGW("FusionAllReducePass: desc of node %s is null", nodePtr->GetName().c_str()); | |||||
| continue;) | |||||
| GE_IF_BOOL_EXEC(HCOMALLREDUCE == opDescPtr->GetType(), | |||||
| // the op is allreduce and fusion > 0, then run fusion | |||||
| std::int64_t hcom_fusion = 1; | |||||
| GE_IF_BOOL_EXEC(!ge::AttrUtils::GetInt(opDescPtr, HCOM_ATTR_FUSION, hcom_fusion), | |||||
| GELOGW("FusionAllReducePass: not get hcom_fusion from opDescPtr " | |||||
| "by HCOM_ATTR_FUSION")); | |||||
| GELOGI("after GetInt, hcom_fusion is :%ld", hcom_fusion); GE_IF_BOOL_EXEC( | |||||
| hcom_fusion > 0, fusionOps.push_back(nodePtr); inputGradientSize.push_back(inputGradientSizeTemp); | |||||
| inputGradientTime.push_back(inputGradientTimeTemp);)) | |||||
| } | |||||
| // The number of allredecue operator must be more than 1 | |||||
| GE_IF_BOOL_EXEC(1 >= fusionOps.size(), GELOGW("FusionAllReducePass NOT_CHANGED: the graph has " | |||||
| "%lu allreduce operator", | |||||
| fusionOps.size()); | |||||
| return NOT_CHANGED;); | |||||
| string group = "group"; | |||||
| u32 gradientNum = fusionOps.size(); | |||||
| string model_name_str = graph->GetName(); | |||||
| const char *model_name = model_name_str.c_str(); | |||||
| model_feature modelFeature{model_name, gradientNum, inputGradientSize.data(), inputGradientTime.data()}; | |||||
| u32 segmentNum = 0; | |||||
| u32 segmentIndex[HCCL_MAX_SEGMENT_NUM] = {}; | |||||
| // Call HCCL function: hcom_gradient_segment | |||||
| GELOGI("FusionAllReducePass: invoking hcom_get_split_strategy"); | |||||
| GE_IF_BOOL_EXEC(HCCL_SUCCESS != hcom_get_split_strategy(group.c_str(), &modelFeature, HCCL_MAX_SEGMENT_NUM, | |||||
| &segmentNum, segmentIndex), | |||||
| GELOGE(FAILED, "FusionAllReducePass FAILED: the graph has %lu allreduce operator", fusionOps.size()); | |||||
| return FAILED;) | |||||
| GELOGI("FusionAllReducePass: invoke hcom_get_split_strategy successfully"); | |||||
| // check whether segmentNum is legal or not | |||||
| GE_IF_BOOL_EXEC((HCCL_MAX_SEGMENT_NUM < segmentNum || 1 > segmentNum || segmentNum > gradientNum), | |||||
| GELOGE(FAILED, | |||||
| "FusionAllReducePass FAILED: illegal segmentNum=%u, " | |||||
| "HCCL_MAX_SEGMENT_NUM=%u, gradientNum=%u", | |||||
| segmentNum, HCCL_MAX_SEGMENT_NUM, gradientNum); | |||||
| return FAILED;); | |||||
| // check whether segmentIndex is legal or not | |||||
| GE_IF_BOOL_EXEC((segmentIndex[segmentNum - 1] != gradientNum - 1), | |||||
| GELOGE(FAILED, | |||||
| "FusionAllReducePass FAILED: illegal segmentIndex[0]=%u, " | |||||
| "segmentIndex[segmentNum-1]=%u, gradientNum=%u", | |||||
| segmentIndex[0], segmentIndex[(segmentNum)-1], gradientNum); | |||||
| return FAILED;); | |||||
| for (uint32_t i = 0; i < segmentNum - 1; i++) { | |||||
| GE_IF_BOOL_EXEC(segmentIndex[i] >= segmentIndex[i + 1], GELOGE(FAILED, | |||||
| "FusionAllReducePass FAILED: illegal " | |||||
| "segmentIndex[%u]=%u, segmentIndex[%u]=%u", | |||||
| i, segmentIndex[i], i + 1, segmentIndex[i + 1]); | |||||
| return FAILED;); | |||||
| } | |||||
| // check whether fusion is needed or not | |||||
| GE_IF_BOOL_EXEC( | |||||
| segmentNum == gradientNum, | |||||
| GELOGE(NOT_CHANGED, "FusionAllReducePass NOT_CHANGED: segmentNum=%u, gradientNum=%u", segmentNum, gradientNum); | |||||
| return NOT_CHANGED;) | |||||
| std::unordered_set<void *> anchorPtrSet; | |||||
| std::vector<ge::OutDataAnchorPtr> fusionOpPeerOutDataAnchor; | |||||
| std::vector<ge::OutDataAnchorPtr> fusionOpPeerOutDataToInControl; | |||||
| std::vector<ge::OutControlAnchorPtr> fusionOpPeerOutControlAnchor; | |||||
| std::vector<std::pair<int, ge::InDataAnchorPtr>> fusionOpPeerInDataAnchor; | |||||
| std::vector<std::pair<int, ge::InControlAnchorPtr>> fusionOpPeerInControlFromOutData; | |||||
| std::vector<ge::InControlAnchorPtr> fusionOpPeerInControlAnchor; | |||||
| ge::OutControlAnchorPtr previousNewAllreduceOutControlAnchor = nullptr; | |||||
| // Traversing the segmentNum | |||||
| uint32_t start = 0; | |||||
| uint32_t end = 0; | |||||
| for (uint32_t segmentIdx = 0; segmentIdx < segmentNum; segmentIdx++) { | |||||
| end = segmentIndex[segmentIdx]; | |||||
| GE_IF_BOOL_EXEC(end - start < 1, | |||||
| GELOGI("FusionAllReducePass: segmentIndex[%u]=%u", segmentIdx, segmentIndex[segmentIdx]); | |||||
| start = end + 1; continue;); | |||||
| ge::OpDescPtr originDescPtr = fusionOps[start]->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(originDescPtr); | |||||
| ge::OpDescPtr newAllreduceDesc = AttrUtils::CloneOpDesc(originDescPtr); | |||||
| GE_CHECK_NOTNULL(newAllreduceDesc); | |||||
| // Cleat buffer | |||||
| anchorPtrSet.clear(); | |||||
| fusionOpPeerOutDataAnchor.clear(); | |||||
| fusionOpPeerOutDataToInControl.clear(); | |||||
| fusionOpPeerOutControlAnchor.clear(); | |||||
| fusionOpPeerInDataAnchor.clear(); | |||||
| fusionOpPeerInControlFromOutData.clear(); | |||||
| fusionOpPeerInControlAnchor.clear(); | |||||
| // Traversing the Allreduce operators of each group | |||||
| int outDataAnchorIndex = 0; | |||||
| GE_CHK_STATUS_RET(GetPeerOutDataToInData(anchorPtrSet, fusionOpPeerOutDataAnchor, fusionOps[start]), | |||||
| "Get peer outDataAnchor to inDataAnchor failed"); | |||||
| GE_CHK_STATUS_RET(GetPeerInAnchorToOutData(anchorPtrSet, fusionOpPeerInDataAnchor, fusionOpPeerInControlFromOutData, | |||||
| fusionOps[start]), | |||||
| "Get peer inDataAnchor and inControlAnchor to outDataAnchor failed"); | |||||
| GE_CHK_STATUS_RET(GetPeerOutDataToInControl(anchorPtrSet, fusionOpPeerOutDataToInControl, fusionOps[start]), | |||||
| "Get peer outDataAnchor to inControlAnchor failed"); | |||||
| GE_CHK_STATUS_RET(GetPeerOutControlToInControl(anchorPtrSet, fusionOpPeerOutControlAnchor, fusionOps[start]), | |||||
| "Get peer outControlAnchor to inControlAnchor failed"); | |||||
| GE_CHK_STATUS_RET(GetPeerInControlFromOutControl(anchorPtrSet, fusionOpPeerInControlAnchor, fusionOps[start]), | |||||
| "Get peer outControlAnchor from inControlAnchor failed"); | |||||
| GE_CHK_STATUS_RET(graph->RemoveNode(fusionOps[start]), "FusionAllReducePass FAILED: remove node %s\n.", | |||||
| fusionOps[start]->GetName().c_str()); | |||||
| for (uint32_t idx = start + 1; idx <= end; idx++) { | |||||
| GE_CHK_STATUS_RET( | |||||
| GetPeerOutDataToInData(anchorPtrSet, fusionOpPeerOutDataAnchor, fusionOps[idx], newAllreduceDesc), | |||||
| "Get peer outDataAnchor to inDataAnchor failed"); | |||||
| GE_CHK_STATUS_RET(GetPeerOutDataToInControl(anchorPtrSet, fusionOpPeerOutDataToInControl, fusionOps[idx]), | |||||
| "Get peer outDataAnchor to inControlAnchor failed"); | |||||
| GE_CHK_STATUS_RET(GetPeerOutControlToInControl(anchorPtrSet, fusionOpPeerOutControlAnchor, fusionOps[idx]), | |||||
| "Get peer outControlAnchor to inControlAnchor failed"); | |||||
| GE_CHK_STATUS_RET( | |||||
| GetPeerAnchorFromOutData(anchorPtrSet, fusionOpPeerInDataAnchor, fusionOpPeerInControlFromOutData, | |||||
| fusionOps[idx], newAllreduceDesc, outDataAnchorIndex), | |||||
| "Get peerAnchor from outDataAnchor failed"); | |||||
| GE_CHK_STATUS_RET(GetPeerInControlFromOutControl(anchorPtrSet, fusionOpPeerInControlAnchor, fusionOps[idx]), | |||||
| "Get peer outControlAnchor from inControlAnchor failed"); | |||||
| // Delete the node | |||||
| GE_CHK_STATUS_RET(graph->RemoveNode(fusionOps[idx]), "FusionAllReducePass FAILED: remove node %s\n.", | |||||
| fusionOps[idx]->GetName().c_str()); | |||||
| } | |||||
| NodePtr newAllReducePtr = graph->AddNode(newAllreduceDesc); | |||||
| GE_CHECK_NOTNULL(newAllReducePtr); | |||||
| // Link the inputDataAnchor | |||||
| for (uint32_t i = 0; i < fusionOpPeerOutDataAnchor.size(); i++) { | |||||
| GE_CHK_STATUS_RET( | |||||
| GraphUtils::AddEdge(fusionOpPeerOutDataAnchor[i], newAllReducePtr->GetInDataAnchor(static_cast<int>(i))), | |||||
| "FusionAllReducePass FAILED: add input data edge failed"); | |||||
| } | |||||
| // Link the inputControlAnchor | |||||
| for (uint32_t i = 0; i < fusionOpPeerOutControlAnchor.size(); i++) { | |||||
| GE_CHK_STATUS_RET(GraphUtils::AddEdge(fusionOpPeerOutControlAnchor[i], newAllReducePtr->GetInControlAnchor()), | |||||
| "FusionAllReducePass FAILED: add input control edge failed"); | |||||
| } | |||||
| for (uint32_t i = 0; i < fusionOpPeerOutDataToInControl.size(); i++) { | |||||
| GE_CHK_STATUS_RET(GraphUtils::AddEdge(fusionOpPeerOutDataToInControl[i], newAllReducePtr->GetInControlAnchor()), | |||||
| "FusionAllReducePass FAILED: add edge from out data to incontrol " | |||||
| "failed"); | |||||
| } | |||||
| // Link the outputDataAnchor | |||||
| for (uint32_t i = 0; i < fusionOpPeerInDataAnchor.size(); i++) { | |||||
| auto peerInDataAnchor = fusionOpPeerInDataAnchor[i].second; | |||||
| GE_CHK_STATUS_RET( | |||||
| GraphUtils::AddEdge(newAllReducePtr->GetOutDataAnchor(fusionOpPeerInDataAnchor[i].first), peerInDataAnchor), | |||||
| "FusionAllReducePass FAILED: add output data edge failed"); | |||||
| } | |||||
| for (uint32_t i = 0; i < fusionOpPeerInControlFromOutData.size(); i++) { | |||||
| auto peerInControlAnchor = fusionOpPeerInControlFromOutData[i].second; | |||||
| GE_CHK_STATUS_RET( | |||||
| GraphUtils::AddEdge(newAllReducePtr->GetOutDataAnchor(fusionOpPeerInControlFromOutData[i].first), | |||||
| peerInControlAnchor), | |||||
| "FusionAllReducePass FAILED: add edge from out data to in control " | |||||
| "failed"); | |||||
| } | |||||
| // Link the outputControlAnchor | |||||
| for (uint32_t i = 0; i < fusionOpPeerInControlAnchor.size(); i++) { | |||||
| GE_CHK_STATUS_RET(GraphUtils::AddEdge(newAllReducePtr->GetOutControlAnchor(), fusionOpPeerInControlAnchor[i]), | |||||
| "FusionAllReducePass FAILED: add output control edge failed"); | |||||
| } | |||||
| // Link the newAllreduce | |||||
| if (segmentIdx > 0 && previousNewAllreduceOutControlAnchor != nullptr) { | |||||
| GE_CHK_STATUS_RET( | |||||
| GraphUtils::AddEdge(previousNewAllreduceOutControlAnchor, newAllReducePtr->GetInControlAnchor()), | |||||
| "FusionAllReducePass FAILED: add input previous control edge failed"); | |||||
| } | |||||
| previousNewAllreduceOutControlAnchor = newAllReducePtr->GetOutControlAnchor(); | |||||
| start = end + 1; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AllReducePass::GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet, | |||||
| vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec, | |||||
| ge::NodePtr &srcNodePtr) { | |||||
| for (auto inDataAnchor : srcNodePtr->GetAllInDataAnchors()) { | |||||
| GE_IF_BOOL_EXEC(inDataAnchor == nullptr, continue;); | |||||
| OutDataAnchorPtr peerOutDataAnchor = inDataAnchor->GetPeerOutAnchor(); | |||||
| GE_IF_BOOL_EXEC(peerOutDataAnchor == nullptr, continue;); | |||||
| if (anchorSet.count(peerOutDataAnchor.get()) == 0) { | |||||
| peerOutDataAnchorVec.push_back(peerOutDataAnchor); | |||||
| anchorSet.insert(peerOutDataAnchor.get()); | |||||
| GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(peerOutDataAnchor, inDataAnchor)); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AllReducePass::GetPeerInAnchorToOutData( | |||||
| std::unordered_set<void *> &anchorSet, std::vector<std::pair<int, ge::InDataAnchorPtr>> &fusionOpPeerInDataAnchor, | |||||
| std::vector<std::pair<int, ge::InControlAnchorPtr>> &fusionOpPeerInControlFromOutData, ge::NodePtr &srcNodePtr) { | |||||
| for (auto outDataAnchor : srcNodePtr->GetAllOutDataAnchors()) { | |||||
| GE_IF_BOOL_EXEC(outDataAnchor == nullptr, continue;); | |||||
| for (auto peerInDataAnchor : outDataAnchor->GetPeerInDataAnchors()) { | |||||
| GE_IF_BOOL_EXEC(peerInDataAnchor == nullptr, continue;); | |||||
| if (anchorSet.count(peerInDataAnchor.get()) == 0) { | |||||
| std::pair<int, ge::InDataAnchorPtr> pairPeerInDataAnchor; | |||||
| pairPeerInDataAnchor.first = 0; | |||||
| pairPeerInDataAnchor.second = peerInDataAnchor; | |||||
| fusionOpPeerInDataAnchor.push_back(pairPeerInDataAnchor); | |||||
| anchorSet.insert(peerInDataAnchor.get()); | |||||
| GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInDataAnchor)); | |||||
| } | |||||
| } | |||||
| for (auto peerInControlAnchorFromData : outDataAnchor->GetPeerInControlAnchors()) { | |||||
| GE_IF_BOOL_EXEC(peerInControlAnchorFromData == nullptr, continue;); | |||||
| if (anchorSet.count(peerInControlAnchorFromData.get()) == 0) { | |||||
| std::pair<uint32_t, ge::InControlAnchorPtr> pairPeerInControlAnchorFromData; | |||||
| pairPeerInControlAnchorFromData.first = 0; | |||||
| pairPeerInControlAnchorFromData.second = peerInControlAnchorFromData; | |||||
| fusionOpPeerInControlFromOutData.push_back(pairPeerInControlAnchorFromData); | |||||
| anchorSet.insert(peerInControlAnchorFromData.get()); | |||||
| GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInControlAnchorFromData)); | |||||
| } | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AllReducePass::GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet, | |||||
| vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec, | |||||
| ge::NodePtr &srcNodePtr, ge::OpDescPtr &dstOpDescPtr) { | |||||
| for (auto inDataAnchor : srcNodePtr->GetAllInDataAnchors()) { | |||||
| GE_IF_BOOL_EXEC(inDataAnchor == nullptr, continue;); | |||||
| OutDataAnchorPtr peerOutDataAnchor = inDataAnchor->GetPeerOutAnchor(); | |||||
| GE_IF_BOOL_EXEC(peerOutDataAnchor == nullptr, continue;); | |||||
| if (anchorSet.count(peerOutDataAnchor.get()) == 0) { | |||||
| peerOutDataAnchorVec.push_back(peerOutDataAnchor); | |||||
| anchorSet.insert(peerOutDataAnchor.get()); | |||||
| if (dstOpDescPtr->AddInputDesc(inDataAnchor->GetOwnerNode()->GetOpDesc()->GetInputDesc(inDataAnchor->GetIdx())) != | |||||
| ge::GRAPH_SUCCESS) { | |||||
| GELOGW("GetPeerOutDataToInData: AddInputDesc failed"); | |||||
| } | |||||
| GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(peerOutDataAnchor, inDataAnchor)); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AllReducePass::GetPeerOutDataToInControl(std::unordered_set<void *> &anchorSet, | |||||
| vector<ge::OutDataAnchorPtr> &peerOutDataToInControlVec, | |||||
| ge::NodePtr &srcNodePtr) { | |||||
| InControlAnchorPtr inControlAnchor = srcNodePtr->GetInControlAnchor(); | |||||
| GE_CHECK_NOTNULL(inControlAnchor); | |||||
| for (auto peerOutDataToInControl : inControlAnchor->GetPeerOutDataAnchors()) { | |||||
| GE_IF_BOOL_EXEC(peerOutDataToInControl == nullptr, continue;); | |||||
| if (anchorSet.count(peerOutDataToInControl.get()) == 0) { | |||||
| peerOutDataToInControlVec.push_back(peerOutDataToInControl); | |||||
| anchorSet.insert(peerOutDataToInControl.get()); | |||||
| GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(peerOutDataToInControl, inControlAnchor)); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AllReducePass::GetPeerOutControlToInControl(std::unordered_set<void *> &anchorSet, | |||||
| vector<ge::OutControlAnchorPtr> &peerOutControlToInControlVec, | |||||
| ge::NodePtr &srcNodePtr) { | |||||
| InControlAnchorPtr inControlAnchor = srcNodePtr->GetInControlAnchor(); | |||||
| GE_CHECK_NOTNULL(inControlAnchor); | |||||
| for (auto peerOutControlAnchor : inControlAnchor->GetPeerOutControlAnchors()) { | |||||
| GE_IF_BOOL_EXEC(peerOutControlAnchor == nullptr, continue;); | |||||
| if (anchorSet.count(peerOutControlAnchor.get()) == 0) { | |||||
| peerOutControlToInControlVec.push_back(peerOutControlAnchor); | |||||
| anchorSet.insert(peerOutControlAnchor.get()); | |||||
| GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(peerOutControlAnchor, inControlAnchor)); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AllReducePass::GetPeerAnchorFromOutData( | |||||
| std::unordered_set<void *> &anchorSet, vector<std::pair<int, ge::InDataAnchorPtr>> &peerInDataFromOutDataVec, | |||||
| vector<std::pair<int, ge::InControlAnchorPtr>> &peerInControlFromOutDataVec, ge::NodePtr &srcNodePtr, | |||||
| ge::OpDescPtr &dstOpDescPtr, int &index) { | |||||
| for (auto outDataAnchor : srcNodePtr->GetAllOutDataAnchors()) { | |||||
| GE_IF_BOOL_EXEC(outDataAnchor == nullptr, continue;) | |||||
| if (outDataAnchor->GetPeerInDataAnchors().size() > 0 || outDataAnchor->GetPeerInControlAnchors().size() > 0) { | |||||
| if (dstOpDescPtr->AddOutputDesc( | |||||
| outDataAnchor->GetOwnerNode()->GetOpDesc()->GetOutputDesc(outDataAnchor->GetIdx())) != ge::GRAPH_SUCCESS) { | |||||
| GELOGW("GetPeerAnchorFromOutData: AddOutputDesc failed"); | |||||
| } | |||||
| index++; | |||||
| } | |||||
| for (auto peerInDataAnchor : outDataAnchor->GetPeerInDataAnchors()) { | |||||
| GE_IF_BOOL_EXEC(peerInDataAnchor == nullptr, continue;) | |||||
| if (anchorSet.count(peerInDataAnchor.get()) == 0) { | |||||
| std::pair<int, ge::InDataAnchorPtr> pairPeerInDataAnchor; | |||||
| pairPeerInDataAnchor.first = index; | |||||
| pairPeerInDataAnchor.second = peerInDataAnchor; | |||||
| peerInDataFromOutDataVec.push_back(pairPeerInDataAnchor); | |||||
| anchorSet.insert(peerInDataAnchor.get()); | |||||
| GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInDataAnchor)) | |||||
| } | |||||
| } | |||||
| for (auto peerInControlAnchorFromData : outDataAnchor->GetPeerInControlAnchors()) { | |||||
| GE_IF_BOOL_EXEC(peerInControlAnchorFromData == nullptr, continue;) | |||||
| if (anchorSet.count(peerInControlAnchorFromData.get()) == 0) { | |||||
| std::pair<int, ge::InControlAnchorPtr> pairPeerInControlAnchorFromData; | |||||
| pairPeerInControlAnchorFromData.first = index; | |||||
| pairPeerInControlAnchorFromData.second = peerInControlAnchorFromData; | |||||
| peerInControlFromOutDataVec.push_back(pairPeerInControlAnchorFromData); | |||||
| anchorSet.insert(peerInControlAnchorFromData.get()); | |||||
| GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInControlAnchorFromData)) | |||||
| } | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AllReducePass::GetPeerInControlFromOutControl(std::unordered_set<void *> &anchorSet, | |||||
| vector<ge::InControlAnchorPtr> &peerInControlFromOutControlVec, | |||||
| ge::NodePtr &srcNodePtr) { | |||||
| OutControlAnchorPtr outControlAnchor = srcNodePtr->GetOutControlAnchor(); | |||||
| GE_CHECK_NOTNULL(outControlAnchor); | |||||
| for (auto peerInControlAnchor : outControlAnchor->GetPeerInControlAnchors()) { | |||||
| GE_IF_BOOL_EXEC(peerInControlAnchor == nullptr, continue;) | |||||
| if (anchorSet.count(peerInControlAnchor.get()) == 0) { | |||||
| peerInControlFromOutControlVec.push_back(peerInControlAnchor); | |||||
| anchorSet.insert(peerInControlAnchor.get()); | |||||
| GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outControlAnchor, peerInControlAnchor)) | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -1,56 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_ | |||||
| #define GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_ | |||||
| #include <unordered_set> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| #include "inc/graph_pass.h" | |||||
| namespace ge { | |||||
| // | |||||
| class AllReducePass : public GraphPass { | |||||
| public: | |||||
| Status Run(ge::ComputeGraphPtr graph) override; | |||||
| private: | |||||
| Status GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet, | |||||
| vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec, ge::NodePtr &srcNodePtr, | |||||
| ge::OpDescPtr &dstOpDescPtr); | |||||
| Status GetPeerOutDataToInControl(std::unordered_set<void *> &anchorSet, | |||||
| vector<ge::OutDataAnchorPtr> &peerOutDataToInControlVec, ge::NodePtr &srcNodePtr); | |||||
| Status GetPeerOutControlToInControl(std::unordered_set<void *> &anchorSet, | |||||
| vector<ge::OutControlAnchorPtr> &peerOutControlToInControlVec, | |||||
| ge::NodePtr &srcNodePtr); | |||||
| Status GetPeerAnchorFromOutData(std::unordered_set<void *> &anchorSet, | |||||
| vector<std::pair<int, ge::InDataAnchorPtr>> &peerInDataFromOutDataVec, | |||||
| vector<std::pair<int, ge::InControlAnchorPtr>> &peerInControlFromOutDataVec, | |||||
| ge::NodePtr &srcNodePtr, ge::OpDescPtr &dstOpDescPtr, int &index); | |||||
| Status GetPeerInControlFromOutControl(std::unordered_set<void *> &anchorSet, | |||||
| vector<ge::InControlAnchorPtr> &peerInControlFromOutControlVec, | |||||
| ge::NodePtr &srcNodePtr); | |||||
| Status GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet, | |||||
| std::vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec, | |||||
| ge::NodePtr &srcNodePtr); | |||||
| Status GetPeerInAnchorToOutData(std::unordered_set<void *> &anchorSet, | |||||
| std::vector<std::pair<int, ge::InDataAnchorPtr>> &fusionOpPeerInDataAnchor, | |||||
| std::vector<std::pair<int, ge::InControlAnchorPtr>>&fusionOpPeerInControlFromOutData, | |||||
| ge::NodePtr &srcNodePtr); | |||||
| }; | |||||
| } // namespace ge | |||||
| #endif // GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_ | |||||
| @@ -50,7 +50,7 @@ Status EnginePlacer::Check() const { | |||||
| Status EnginePlacer::Run() { | Status EnginePlacer::Run() { | ||||
| std::lock_guard<std::mutex> lock(check_support_cost_mutex); | std::lock_guard<std::mutex> lock(check_support_cost_mutex); | ||||
| GELOGI("Engine placer starts."); | |||||
| GELOGD("Engine placer starts."); | |||||
| if (Check() != SUCCESS) { | if (Check() != SUCCESS) { | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -101,7 +101,7 @@ Status EnginePlacer::Run() { | |||||
| for (auto &it : ge::GELib::GetInstance()->DNNEngineManagerObj().GetCheckSupportCost()) { | for (auto &it : ge::GELib::GetInstance()->DNNEngineManagerObj().GetCheckSupportCost()) { | ||||
| GEEVENT("The time cost of %s::CheckSupported is [%lu] micro second.", it.first.c_str(), it.second); | GEEVENT("The time cost of %s::CheckSupported is [%lu] micro second.", it.first.c_str(), it.second); | ||||
| } | } | ||||
| GELOGI("Engine placer ends."); | |||||
| GELOGD("Engine placer ends."); | |||||
| return is_check_support_success ? SUCCESS : FAILED; | return is_check_support_success ? SUCCESS : FAILED; | ||||
| } | } | ||||
| @@ -223,7 +223,7 @@ Status ge::GraphPartitioner::MergeSubGraph(ge::ComputeGraphPtr &output_merged_co | |||||
| GELOGE(GE_GRAPH_UNSUPPORTED, "Cannot call merging in partition mode"); | GELOGE(GE_GRAPH_UNSUPPORTED, "Cannot call merging in partition mode"); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| GELOGI("Graph merge starts."); | |||||
| GELOGD("Graph merge starts."); | |||||
| // check input param | // check input param | ||||
| for (const auto &it : sub_graph_list) { | for (const auto &it : sub_graph_list) { | ||||
| if (it == nullptr) { | if (it == nullptr) { | ||||
| @@ -261,7 +261,7 @@ Status ge::GraphPartitioner::MergeSubGraph(ge::ComputeGraphPtr &output_merged_co | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| GE_TIMESTAMP_END(MergeSubGraphEnginePlacerRun, "GraphPartitioner::MergeGraphEnginePlacerRun"); | GE_TIMESTAMP_END(MergeSubGraphEnginePlacerRun, "GraphPartitioner::MergeGraphEnginePlacerRun"); | ||||
| GELOGI("Graph merge ends."); | |||||
| GELOGD("Graph merge ends."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -581,7 +581,7 @@ Status ge::GraphPartitioner::Initialize(ge::ComputeGraphPtr compute_graph) { | |||||
| new_cluster->engine_name_.c_str(), new_cluster->index_, new_cluster->stream_label_.c_str()); | new_cluster->engine_name_.c_str(), new_cluster->index_, new_cluster->stream_label_.c_str()); | ||||
| temp_index++; | temp_index++; | ||||
| } | } | ||||
| GELOGI("Initialize ends."); | |||||
| GELOGD("Initialize ends."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -754,11 +754,11 @@ void ge::GraphPartitioner::MarkClusters() { | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| GELOGI("MarkClusters ends."); | |||||
| GELOGD("MarkClusters ends."); | |||||
| } | } | ||||
| Status ge::GraphPartitioner::SplitSubGraphs(ge::ComputeGraphPtr compute_graph) { | Status ge::GraphPartitioner::SplitSubGraphs(ge::ComputeGraphPtr compute_graph) { | ||||
| GELOGI("SplitSubGraphs starts."); | |||||
| GELOGD("SplitSubGraphs starts."); | |||||
| if (compute_graph == nullptr) { | if (compute_graph == nullptr) { | ||||
| GELOGE(FAILED, "parameter ptr is null."); | GELOGE(FAILED, "parameter ptr is null."); | ||||
| return FAILED; | return FAILED; | ||||
| @@ -823,7 +823,7 @@ Status ge::GraphPartitioner::SplitSubGraphs(ge::ComputeGraphPtr compute_graph) { | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| GELOGI("SplitSubGraphs ends."); | |||||
| GELOGD("SplitSubGraphs ends."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -46,7 +46,7 @@ Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) { | |||||
| } | } | ||||
| } | } | ||||
| if (atomic_node_vec.empty()) { | if (atomic_node_vec.empty()) { | ||||
| GELOGI("There is no atomic node. Ignore atomicAddrClean pass."); | |||||
| GELOGD("There is no atomic node. Ignore atomicAddrClean pass."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -332,7 +332,7 @@ Status CondRemovePass::GetCondInfo(const NodePtr &node, ComputeGraphPtr &graph, | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| } else { | } else { | ||||
| GELOGI("no need cond_remove_pass for node %s.", node->GetName().c_str()); | |||||
| GELOGD("no need cond_remove_pass for node %s.", node->GetName().c_str()); | |||||
| return NOT_CHANGED; | return NOT_CHANGED; | ||||
| } | } | ||||
| @@ -16,6 +16,7 @@ | |||||
| #include "graph/passes/mark_agnostic_pass.h" | #include "graph/passes/mark_agnostic_pass.h" | ||||
| #include "graph/utils/node_utils.h" | #include "graph/utils/node_utils.h" | ||||
| #include "graph/utils/tensor_utils.h" | |||||
| namespace ge { | namespace ge { | ||||
| Status MarkAgnosticPass::Run(ComputeGraphPtr graph) { | Status MarkAgnosticPass::Run(ComputeGraphPtr graph) { | ||||
| @@ -47,6 +48,16 @@ Status MarkAgnosticPass::Run(ComputeGraphPtr graph) { | |||||
| } | } | ||||
| if (node_type == MERGE) { | if (node_type == MERGE) { | ||||
| GELOGD("Mark format agnostic and continuous for merge node %s", node->GetName().c_str()); | GELOGD("Mark format agnostic and continuous for merge node %s", node->GetName().c_str()); | ||||
| auto in_nodes = node->GetInAllNodes(); | |||||
| vector<NodePtr> input_nodes(in_nodes.begin(), in_nodes.end()); | |||||
| /// Enter-----------+ | |||||
| /// +-> Merge | |||||
| /// NextIteration---+ | |||||
| if (input_nodes.size() == 2) { | |||||
| if (input_nodes[0]->GetType() == ENTER && input_nodes[1]->GetType() == NEXTITERATION) { | |||||
| continue; | |||||
| } | |||||
| } | |||||
| const OpDescPtr op_desc = node->GetOpDesc(); | const OpDescPtr op_desc = node->GetOpDesc(); | ||||
| const GeTensorDescPtr op_tensor = op_desc->MutableOutputDesc(0); | const GeTensorDescPtr op_tensor = op_desc->MutableOutputDesc(0); | ||||
| if (op_tensor == nullptr) { | if (op_tensor == nullptr) { | ||||
| @@ -278,7 +278,7 @@ Status MemcpyAddrAsyncPass::InsertMemcpyAddrAsyncNode(const OutDataAnchorPtr &ou | |||||
| } | } | ||||
| Status MemcpyAddrAsyncPass::InsertMemAddrAsyncNodeBeforeNetoutput(const ComputeGraphPtr &graph, const NodePtr &node) { | Status MemcpyAddrAsyncPass::InsertMemAddrAsyncNodeBeforeNetoutput(const ComputeGraphPtr &graph, const NodePtr &node) { | ||||
| GELOGI("Start AddMemcpyAddrAsyncNode for %s.", node->GetName().c_str()); | |||||
| GELOGD("Start AddMemcpyAddrAsyncNode for %s.", node->GetName().c_str()); | |||||
| for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { | for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { | ||||
| auto in_node = NodeUtils::GetInDataNodeByIndex(*node, in_data_anchor->GetIdx()); | auto in_node = NodeUtils::GetInDataNodeByIndex(*node, in_data_anchor->GetIdx()); | ||||
| GE_CHECK_NOTNULL(in_node); | GE_CHECK_NOTNULL(in_node); | ||||
| @@ -33,7 +33,7 @@ Status MultiBatchPass::Run(ComputeGraphPtr graph) { | |||||
| OutDataAnchorPtr pred_value = nullptr; | OutDataAnchorPtr pred_value = nullptr; | ||||
| Status ret = FindPredValue(graph, pred_value); | Status ret = FindPredValue(graph, pred_value); | ||||
| if (ret == NOT_CHANGED) { | if (ret == NOT_CHANGED) { | ||||
| GELOGI("SwitchN node not exist, graph not changed."); | |||||
| GELOGD("SwitchN node not exist, graph not changed."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| @@ -158,7 +158,7 @@ Status MultiBatchPass::FindPredValue(const ComputeGraphPtr &graph, OutDataAnchor | |||||
| } | } | ||||
| if (switch_n_nodes_.empty()) { | if (switch_n_nodes_.empty()) { | ||||
| GELOGI("SwitchN node not exist."); | |||||
| GELOGD("SwitchN node not exist."); | |||||
| return NOT_CHANGED; | return NOT_CHANGED; | ||||
| } | } | ||||
| @@ -128,7 +128,7 @@ Status SetInputOutputOffsetPass::SetInputOffsetForHcom(const ge::NodePtr &node, | |||||
| } | } | ||||
| Status SetInputOutputOffsetPass::SetInputOffset(const NodePtr &node, const vector<int> &connect_input) { | Status SetInputOutputOffsetPass::SetInputOffset(const NodePtr &node, const vector<int> &connect_input) { | ||||
| GELOGI("Start to SetInputOffset for %s.", node->GetName().c_str()); | |||||
| GELOGD("Start to SetInputOffset for %s.", node->GetName().c_str()); | |||||
| std::vector<int64_t> memory_type; | std::vector<int64_t> memory_type; | ||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| (void)ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type); | (void)ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type); | ||||
| @@ -241,7 +241,7 @@ Status SetInputOutputOffsetPass::SetOutputOffsetForHcom(const NodePtr &node, con | |||||
| } | } | ||||
| Status SetInputOutputOffsetPass::SetOutputOffset(const NodePtr &node, const vector<int> &connect_output) { | Status SetInputOutputOffsetPass::SetOutputOffset(const NodePtr &node, const vector<int> &connect_output) { | ||||
| GELOGI("Start SetOutputOffset of %s.", node->GetName().c_str()); | |||||
| GELOGD("Start SetOutputOffset of %s.", node->GetName().c_str()); | |||||
| bool attr_no_task = false; | bool attr_no_task = false; | ||||
| bool get_attr_no_task = ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_NOTASK, attr_no_task); | bool get_attr_no_task = ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_NOTASK, attr_no_task); | ||||
| if (get_attr_no_task && attr_no_task) { | if (get_attr_no_task && attr_no_task) { | ||||
| @@ -117,6 +117,7 @@ | |||||
| #include "graph/passes/variable_op_pass.h" | #include "graph/passes/variable_op_pass.h" | ||||
| #include "graph/passes/variable_prepare_op_pass.h" | #include "graph/passes/variable_prepare_op_pass.h" | ||||
| #include "graph/passes/variable_ref_delete_op_pass.h" | #include "graph/passes/variable_ref_delete_op_pass.h" | ||||
| #include "graph/passes/mark_agnostic_pass.h" | |||||
| namespace ge { | namespace ge { | ||||
| @@ -1700,6 +1701,7 @@ Status GraphPrepare::PrepareOptimize() { | |||||
| try { | try { | ||||
| (void)original_graph_passes.AddPass("PrepareOptimize::ShapeOperateOpRemovePass", new ShapeOperateOpRemovePass); | (void)original_graph_passes.AddPass("PrepareOptimize::ShapeOperateOpRemovePass", new ShapeOperateOpRemovePass); | ||||
| (void)original_graph_passes.AddPass("PrepareOptimize::ReplaceTransShapePass", new ReplaceTransShapePass); | (void)original_graph_passes.AddPass("PrepareOptimize::ReplaceTransShapePass", new ReplaceTransShapePass); | ||||
| (void)original_graph_passes.AddPass("PrepareOptimize::MarkAgnosticPass" , new MarkAgnosticPass); | |||||
| } catch (std::bad_alloc &e) { | } catch (std::bad_alloc &e) { | ||||
| GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs."); | GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs."); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| @@ -1571,6 +1571,10 @@ void GetDynamicShapeByMerge(const ComputeGraphPtr &graph, const NodePtr &node, | |||||
| // Connect NetOutput directly | // Connect NetOutput directly | ||||
| void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, | void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, | ||||
| const set<size_t> &dynamic_output_index, vector<string> &dynamic_output_dims) { | const set<size_t> &dynamic_output_index, vector<string> &dynamic_output_dims) { | ||||
| if (!GetLocalOmgContext().dynamic_node_type.empty()) { | |||||
| GELOGD("No need to get directly shape info of %s when train.", node->GetName().c_str()); | |||||
| return; | |||||
| } | |||||
| GELOGD("Try get directly shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str()); | GELOGD("Try get directly shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str()); | ||||
| const auto &netoutput_desc = node->GetOpDesc(); | const auto &netoutput_desc = node->GetOpDesc(); | ||||
| const auto &inputnode_to_netoutput = node->GetInAllNodes(); | const auto &inputnode_to_netoutput = node->GetInAllNodes(); | ||||
| @@ -1578,9 +1582,6 @@ void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, | |||||
| if (dynamic_output_index.count(i) > 0) { | if (dynamic_output_index.count(i) > 0) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| if (inputnode_to_netoutput.at(i)->GetType() == GETDYNAMICDIMS) { | |||||
| continue; | |||||
| } | |||||
| auto tensor_desc = netoutput_desc->GetInputDesc(i); | auto tensor_desc = netoutput_desc->GetInputDesc(i); | ||||
| auto shape = tensor_desc.GetShape().ToString(); | auto shape = tensor_desc.GetShape().ToString(); | ||||
| @@ -84,8 +84,10 @@ Status DistinguishGetNextAndData(ComputeGraphPtr &graph, vector<NodePtr> &data_n | |||||
| if (op_desc->GetType() == DATA && op_desc->GetName() != kShapeDataName) { | if (op_desc->GetType() == DATA && op_desc->GetName() != kShapeDataName) { | ||||
| if (op_desc->GetName().find(kSubstrOfGetNextNosinkName) == string::npos) { | if (op_desc->GetName().find(kSubstrOfGetNextNosinkName) == string::npos) { | ||||
| data_nodes.emplace_back(input_node); | data_nodes.emplace_back(input_node); | ||||
| GELOGD("Name of data node is %s.", op_desc->GetName().c_str()); | |||||
| } else { | } else { | ||||
| getnext_nosink_nodes.emplace_back(input_node); | getnext_nosink_nodes.emplace_back(input_node); | ||||
| GELOGD("Name of getnext nosink is %s.", op_desc->GetName().c_str()); | |||||
| } | } | ||||
| } | } | ||||
| if (IsGetNextType(input_node)) { | if (IsGetNextType(input_node)) { | ||||
| @@ -111,6 +113,8 @@ Status CheckSequenceOfData(ComputeGraphPtr &graph, const vector<NodePtr> &data_n | |||||
| GE_CHECK_NOTNULL(data_node->GetOpDesc()); | GE_CHECK_NOTNULL(data_node->GetOpDesc()); | ||||
| auto output_shape = data_node->GetOpDesc()->GetOutputDesc(0).GetShape().GetDims(); | auto output_shape = data_node->GetOpDesc()->GetOutputDesc(0).GetShape().GetDims(); | ||||
| auto dynamic_dims = GetLocalOmgContext().user_input_dims.at(i).second; | auto dynamic_dims = GetLocalOmgContext().user_input_dims.at(i).second; | ||||
| GELOGD("The %zu data node is %s, node shape is %s, dynamic dim is %s.", i, data_node->GetName().c_str(), | |||||
| formats::JoinToString(output_shape).c_str(), formats::JoinToString(dynamic_dims).c_str()); | |||||
| if (output_shape.empty() && dynamic_dims.size() == 1 && dynamic_dims.at(0) == 0) { | if (output_shape.empty() && dynamic_dims.size() == 1 && dynamic_dims.at(0) == 0) { | ||||
| GELOGI("No need to check sequence for constant."); | GELOGI("No need to check sequence for constant."); | ||||
| continue; | continue; | ||||
| @@ -151,6 +155,8 @@ Status CheckSequenceOfGetnext(ComputeGraphPtr &graph, const vector<NodePtr> &get | |||||
| for (size_t i = 0; i < data_count; ++i) { | for (size_t i = 0; i < data_count; ++i) { | ||||
| auto output_shape = data_node->GetOpDesc()->GetOutputDesc(i).GetShape().GetDims(); | auto output_shape = data_node->GetOpDesc()->GetOutputDesc(i).GetShape().GetDims(); | ||||
| auto dynamic_dims = GetLocalOmgContext().user_input_dims.at(i).second; | auto dynamic_dims = GetLocalOmgContext().user_input_dims.at(i).second; | ||||
| GELOGD("The %zu getnext node is %s, node shape is %s, dynamic dim is %s.", i, data_node->GetName().c_str(), | |||||
| formats::JoinToString(output_shape).c_str(), formats::JoinToString(dynamic_dims).c_str()); | |||||
| if (output_shape.empty() && dynamic_dims.size() == 1 && dynamic_dims.at(0) == 0) { | if (output_shape.empty() && dynamic_dims.size() == 1 && dynamic_dims.at(0) == 0) { | ||||
| GELOGI("No need to check sequence for constant."); | GELOGI("No need to check sequence for constant."); | ||||
| continue; | continue; | ||||
| @@ -80,6 +80,26 @@ LOCAL_C_INCLUDES := $(local_lib_inc_path) | |||||
| include ${BUILD_HOST_SHARED_LIBRARY} | include ${BUILD_HOST_SHARED_LIBRARY} | ||||
#compiler for device ops kernel builder
# Builds libhost_cpu_opskernel_builder as a shared library via
# BUILD_SHARED_LIBRARY from the single builder source file.
include $(CLEAR_VARS)

LOCAL_MODULE := libhost_cpu_opskernel_builder
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=
# The last entry must not end with a backslash: a trailing continuation would
# splice the following line into this variable's value.
LOCAL_SHARED_LIBRARIES := libascend_protobuf \
                          libc_sec \
                          libslog \
                          libgraph \
                          libregister

LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc

LOCAL_C_INCLUDES := $(local_lib_inc_path)

include ${BUILD_SHARED_LIBRARY}
| #compiler for host static lib | #compiler for host static lib | ||||
| include $(CLEAR_VARS) | include $(CLEAR_VARS) | ||||
| LOCAL_MODULE := libhost_cpu_opskernel_builder | LOCAL_MODULE := libhost_cpu_opskernel_builder | ||||
| @@ -57,9 +57,9 @@ struct GraphExecutionContext { | |||||
| do { \ | do { \ | ||||
| if ((context != nullptr) && (context)->profiler != nullptr) { \ | if ((context != nullptr) && (context)->profiler != nullptr) { \ | ||||
| if (node_name != nullptr) { \ | if (node_name != nullptr) { \ | ||||
| context->profiler->RecordEvent(evt_type, "tid:%lu [%s] [%s] " fmt, GetTid(), node_name, category, ##__VA_ARGS__);\ | |||||
| context->profiler->RecordEvent(evt_type, "tid:%lu [%s] [%s] " fmt, GeLog::GetTid(), node_name, category, ##__VA_ARGS__);\ | |||||
| } else { \ | } else { \ | ||||
| context->profiler->RecordEvent(evt_type, "tid:%lu [%s] " fmt, GetTid(), category, ##__VA_ARGS__); \ | |||||
| context->profiler->RecordEvent(evt_type, "tid:%lu [%s] " fmt, GeLog::GetTid(), category, ##__VA_ARGS__); \ | |||||
| }\ | }\ | ||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| @@ -57,6 +57,9 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { | |||||
| case aicpu::FWKAdapter::FWK_ADPT_EXT_OUTPUT_SHAPE: | case aicpu::FWKAdapter::FWK_ADPT_EXT_OUTPUT_SHAPE: | ||||
| GE_CHK_STATUS_RET(ParseExtOutputShape(aicpu_ext_info), "Parse ext output shape failed."); | GE_CHK_STATUS_RET(ParseExtOutputShape(aicpu_ext_info), "Parse ext output shape failed."); | ||||
| break; | break; | ||||
| case aicpu::FWKAdapter::FWK_ADPT_EXT_SESSION_INFO: | |||||
| GE_CHK_STATUS_RET(ParseExtSessionInfo(aicpu_ext_info), "Parse ext session info failed."); | |||||
| break; | |||||
| default: | default: | ||||
| GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.", | GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.", | ||||
| node_name_.c_str(), aicpu_ext_info->infoType, aicpu_ext_info->infoLen); | node_name_.c_str(), aicpu_ext_info->infoType, aicpu_ext_info->infoLen); | ||||
| @@ -123,6 +126,39 @@ Status AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status AicpuExtInfoHandler::ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info) { | |||||
| GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(AicpuSessionInfo), PARAM_INVALID, | |||||
| "Node[%s] parse ext session info failed as infoLen must be %zu but %u.", | |||||
| node_name_.c_str(), sizeof(SessionInfo), aicpu_ext_info->infoLen); | |||||
| session_info_ = reinterpret_cast<AicpuSessionInfo *>(aicpu_ext_info->infoMsg); | |||||
| GELOGI("Node[%s] parse session info success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AicpuExtInfoHandler::UpdateSessionInfo(uint64_t session_id, uint64_t kernel_id, bool sess_flag) { | |||||
| if (session_info_ == nullptr) { | |||||
| GELOGD("There is no session info in ext_info, no need update."); | |||||
| return SUCCESS; | |||||
| } | |||||
| session_info_->sessionId = session_id; | |||||
| session_info_->kernelId = kernel_id; | |||||
| session_info_->sessFlag = sess_flag; | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AicpuExtInfoHandler::UpdateSessionInfoSessionId(uint64_t session_id) { | |||||
| if (session_info_ == nullptr) { | |||||
| GELOGD("There is no session info in ext_info, no need update."); | |||||
| return SUCCESS; | |||||
| } | |||||
| session_info_->sessionId = session_id; | |||||
| session_info_->sessFlag = true; | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const GeTensorDesc &input_desc) { | Status AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const GeTensorDesc &input_desc) { | ||||
| GE_CHECK_LE(input_index, input_num_); | GE_CHECK_LE(input_index, input_num_); | ||||
| const auto &shape = input_desc.GetShape(); | const auto &shape = input_desc.GetShape(); | ||||
| @@ -19,6 +19,7 @@ | |||||
| #include "external/ge/ge_api_error_codes.h" | #include "external/ge/ge_api_error_codes.h" | ||||
| #include "cce/fwk_adpt_struct.h" | #include "cce/fwk_adpt_struct.h" | ||||
| #include "cce/aicpu_engine_struct.h" | |||||
| #include "graph/op_desc.h" | #include "graph/op_desc.h" | ||||
| #include "graph/ge_tensor.h" | #include "graph/ge_tensor.h" | ||||
| @@ -26,6 +27,7 @@ namespace ge { | |||||
| namespace hybrid { | namespace hybrid { | ||||
| using AicpuShapeAndType = aicpu::FWKAdapter::ShapeAndType; | using AicpuShapeAndType = aicpu::FWKAdapter::ShapeAndType; | ||||
| using AicpuExtInfo = aicpu::FWKAdapter::ExtInfo; | using AicpuExtInfo = aicpu::FWKAdapter::ExtInfo; | ||||
| using AicpuSessionInfo = SessionInfo; | |||||
| class AicpuExtInfoHandler { | class AicpuExtInfoHandler { | ||||
| public: | public: | ||||
| @@ -51,6 +53,10 @@ class AicpuExtInfoHandler { | |||||
| Status UpdateOutputShapeAndType(uint32_t output_index, const GeTensorDesc &output_desc); | Status UpdateOutputShapeAndType(uint32_t output_index, const GeTensorDesc &output_desc); | ||||
| Status UpdateSessionInfo(uint64_t session_id, uint64_t kernel_id, bool sess_flag); | |||||
| Status UpdateSessionInfoSessionId(uint64_t session_id); | |||||
| Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type); | Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type); | ||||
| private: | private: | ||||
| @@ -58,6 +64,7 @@ class AicpuExtInfoHandler { | |||||
| Status ParseExtShapeType(AicpuExtInfo *aicpu_ext_info); | Status ParseExtShapeType(AicpuExtInfo *aicpu_ext_info); | ||||
| Status ParseExtInputShape(AicpuExtInfo *aicpu_ext_info); | Status ParseExtInputShape(AicpuExtInfo *aicpu_ext_info); | ||||
| Status ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info); | Status ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info); | ||||
| Status ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info); | |||||
| static Status UpdateShapeAndType(const GeShape &shape, | static Status UpdateShapeAndType(const GeShape &shape, | ||||
| DataType data_type, | DataType data_type, | ||||
| @@ -72,6 +79,7 @@ class AicpuExtInfoHandler { | |||||
| const uint32_t input_num_; | const uint32_t input_num_; | ||||
| const uint32_t output_num_; | const uint32_t output_num_; | ||||
| UnknowShapeOpType unknown_type_; | UnknowShapeOpType unknown_type_; | ||||
| AicpuSessionInfo *session_info_ = nullptr; | |||||
| std::unique_ptr<uint8_t[]> ext_info_; | std::unique_ptr<uint8_t[]> ext_info_; | ||||
| size_t ext_info_len_ = 0; | size_t ext_info_len_ = 0; | ||||
| @@ -40,29 +40,36 @@ Status AicpuNodeTaskBase::AllocTensorBuffer(size_t size, std::unique_ptr<TensorB | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info) { | |||||
| if (node_item_->is_dynamic) { | |||||
| // dynamic node must have ext info | |||||
| GE_CHK_STATUS_RET(aicpu_ext_handle_.Parse(kernel_ext_info), | |||||
| "Node[%s] parse kernel ext info failed, kernel_ext_info_size=%zu.", | |||||
| node_name_.c_str(), kernel_ext_info.size()); | |||||
| } | |||||
| // if no ext info no need copy to device. | |||||
| Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info, int64_t session_id) { | |||||
| if (kernel_ext_info.empty()) { | if (kernel_ext_info.empty()) { | ||||
| GELOGI("Node[%s] kernel_ext_info is empty, no need copy to device, is_dynamic=%s.", | |||||
| node_name_.c_str(), node_item_->is_dynamic ? "true" : "false"); | |||||
| return SUCCESS; | |||||
| if (node_item_->is_dynamic) { | |||||
| // dynamic node must have ext info | |||||
| GELOGE(PARAM_INVALID, "Node[%s] parse ext info failed as ext info is empty.", node_name_.c_str()); | |||||
| return PARAM_INVALID; | |||||
| } else { | |||||
| // if no ext info no need copy to device. | |||||
| GELOGI("Node[%s] kernel_ext_info is empty, no need copy to device, is_dynamic=%s.", | |||||
| node_name_.c_str(), node_item_->is_dynamic ? "true" : "false"); | |||||
| return SUCCESS; | |||||
| } | |||||
| } | } | ||||
| GE_CHK_STATUS_RET(aicpu_ext_handle_.Parse(kernel_ext_info), | |||||
| "Node[%s] parse kernel ext info failed, kernel_ext_info_size=%zu.", | |||||
| node_name_.c_str(), kernel_ext_info.size()); | |||||
| GELOGD("To update aicpu_task ext_info session_info session_id to %lu", session_id); | |||||
| GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateSessionInfoSessionId(session_id), | |||||
| "UpdateSessionInfoSessionId failed."); | |||||
| // copy task args buf | // copy task args buf | ||||
| GE_CHK_STATUS_RET(AllocTensorBuffer(kernel_ext_info.size(), ext_info_addr_dev_), | |||||
| GE_CHK_STATUS_RET(AllocTensorBuffer(aicpu_ext_handle_.GetExtInfoLen(), ext_info_addr_dev_), | |||||
| "Node[%s] alloc kernel_ext_info buf failed, size=%zu", | "Node[%s] alloc kernel_ext_info buf failed, size=%zu", | ||||
| node_name_.c_str(), kernel_ext_info.size()); | |||||
| node_name_.c_str(), aicpu_ext_handle_.GetExtInfoLen()); | |||||
| // copy default ext info to device | // copy default ext info to device | ||||
| GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_->GetData(), ext_info_addr_dev_->GetSize(), | GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_->GetData(), ext_info_addr_dev_->GetSize(), | ||||
| kernel_ext_info.data(), kernel_ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE)); | |||||
| aicpu_ext_handle_.GetExtInfo(), aicpu_ext_handle_.GetExtInfoLen(), | |||||
| RT_MEMCPY_HOST_TO_DEVICE)); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -290,7 +297,8 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) { | |||||
| node_name_.c_str(), kernel_ext_info.size(), kernel_ext_info_size); | node_name_.c_str(), kernel_ext_info.size(), kernel_ext_info_size); | ||||
| // init ext info | // init ext info | ||||
| GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info), "Node[%s] init ext info failed.", node_name_.c_str()); | |||||
| uint64_t ext_session_id = model.GetSessionId(); | |||||
| GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info, ext_session_id), "Node[%s] init ext info failed.", node_name_.c_str()); | |||||
| GE_CHK_STATUS_RET(InitForDependComputeTask(), "Node[%s] init for depend compute task failed.", node_name_.c_str()); | GE_CHK_STATUS_RET(InitForDependComputeTask(), "Node[%s] init for depend compute task failed.", node_name_.c_str()); | ||||
| // build fwk_op_kernel. | // build fwk_op_kernel. | ||||
| @@ -679,7 +687,8 @@ Status AicpuNodeTask::Init(const HybridModel &model) { | |||||
| "Node[%s] task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", | "Node[%s] task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", | ||||
| node_name.c_str(), kernel_ext_info.size(), kernel_ext_info_size); | node_name.c_str(), kernel_ext_info.size(), kernel_ext_info_size); | ||||
| GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info), "Node[%s] init ext info failed.", node_name.c_str()); | |||||
| uint64_t ext_session_id = model.GetSessionId(); | |||||
| GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info, ext_session_id), "Node[%s] init ext info failed.", node_name.c_str()); | |||||
| if (ext_info_addr_dev_ == nullptr) { | if (ext_info_addr_dev_ == nullptr) { | ||||
| aicpu_param_head->extInfoLength = 0; | aicpu_param_head->extInfoLength = 0; | ||||
| @@ -43,7 +43,7 @@ class AicpuNodeTaskBase : public NodeTask { | |||||
| Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override; | Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override; | ||||
| protected: | protected: | ||||
| virtual Status InitExtInfo(const std::string &kernel_ext_info); | |||||
| virtual Status InitExtInfo(const std::string &kernel_ext_info, int64_t session_id); | |||||
| virtual Status UpdateExtInfo(); | virtual Status UpdateExtInfo(); | ||||
| @@ -110,7 +110,7 @@ Status GELib::InnerInitialize(const map<string, string> &options) { | |||||
| Status initSystemStatus = SystemInitialize(options); | Status initSystemStatus = SystemInitialize(options); | ||||
| GE_TIMESTAMP_END(SystemInitialize, "InnerInitialize::SystemInitialize"); | GE_TIMESTAMP_END(SystemInitialize, "InnerInitialize::SystemInitialize"); | ||||
| if (initSystemStatus != SUCCESS) { | if (initSystemStatus != SUCCESS) { | ||||
| GELOGE(initSystemStatus); | |||||
| GELOGE(initSystemStatus, "GE system initial failed."); | |||||
| RollbackInit(); | RollbackInit(); | ||||
| return initSystemStatus; | return initSystemStatus; | ||||
| } | } | ||||
| @@ -120,7 +120,7 @@ Status GELib::InnerInitialize(const map<string, string> &options) { | |||||
| Status initEmStatus = engineManager_.Initialize(options); | Status initEmStatus = engineManager_.Initialize(options); | ||||
| GE_TIMESTAMP_END(EngineInitialize, "InnerInitialize::EngineInitialize"); | GE_TIMESTAMP_END(EngineInitialize, "InnerInitialize::EngineInitialize"); | ||||
| if (initEmStatus != SUCCESS) { | if (initEmStatus != SUCCESS) { | ||||
| GELOGE(initEmStatus); | |||||
| GELOGE(initEmStatus, "GE engine manager initial failed."); | |||||
| RollbackInit(); | RollbackInit(); | ||||
| return initEmStatus; | return initEmStatus; | ||||
| } | } | ||||
| @@ -130,7 +130,7 @@ Status GELib::InnerInitialize(const map<string, string> &options) { | |||||
| Status initOpsStatus = opsManager_.Initialize(options); | Status initOpsStatus = opsManager_.Initialize(options); | ||||
| GE_TIMESTAMP_END(OpsManagerInitialize, "InnerInitialize::OpsManagerInitialize"); | GE_TIMESTAMP_END(OpsManagerInitialize, "InnerInitialize::OpsManagerInitialize"); | ||||
| if (initOpsStatus != SUCCESS) { | if (initOpsStatus != SUCCESS) { | ||||
| GELOGE(initOpsStatus); | |||||
| GELOGE(initOpsStatus, "GE ops manager initial failed."); | |||||
| RollbackInit(); | RollbackInit(); | ||||
| return initOpsStatus; | return initOpsStatus; | ||||
| } | } | ||||
| @@ -140,7 +140,7 @@ Status GELib::InnerInitialize(const map<string, string> &options) { | |||||
| Status initOpsBuilderStatus = OpsKernelBuilderManager::Instance().Initialize(options); | Status initOpsBuilderStatus = OpsKernelBuilderManager::Instance().Initialize(options); | ||||
| GE_TIMESTAMP_END(OpsKernelBuilderManagerInitialize, "InnerInitialize::OpsKernelBuilderManager"); | GE_TIMESTAMP_END(OpsKernelBuilderManagerInitialize, "InnerInitialize::OpsKernelBuilderManager"); | ||||
| if (initOpsBuilderStatus != SUCCESS) { | if (initOpsBuilderStatus != SUCCESS) { | ||||
| GELOGE(initOpsBuilderStatus); | |||||
| GELOGE(initOpsBuilderStatus, "GE ops builder manager initial failed."); | |||||
| RollbackInit(); | RollbackInit(); | ||||
| return initOpsBuilderStatus; | return initOpsBuilderStatus; | ||||
| } | } | ||||
| @@ -150,7 +150,7 @@ Status GELib::InnerInitialize(const map<string, string> &options) { | |||||
| Status initSmStatus = sessionManager_.Initialize(options); | Status initSmStatus = sessionManager_.Initialize(options); | ||||
| GE_TIMESTAMP_END(SessionManagerInitialize, "InnerInitialize::SessionManagerInitialize"); | GE_TIMESTAMP_END(SessionManagerInitialize, "InnerInitialize::SessionManagerInitialize"); | ||||
| if (initSmStatus != SUCCESS) { | if (initSmStatus != SUCCESS) { | ||||
| GELOGE(initSmStatus); | |||||
| GELOGE(initSmStatus, "GE session manager initial failed."); | |||||
| RollbackInit(); | RollbackInit(); | ||||
| return initSmStatus; | return initSmStatus; | ||||
| } | } | ||||
| @@ -504,7 +504,7 @@ void PrintOptionMap(std::map<std::string, std::string> &options, std::string tip | |||||
| for (auto iter = options.begin(); iter != options.end(); iter++) { | for (auto iter = options.begin(); iter != options.end(); iter++) { | ||||
| std::string key = iter->first; | std::string key = iter->first; | ||||
| std::string option_name = iter->second; | std::string option_name = iter->second; | ||||
| GELOGI("%s set successfully, option_key=%s, option_value=%s", tips.c_str(), key.c_str(), option_name.c_str()); | |||||
| GELOGD("%s set successfully, option_key=%s, option_value=%s", tips.c_str(), key.c_str(), option_name.c_str()); | |||||
| } | } | ||||
| } | } | ||||
| @@ -340,7 +340,7 @@ void Impl::SetRtSocVersion() { | |||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGW("Set soc version %s failed. ret:0x%X", soc_version, rt_ret); | GELOGW("Set soc version %s failed. ret:0x%X", soc_version, rt_ret); | ||||
| } | } | ||||
| GELOGI("Set soc version %s success.", soc_version); | |||||
| GELOGD("Set soc version %s success.", soc_version); | |||||
| } | } | ||||
| } | } | ||||
| @@ -359,25 +359,25 @@ graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vector<ge::GeTe | |||||
| GE_CHECK_NOTNULL(op); | GE_CHECK_NOTNULL(op); | ||||
| if (op->GetType() == DATA) { | if (op->GetType() == DATA) { | ||||
| (void)AttrUtils::SetInt(op, ATTR_NAME_INDEX, index++); | (void)AttrUtils::SetInt(op, ATTR_NAME_INDEX, index++); | ||||
| GELOGI("Data op inputDesc size: %zu", op->GetAllInputsDesc().size()); | |||||
| GELOGD("Data op inputDesc size: %zu", op->GetAllInputsDesc().size()); | |||||
| ge::GeTensorDesc tensor = op->GetInputDesc(0); | ge::GeTensorDesc tensor = op->GetInputDesc(0); | ||||
| string data_op_name = op->GetName(); | string data_op_name = op->GetName(); | ||||
| GELOGI("Data op name: %s", data_op_name.c_str()); | |||||
| GELOGD("Data op name: %s", data_op_name.c_str()); | |||||
| ge::GeShape data_shape; | ge::GeShape data_shape; | ||||
| auto iter = omg_context_.input_dims.find(data_op_name); | auto iter = omg_context_.input_dims.find(data_op_name); | ||||
| if (iter != omg_context_.input_dims.end()) { | if (iter != omg_context_.input_dims.end()) { | ||||
| data_shape = ge::GeShape(iter->second); | data_shape = ge::GeShape(iter->second); | ||||
| GELOGI("Data op get shape from Context."); | |||||
| GELOGD("Data op get shape from Context."); | |||||
| } else { | } else { | ||||
| data_shape = tensor.GetShape(); | data_shape = tensor.GetShape(); | ||||
| GELOGI("Data op get shape from InputDesc in ge ir graph."); | |||||
| GELOGD("Data op get shape from InputDesc in ge ir graph."); | |||||
| } | } | ||||
| // If user point input format, do work for all data ops; else do according to tensor_desc | // If user point input format, do work for all data ops; else do according to tensor_desc | ||||
| auto data_format = omg_context_.format != domi::DOMI_TENSOR_ND ? | auto data_format = omg_context_.format != domi::DOMI_TENSOR_ND ? | ||||
| ge::TypeUtils::DomiFormatToFormat(omg_context_.format) : tensor.GetFormat(); | ge::TypeUtils::DomiFormatToFormat(omg_context_.format) : tensor.GetFormat(); | ||||
| ge::DataType data_type = tensor.GetDataType(); | ge::DataType data_type = tensor.GetDataType(); | ||||
| string data_type_str = ge::TypeUtils::DataTypeToSerialString(data_type); | string data_type_str = ge::TypeUtils::DataTypeToSerialString(data_type); | ||||
| GELOGI("Data op get data type:%s from InputDesc in ge ir graph.", data_type_str.c_str()); | |||||
| GELOGD("Data op get data type:%s from InputDesc in ge ir graph.", data_type_str.c_str()); | |||||
| ge::GeTensor inputTensor; | ge::GeTensor inputTensor; | ||||
| ge::GeTensorDesc desc(data_shape, ge::Format(data_format), data_type); | ge::GeTensorDesc desc(data_shape, ge::Format(data_format), data_type); | ||||
| @@ -69,7 +69,7 @@ target_link_libraries(atc PRIVATE | |||||
| json | json | ||||
| runtime_compile | runtime_compile | ||||
| slog | slog | ||||
| mmpa | |||||
| static_mmpa | |||||
| -lrt | -lrt | ||||
| -ldl | -ldl | ||||
| ) | ) | ||||
| @@ -52,9 +52,11 @@ void CsaInteract::Init(int32_t dev_index, int64_t job_id) { | |||||
| if (!is_init_) { | if (!is_init_) { | ||||
| dev_index_ = dev_index; | dev_index_ = dev_index; | ||||
| job_id_ = job_id; | job_id_ = job_id; | ||||
| char *file_dir_env = std::getenv(FMK_STATUS_FILE_DIR_ENV); | |||||
| char file_dir_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
| INT32 res = mmGetEnv(FMK_STATUS_FILE_DIR_ENV, file_dir_env, MMPA_MAX_PATH); | |||||
| string csa_path_prefix; | string csa_path_prefix; | ||||
| if (file_dir_env != nullptr) { | |||||
| if (res == EN_OK) { | |||||
| csa_path_prefix = file_dir_env; | csa_path_prefix = file_dir_env; | ||||
| } | } | ||||
| if (!csa_path_prefix.empty()) { | if (!csa_path_prefix.empty()) { | ||||
| @@ -186,21 +188,21 @@ Status CsaInteract::WriteHcomDetection(const std::string &content) { | |||||
| /// | /// | ||||
| Status CsaInteract::WriteFile(const std::string &file_name, const std::string &content) { | Status CsaInteract::WriteFile(const std::string &file_name, const std::string &content) { | ||||
| // if file path is not exist, then make path | // if file path is not exist, then make path | ||||
| INT32 flags = O_WRONLY | O_TRUNC | O_CREAT; | |||||
| int32_t fd = mmOpen2(file_name.c_str(), flags, M_IRUSR | M_IWUSR | S_IRGRP); | |||||
| INT32 flags = M_WRONLY | O_TRUNC | M_CREAT; | |||||
| int32_t fd = mmOpen2(file_name.c_str(), flags, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD); | |||||
| if (fd == EN_ERROR) { | if (fd == EN_ERROR) { | ||||
| if (MakePath(file_name) != SUCCESS) { | if (MakePath(file_name) != SUCCESS) { | ||||
| GELOGE(INTERNAL_ERROR, "csainteract create file path fail, errno is %d", errno); | GELOGE(INTERNAL_ERROR, "csainteract create file path fail, errno is %d", errno); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| fd = mmOpen2(file_name.c_str(), flags, M_IRUSR | M_IWUSR | S_IRGRP); | |||||
| fd = mmOpen2(file_name.c_str(), flags, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD); | |||||
| if (fd == EN_ERROR) { | if (fd == EN_ERROR) { | ||||
| GELOGE(INTERNAL_ERROR, "open file fail, errno is %d", errno); | GELOGE(INTERNAL_ERROR, "open file fail, errno is %d", errno); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| } | } | ||||
| ssize_t ret = write(fd, content.c_str(), content.length()); | |||||
| mmSsize_t ret = mmWrite(fd, (void *)content.c_str(), content.length()); | |||||
| if (ret == EN_ERROR) { | if (ret == EN_ERROR) { | ||||
| GELOGE(INTERNAL_ERROR, "write file fail, errno is %d", errno); | GELOGE(INTERNAL_ERROR, "write file fail, errno is %d", errno); | ||||
| ret = mmClose(fd); | ret = mmClose(fd); | ||||
| @@ -239,7 +241,7 @@ Status CsaInteract::MakePath(const std::string &file_name) { | |||||
| while (found != std::string::npos) { | while (found != std::string::npos) { | ||||
| std::string pre_path = file_path.substr(0, found + 1); | std::string pre_path = file_path.substr(0, found + 1); | ||||
| if (mmAccess(pre_path.c_str()) != EN_OK) { | if (mmAccess(pre_path.c_str()) != EN_OK) { | ||||
| if (mmMkdir(pre_path.c_str(), S_IRWXU) != EN_OK) { | |||||
| if (mmMkdir(pre_path.c_str(), M_IRWXU) != EN_OK) { | |||||
| GELOGE(INTERNAL_ERROR, "csainteract mkdir fail, errno is %d", errno); | GELOGE(INTERNAL_ERROR, "csainteract mkdir fail, errno is %d", errno); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| @@ -85,7 +85,7 @@ Status OpsKernelManager::Initialize(const map<string, string> &options_const) { | |||||
| initialize_ = options; | initialize_ = options; | ||||
| Status rst0 = plugin_manager_.InvokeAll<map<string, string> &, Status>(kInitialize, initialize_); | Status rst0 = plugin_manager_.InvokeAll<map<string, string> &, Status>(kInitialize, initialize_); | ||||
| if (rst0 == FAILED) { | if (rst0 == FAILED) { | ||||
| GELOGE(GE_OPS_GET_NO_VALID_SO); | |||||
| GELOGE(GE_OPS_GET_NO_VALID_SO, "There is invalid so about OpsKernelInfoStore."); | |||||
| return GE_OPS_GET_NO_VALID_SO; | return GE_OPS_GET_NO_VALID_SO; | ||||
| } | } | ||||
| Status rst1 = | Status rst1 = | ||||
| @@ -391,7 +391,7 @@ void OpsKernelManager::GetGraphOptimizerByEngine(const std::string &engine_name, | |||||
| continue; | continue; | ||||
| } | } | ||||
| if (attrs.engineName == engine_name) { | if (attrs.engineName == engine_name) { | ||||
| GELOGI("GetGraphOptimizerByEngine GraphOptimzer name: %s, engineName: %s", (it.first).c_str(), | |||||
| GELOGD("GetGraphOptimizerByEngine GraphOptimzer name: %s, engineName: %s", (it.first).c_str(), | |||||
| attrs.engineName.c_str()); | attrs.engineName.c_str()); | ||||
| graph_optimizer.push_back(it.second); | graph_optimizer.push_back(it.second); | ||||
| } | } | ||||
| @@ -61,7 +61,7 @@ Status SessionManager::SetRtContext(SessionId session_id, rtContext_t rt_context | |||||
| Status SessionManager::CreateSession(const std::map<std::string, std::string> &options, SessionId &session_id) { | Status SessionManager::CreateSession(const std::map<std::string, std::string> &options, SessionId &session_id) { | ||||
| if (!init_flag_) { | if (!init_flag_) { | ||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||||
| return GE_SESSION_MANAGER_NOT_INIT; | return GE_SESSION_MANAGER_NOT_INIT; | ||||
| } | } | ||||
| SessionId next_session_id = 0; | SessionId next_session_id = 0; | ||||
| @@ -92,7 +92,7 @@ Status SessionManager::CreateSession(const std::map<std::string, std::string> &o | |||||
| Status SessionManager::DestroySession(SessionId session_id) { | Status SessionManager::DestroySession(SessionId session_id) { | ||||
| if (!init_flag_) { | if (!init_flag_) { | ||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||||
| return GE_SESSION_MANAGER_NOT_INIT; | return GE_SESSION_MANAGER_NOT_INIT; | ||||
| } | } | ||||
| std::lock_guard<std::mutex> lock(mutex_); | std::lock_guard<std::mutex> lock(mutex_); | ||||
| @@ -119,7 +119,7 @@ Status SessionManager::DestroySession(SessionId session_id) { | |||||
| Status SessionManager::GetVariable(SessionId session_id, const std::string &name, Tensor &val) { | Status SessionManager::GetVariable(SessionId session_id, const std::string &name, Tensor &val) { | ||||
| if (!init_flag_) { | if (!init_flag_) { | ||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||||
| return GE_SESSION_MANAGER_NOT_INIT; | return GE_SESSION_MANAGER_NOT_INIT; | ||||
| } | } | ||||
| SessionPtr innerSession = nullptr; | SessionPtr innerSession = nullptr; | ||||
| @@ -143,7 +143,7 @@ Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const G | |||||
| Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const Graph &graph, | Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const Graph &graph, | ||||
| const std::map<std::string, std::string> &options) { | const std::map<std::string, std::string> &options) { | ||||
| if (!init_flag_) { | if (!init_flag_) { | ||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||||
| return GE_SESSION_MANAGER_NOT_INIT; | return GE_SESSION_MANAGER_NOT_INIT; | ||||
| } | } | ||||
| SessionPtr innerSession = nullptr; | SessionPtr innerSession = nullptr; | ||||
| @@ -173,7 +173,7 @@ Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const G | |||||
| Status SessionManager::AddGraphWithCopy(SessionId session_id, uint32_t graph_id, const Graph &graph, | Status SessionManager::AddGraphWithCopy(SessionId session_id, uint32_t graph_id, const Graph &graph, | ||||
| const std::map<std::string, std::string> &options) { | const std::map<std::string, std::string> &options) { | ||||
| if (!init_flag_) { | if (!init_flag_) { | ||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||||
| return GE_SESSION_MANAGER_NOT_INIT; | return GE_SESSION_MANAGER_NOT_INIT; | ||||
| } | } | ||||
| SessionPtr innerSession = nullptr; | SessionPtr innerSession = nullptr; | ||||
| @@ -203,7 +203,7 @@ Status SessionManager::AddGraphWithCopy(SessionId session_id, uint32_t graph_id, | |||||
| Status SessionManager::RunGraph(SessionId session_id, uint32_t graph_id, const std::vector<Tensor> &inputs, | Status SessionManager::RunGraph(SessionId session_id, uint32_t graph_id, const std::vector<Tensor> &inputs, | ||||
| std::vector<Tensor> &outputs) { | std::vector<Tensor> &outputs) { | ||||
| if (!init_flag_) { | if (!init_flag_) { | ||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||||
| return GE_SESSION_MANAGER_NOT_INIT; | return GE_SESSION_MANAGER_NOT_INIT; | ||||
| } | } | ||||
| SessionPtr innerSession = nullptr; | SessionPtr innerSession = nullptr; | ||||
| @@ -221,7 +221,7 @@ Status SessionManager::RunGraph(SessionId session_id, uint32_t graph_id, const s | |||||
| Status SessionManager::RemoveGraph(SessionId session_id, uint32_t graph_id) { | Status SessionManager::RemoveGraph(SessionId session_id, uint32_t graph_id) { | ||||
| if (!init_flag_) { | if (!init_flag_) { | ||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||||
| return GE_SESSION_MANAGER_NOT_INIT; | return GE_SESSION_MANAGER_NOT_INIT; | ||||
| } | } | ||||
| SessionPtr innerSession = nullptr; | SessionPtr innerSession = nullptr; | ||||
| @@ -239,7 +239,7 @@ Status SessionManager::RemoveGraph(SessionId session_id, uint32_t graph_id) { | |||||
| bool SessionManager::HasSession(SessionId session_id) { | bool SessionManager::HasSession(SessionId session_id) { | ||||
| if (!init_flag_) { | if (!init_flag_) { | ||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||||
| return false; | return false; | ||||
| } | } | ||||
| return session_manager_map_.find(session_id) != session_manager_map_.end(); | return session_manager_map_.find(session_id) != session_manager_map_.end(); | ||||
| @@ -247,7 +247,7 @@ bool SessionManager::HasSession(SessionId session_id) { | |||||
| Status SessionManager::GetNextSessionId(SessionId &next_session_id) { | Status SessionManager::GetNextSessionId(SessionId &next_session_id) { | ||||
| if (!init_flag_) { | if (!init_flag_) { | ||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||||
| return GE_SESSION_MANAGER_NOT_INIT; | return GE_SESSION_MANAGER_NOT_INIT; | ||||
| } | } | ||||
| static SessionId session_id = 0; | static SessionId session_id = 0; | ||||
| @@ -260,7 +260,7 @@ Status SessionManager::RegisterCallBackFunc( | |||||
| SessionId session_id, const std::string &key, | SessionId session_id, const std::string &key, | ||||
| const std::function<Status(uint32_t, const std::map<std::string, ge::Tensor> &)> &callback) { | const std::function<Status(uint32_t, const std::map<std::string, ge::Tensor> &)> &callback) { | ||||
| if (!init_flag_) { | if (!init_flag_) { | ||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||||
| return GE_SESSION_MANAGER_NOT_INIT; | return GE_SESSION_MANAGER_NOT_INIT; | ||||
| } | } | ||||
| SessionPtr innerSession = nullptr; | SessionPtr innerSession = nullptr; | ||||
| @@ -278,7 +278,7 @@ Status SessionManager::RegisterCallBackFunc( | |||||
| Status SessionManager::BuildGraph(SessionId session_id, uint32_t graph_id, const std::vector<InputTensorInfo> &inputs) { | Status SessionManager::BuildGraph(SessionId session_id, uint32_t graph_id, const std::vector<InputTensorInfo> &inputs) { | ||||
| if (!init_flag_) { | if (!init_flag_) { | ||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||||
| return GE_SESSION_MANAGER_NOT_INIT; | return GE_SESSION_MANAGER_NOT_INIT; | ||||
| } | } | ||||
| SessionPtr innerSession = nullptr; | SessionPtr innerSession = nullptr; | ||||
| @@ -297,7 +297,7 @@ Status SessionManager::BuildGraph(SessionId session_id, uint32_t graph_id, const | |||||
| Status SessionManager::RunGraphAsync(SessionId session_id, uint32_t graph_id, | Status SessionManager::RunGraphAsync(SessionId session_id, uint32_t graph_id, | ||||
| const std::vector<InputTensorInfo> &inputs, RunAsyncCallback callback) { | const std::vector<InputTensorInfo> &inputs, RunAsyncCallback callback) { | ||||
| if (!init_flag_) { | if (!init_flag_) { | ||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||||
| return GE_SESSION_MANAGER_NOT_INIT; | return GE_SESSION_MANAGER_NOT_INIT; | ||||
| } | } | ||||
| SessionPtr innerSession = nullptr; | SessionPtr innerSession = nullptr; | ||||
| @@ -317,7 +317,7 @@ Status SessionManager::GetVariables(SessionId session_id, const std::vector<std: | |||||
| std::vector<Tensor> &var_values) { | std::vector<Tensor> &var_values) { | ||||
| // step 0: init session manager | // step 0: init session manager | ||||
| if (!init_flag_) { | if (!init_flag_) { | ||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||||
| return GE_SESSION_MANAGER_NOT_INIT; | return GE_SESSION_MANAGER_NOT_INIT; | ||||
| } | } | ||||
| SessionPtr innerSession = nullptr; | SessionPtr innerSession = nullptr; | ||||
| @@ -383,7 +383,7 @@ Status SessionManager::GetVariables(SessionId session_id, const std::vector<std: | |||||
| bool SessionManager::IsGraphNeedRebuild(SessionId session_id, uint32_t graph_id) { | bool SessionManager::IsGraphNeedRebuild(SessionId session_id, uint32_t graph_id) { | ||||
| if (!init_flag_) { | if (!init_flag_) { | ||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||||
| GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||||
| return true; | return true; | ||||
| } | } | ||||
| SessionPtr innerSession = nullptr; | SessionPtr innerSession = nullptr; | ||||
| @@ -44,8 +44,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOp::~SingleOp() { | |||||
| delete task; | delete task; | ||||
| task = nullptr; | task = nullptr; | ||||
| } | } | ||||
| GELOGI("SingleOp destory sessionId = %lu", aicpu_session_id_); | |||||
| ModelManager::GetInstance()->DestroyAicpuSession(aicpu_session_id_); | |||||
| } | } | ||||
| Status SingleOp::ValidateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs) { | Status SingleOp::ValidateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs) { | ||||
| @@ -59,7 +57,7 @@ Status SingleOp::ValidateArgs(const std::vector<DataBuffer> &inputs, const std:: | |||||
| for (size_t i = 0; i < num_inputs; ++i) { | for (size_t i = 0; i < num_inputs; ++i) { | ||||
| // preventing from read out of bound | // preventing from read out of bound | ||||
| size_t aligned_size = GetAlignedSize(inputs[i].length); | size_t aligned_size = GetAlignedSize(inputs[i].length); | ||||
| GELOGI("Input [%zu], aligned_size:%zu, inputs.length:%lu, input_sizes_:%lu", | |||||
| GELOGI("Input [%zu], aligned_size:%zu, inputs.length:%lu, input_sizes_:%zu", | |||||
| i, aligned_size, inputs[i].length, input_sizes_[i]); | i, aligned_size, inputs[i].length, input_sizes_[i]); | ||||
| if (aligned_size < input_sizes_[i]) { | if (aligned_size < input_sizes_[i]) { | ||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Input size mismatch. index = %zu, model expect %zu," | GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Input size mismatch. index = %zu, model expect %zu," | ||||
| @@ -77,7 +75,7 @@ Status SingleOp::ValidateArgs(const std::vector<DataBuffer> &inputs, const std:: | |||||
| for (size_t i = 0; i < num_outputs; ++i) { | for (size_t i = 0; i < num_outputs; ++i) { | ||||
| // preventing from write out of bound | // preventing from write out of bound | ||||
| size_t aligned_size = GetAlignedSize(outputs[i].length); | size_t aligned_size = GetAlignedSize(outputs[i].length); | ||||
| GELOGI("Output [%zu], aligned_size:%zu, outputs.length:%lu, output_sizes_:%lu", | |||||
| GELOGI("Output [%zu], aligned_size:%zu, outputs.length:%lu, output_sizes_:%zu", | |||||
| i, aligned_size, outputs[i].length, output_sizes_[i]); | i, aligned_size, outputs[i].length, output_sizes_[i]); | ||||
| if (aligned_size < output_sizes_[i]) { | if (aligned_size < output_sizes_[i]) { | ||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Output size mismatch. index = %zu, model expect %zu," | GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Output size mismatch. index = %zu, model expect %zu," | ||||
| @@ -143,7 +141,7 @@ Status SingleOp::UpdateArgs(const std::vector<DataBuffer> &inputs, const std::ve | |||||
| GE_CHECK_NOTNULL(task_io_addr); | GE_CHECK_NOTNULL(task_io_addr); | ||||
| auto io_addr = reinterpret_cast<uint64_t *>(const_cast<uintptr_t *>(task_io_addr)); | auto io_addr = reinterpret_cast<uint64_t *>(const_cast<uintptr_t *>(task_io_addr)); | ||||
| for (size_t i = 0; i < io_addr_num; ++i) { | for (size_t i = 0; i < io_addr_num; ++i) { | ||||
| io_addr[i] = reinterpret_cast<uintptr_t>(args_[i]); | |||||
| io_addr[i] = static_cast<uintptr_t>(args_[i]); | |||||
| } | } | ||||
| } else { | } else { | ||||
| GELOGW("Only TF_kernel aicpu and aicpu_CC are supported, but got %u", task->GetOpTaskType()); | GELOGW("Only TF_kernel aicpu and aicpu_CC are supported, but got %u", task->GetOpTaskType()); | ||||
| @@ -180,17 +178,11 @@ void SingleOp::SetStream(rtStream_t stream) { | |||||
| stream_ = stream; | stream_ = stream; | ||||
| } | } | ||||
| void SingleOp::SetSessionID(uint64_t session_id) { | |||||
| aicpu_session_id_ = session_id; | |||||
| } | |||||
| DynamicSingleOp::DynamicSingleOp(uintptr_t resource_id, std::mutex *stream_mutex, rtStream_t stream) | DynamicSingleOp::DynamicSingleOp(uintptr_t resource_id, std::mutex *stream_mutex, rtStream_t stream) | ||||
| : resource_id_(resource_id), stream_mutex_(stream_mutex), stream_(stream) { | : resource_id_(resource_id), stream_mutex_(stream_mutex), stream_(stream) { | ||||
| } | } | ||||
| DynamicSingleOp::~DynamicSingleOp() { | DynamicSingleOp::~DynamicSingleOp() { | ||||
| GELOGI("DynamicSingleOp destory sessionId = %lu", aicpu_session_id_); | |||||
| ModelManager::GetInstance()->DestroyAicpuSession(aicpu_session_id_); | |||||
| } | } | ||||
| Status DynamicSingleOp::ValidateParams(const vector<GeTensorDesc> &input_desc, | Status DynamicSingleOp::ValidateParams(const vector<GeTensorDesc> &input_desc, | ||||
| @@ -299,8 +291,4 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||||
| return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; | return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; | ||||
| } | } | ||||
| } | } | ||||
| void DynamicSingleOp::SetSessionID(uint64_t session_id) { | |||||
| aicpu_session_id_ = session_id; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -37,7 +37,6 @@ class SingleOp { | |||||
| Status ExecuteAsync(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | Status ExecuteAsync(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | ||||
| void SetStream(rtStream_t stream); | void SetStream(rtStream_t stream); | ||||
| void SetSessionID(uint64_t session_id); | |||||
| private: | private: | ||||
| Status ValidateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | Status ValidateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | ||||
| @@ -52,7 +51,6 @@ class SingleOp { | |||||
| std::vector<void *> output_addr_list_; | std::vector<void *> output_addr_list_; | ||||
| std::vector<size_t> output_sizes_; | std::vector<size_t> output_sizes_; | ||||
| std::vector<uintptr_t> args_; | std::vector<uintptr_t> args_; | ||||
| uint64_t aicpu_session_id_ = 0; | |||||
| std::vector<OpTask *> tasks_; | std::vector<OpTask *> tasks_; | ||||
| std::vector<std::vector<uintptr_t *>> arg_table_; | std::vector<std::vector<uintptr_t *>> arg_table_; | ||||
| @@ -66,7 +64,6 @@ class DynamicSingleOp { | |||||
| const std::vector<DataBuffer> &inputs, | const std::vector<DataBuffer> &inputs, | ||||
| std::vector<GeTensorDesc> &output_desc, | std::vector<GeTensorDesc> &output_desc, | ||||
| std::vector<DataBuffer> &outputs); | std::vector<DataBuffer> &outputs); | ||||
| void SetSessionID(uint64_t session_id); | |||||
| private: | private: | ||||
| friend class SingleOpModel; | friend class SingleOpModel; | ||||
| @@ -89,7 +86,6 @@ class DynamicSingleOp { | |||||
| rtStream_t stream_ = nullptr; | rtStream_t stream_ = nullptr; | ||||
| size_t num_inputs_ = 0; | size_t num_inputs_ = 0; | ||||
| size_t num_outputs_ = 0; | size_t num_outputs_ = 0; | ||||
| uint64_t aicpu_session_id_ = 0; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_SINGLE_OP_SINGLE_OP_H_ | #endif // GE_SINGLE_OP_SINGLE_OP_H_ | ||||
| @@ -32,7 +32,7 @@ | |||||
| #include "task/aicpu_kernel_task_builder.h" | #include "task/aicpu_kernel_task_builder.h" | ||||
| #include "task/tbe_task_builder.h" | #include "task/tbe_task_builder.h" | ||||
| static std::atomic<std::uint64_t> aicpu_sessionid(0); | |||||
| static std::atomic<std::uint64_t> aicpu_kernel_id(0); | |||||
| using domi::TaskDef; | using domi::TaskDef; | ||||
| using std::unique_ptr; | using std::unique_ptr; | ||||
| @@ -252,7 +252,9 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { | |||||
| } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | ||||
| GELOGD("Building AICPU_CC task"); | GELOGD("Building AICPU_CC task"); | ||||
| OpTask *task = nullptr; | OpTask *task = nullptr; | ||||
| auto ret = BuildCpuKernelTask(task_def.kernel(), &task); | |||||
| uint64_t singleop_kernel_id = aicpu_kernel_id++; | |||||
| GELOGI("Build singleOp CCTask, kernel_id = %lu", singleop_kernel_id); | |||||
| auto ret = BuildCpuKernelTask(task_def.kernel(), &task, singleop_kernel_id); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -265,14 +267,13 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { | |||||
| GELOGD("Building AICPU_TF task"); | GELOGD("Building AICPU_TF task"); | ||||
| AiCpuTask *aicpu_task = nullptr; | AiCpuTask *aicpu_task = nullptr; | ||||
| bool depend_compute_flag = false; | bool depend_compute_flag = false; | ||||
| uint64_t singleop_sessionid = aicpu_sessionid++; | |||||
| GELOGI("Build singleOp, sessionId = %lu", singleop_sessionid); | |||||
| auto ret = BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, false, depend_compute_flag, singleop_sessionid); | |||||
| uint64_t singleop_kernel_id = aicpu_kernel_id++; | |||||
| GELOGI("Build singleOp TfTask, kernel_id = %lu", singleop_kernel_id); | |||||
| auto ret = BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, false, depend_compute_flag, singleop_kernel_id); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| single_op.tasks_.emplace_back(aicpu_task); | single_op.tasks_.emplace_back(aicpu_task); | ||||
| single_op.SetSessionID(singleop_sessionid); | |||||
| } else { | } else { | ||||
| // skip | // skip | ||||
| GELOGD("Skip task type: %d", static_cast<int>(task_type)); | GELOGD("Skip task type: %d", static_cast<int>(task_type)); | ||||
| @@ -329,7 +330,7 @@ Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTa | |||||
| } | } | ||||
| Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, | Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, | ||||
| bool dynamic_flag, bool& depend_compute_flag, uint64_t session_id) { | |||||
| bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id) { | |||||
| auto iter = op_list_.find(kernel_def.op_index()); | auto iter = op_list_.find(kernel_def.op_index()); | ||||
| if (iter == op_list_.end()) { | if (iter == op_list_.end()) { | ||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "op desc not found. op index = %u", kernel_def.op_index()); | GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "op desc not found. op index = %u", kernel_def.op_index()); | ||||
| @@ -342,7 +343,7 @@ Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiC | |||||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | return ACL_ERROR_GE_MEMORY_ALLOCATION; | ||||
| } | } | ||||
| auto builder = AiCpuTaskBuilder(iter->second->GetOpDesc(), kernel_def); | auto builder = AiCpuTaskBuilder(iter->second->GetOpDesc(), kernel_def); | ||||
| auto ret = builder.BuildTask(*aicpu_task, model_params_, dynamic_flag, session_id); | |||||
| auto ret = builder.BuildTask(*aicpu_task, model_params_, dynamic_flag, kernel_id); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "build aicpu_TF op task failed"); | GELOGE(ret, "build aicpu_TF op task failed"); | ||||
| return ret; | return ret; | ||||
| @@ -353,7 +354,7 @@ Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiC | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task) { | |||||
| Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id) { | |||||
| const auto &context = kernel_def.context(); | const auto &context = kernel_def.context(); | ||||
| auto iter = op_list_.find(context.op_index()); | auto iter = op_list_.find(context.op_index()); | ||||
| if (iter == op_list_.end()) { | if (iter == op_list_.end()) { | ||||
| @@ -367,7 +368,7 @@ Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTa | |||||
| } | } | ||||
| auto builder = AiCpuCCTaskBuilder(iter->second->GetOpDesc(), kernel_def); | auto builder = AiCpuCCTaskBuilder(iter->second->GetOpDesc(), kernel_def); | ||||
| auto ret = builder.BuildTask(*aicpucc_task); | |||||
| auto ret = builder.BuildTask(*aicpucc_task, kernel_id); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "build aicpu_CC op task failed"); | GELOGE(ret, "build aicpu_CC op task failed"); | ||||
| return ret; | return ret; | ||||
| @@ -396,7 +397,9 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl | |||||
| } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | ||||
| GELOGD("Building AICPU_CC task"); | GELOGD("Building AICPU_CC task"); | ||||
| OpTask *task = nullptr; | OpTask *task = nullptr; | ||||
| GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task)); | |||||
| uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; | |||||
| GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id); | |||||
| GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id)); | |||||
| single_op.op_task_.reset(task); | single_op.op_task_.reset(task); | ||||
| } else { | } else { | ||||
| GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, | GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, | ||||
| @@ -430,10 +433,10 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||||
| GELOGD("Building AICPU_TF task"); | GELOGD("Building AICPU_TF task"); | ||||
| AiCpuTask *aicpu_task = nullptr; | AiCpuTask *aicpu_task = nullptr; | ||||
| bool depend_compute_flag = false; | bool depend_compute_flag = false; | ||||
| uint64_t dynamic_singleop_sessionid = aicpu_sessionid++; | |||||
| GELOGI("Build dynamic singleOp, sessionId = %lu", dynamic_singleop_sessionid); | |||||
| uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; | |||||
| GELOGI("Build dynamic singleOp TfTask, kernel_id = %lu", dynamic_singleop_kernel_id); | |||||
| GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, true, | GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, true, | ||||
| depend_compute_flag, dynamic_singleop_sessionid)); | |||||
| depend_compute_flag, dynamic_singleop_kernel_id)); | |||||
| if (depend_compute_flag) { | if (depend_compute_flag) { | ||||
| if (i >= tasks.size() - 1) { | if (i >= tasks.size() - 1) { | ||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "The copy task of the fourth operator was not found."); | GELOGE(ACL_ERROR_GE_PARAM_INVALID, "The copy task of the fourth operator was not found."); | ||||
| @@ -444,7 +447,6 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||||
| GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex())); | GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex())); | ||||
| } | } | ||||
| single_op.op_task_.reset(aicpu_task); | single_op.op_task_.reset(aicpu_task); | ||||
| single_op.SetSessionID(dynamic_singleop_sessionid); | |||||
| } else { | } else { | ||||
| // skip | // skip | ||||
| GELOGD("Skip task type: %d", static_cast<int>(task_type)); | GELOGD("Skip task type: %d", static_cast<int>(task_type)); | ||||
| @@ -69,8 +69,8 @@ class SingleOpModel { | |||||
| Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); | Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); | ||||
| Status BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task); | Status BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task); | ||||
| Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, | Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, | ||||
| bool dynamic_flag, bool& depend_compute_flag, uint64_t session_id); | |||||
| Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task); | |||||
| bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id); | |||||
| Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); | |||||
| Status BuildModelTaskKernel(const domi::TaskDef &task_def, DynamicSingleOp &single_op); | Status BuildModelTaskKernel(const domi::TaskDef &task_def, DynamicSingleOp &single_op); | ||||
| static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam ¶m); | static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam ¶m); | ||||
| @@ -46,7 +46,7 @@ Status AiCpuCCTaskBuilder::SetKernelArgs(AiCpuCCTask &task) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) { | |||||
| Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id) { | |||||
| auto ret = SetKernelArgs(task); | auto ret = SetKernelArgs(task); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| @@ -76,7 +76,7 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) { | |||||
| "task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", | "task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", | ||||
| kernel_ext_info.size(), kernel_ext_info_size); | kernel_ext_info.size(), kernel_ext_info_size); | ||||
| ret = task.SetExtInfoAndType(kernel_ext_info); | |||||
| ret = task.SetExtInfoAndType(kernel_ext_info, kernel_id); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Init ext info failed."); | GELOGE(ret, "Init ext info failed."); | ||||
| return ret; | return ret; | ||||