diff --git a/CMakeLists.txt b/CMakeLists.txt index 41520b14..ac0240d9 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,7 +108,7 @@ else () elseif(PLATFORM STREQUAL "inference") find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) - find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) + find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) if(PRODUCT STREQUAL "flr3") elseif(PRODUCT STREQUAL "flr1") @@ -119,12 +119,12 @@ else () find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) endif() elseif(PLATFORM STREQUAL "all") - find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) - find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) + find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) + find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) - find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) - find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) - find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) + find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) + find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) + find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) else() message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") endif() diff --git a/README_CN.md b/README_CN.md index 0a1e9c09..48fe4216 100644 --- a/README_CN.md +++ b/README_CN.md @@ -34,18 +34,6 @@ 在训练/推理过程中,上述过程会自动执行,通过上述图操作,GE可以将前端下发的图转换为一种可以在昇腾AI处理器上高效运行的图模式。 - - -- [安装说明](#安装说明) - - [安装GE](#安装ge) - - [源码安装](#源码安装) - - [社区](#社区) - - [贡献](#贡献) - - [Release Notes](#release-notes) - - [License](#license) - - - # 安装说明 ## 安装GE @@ -54,45 +42,8 @@ GE内嵌在MindSpore安装包中,MindSpore安装完毕后,GE以三个动态 ## 源码安装 
-GE也支持由源码编译,进行源码编译前,首先确保你有昇腾910 AI处理器的环境,同时系统满足以下要求: - -- GCC >= 7.3.0 -- CMake >= 3.14.0 -- Autoconf >= 2.64 -- Libtool >= 2.4.6 -- Automake >= 1.15.1 - -编译完成后会生成几个动态库,他们会链接到MindSpore中执行,无法单独运行。 - -1. 下载GE源码。 - - GE源码托管在码云平台,可由此下载。 - ``` - git clone https://gitee.com/mindspore/graphengine.git - cd graphengine - ``` - -2. 在GE根目录下执行下列命令即可进行编译。 - - ``` - bash build.sh - ``` - - > - 开始编译之前,请确保正确设置相关的环境变量。 - > - 在`build.sh`的脚本中,会进行`git clone`操作,请确保网络连接正常且git配置正确。 - > - 在`build.sh`的脚本中,默认会8线程编译,如果机器性能较差,可能会编译失败。可以通过`-j{线程数}`来控制线程数,如`bash build.sh –j4`。 - -3. 完成编译后,相应的动态库文件会生成在output文件夹中。 - -更多指令帮助,可以使用: -``` -bash build.sh –h -``` -如果想清除历史编译记录,可以如下操作: -``` -rm -rf build/ output/ -bash build.sh -``` +GE也支持由源码编译,请参考以下链接完成: +[个人开发工具链](https://gitee.com/mindspore/graphengine/blob/master/scripts/readme.md) ## 社区 diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 4a296e87..9fff30f7 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -373,6 +373,7 @@ set(TRAIN_SRC_LIST "opskernel_manager/ops_kernel_builder_manager.cc" "session/inner_session.cc" "session/session_manager.cc" + "graph/execute/model_executor.cc" "single_op/single_op.cc" "single_op/single_op_manager.cc" "single_op/single_op_model.cc" @@ -473,8 +474,6 @@ set(INFER_SRC_LIST "common/ge/plugin_manager.cc" "common/ge/op_tiling_manager.cc" "init/gelib.cc" - "session/inner_session.cc" - "session/session_manager.cc" "engine_manager/dnnengine_manager.cc" "opskernel_manager/ops_kernel_manager.cc" "opskernel_manager/ops_kernel_builder_manager.cc" @@ -719,6 +718,12 @@ set(INFER_SRC_LIST "ge_opt_info/ge_opt_info.cc" ) +set(RUNNER_SRC_LIST + "client/ge_api.cc" + "session/inner_session.cc" + "session/session_manager.cc" +) + if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) message("CMAKE_CXX_COMPILER_VERSION = ${CMAKE_CXX_COMPILER_VERSION}") ############ libge_runner.so ############ diff --git a/ge/client/ge_api.cc b/ge/client/ge_api.cc index aa88cfb4..3cf7c3c4 100644 --- a/ge/client/ge_api.cc +++ 
b/ge/client/ge_api.cc @@ -47,6 +47,7 @@ const int32_t kMaxStrLen = 128; static bool g_ge_initialized = false; static std::mutex g_ge_release_mutex; // GEFinalize and ~Session use +static std::shared_ptr g_session_manager; namespace ge { void GetOpsProtoPath(std::string &opsproto_path) { @@ -70,8 +71,7 @@ Status CheckOptionsValid(const std::map &options) { auto job_id_iter = options.find(OPTION_EXEC_JOB_ID); if (job_id_iter != options.end()) { if (job_id_iter->second.length() > kMaxStrLen) { - GELOGE(PARAM_INVALID, "[Check][JobId]Failed," - "the job_id [%s] string length: %zu > max string length: %d", + GELOGE(PARAM_INVALID, "[Check][JobId]Failed, the job_id [%s] string length: %zu > max string length: %d", job_id_iter->second.c_str(), job_id_iter->second.length(), kMaxStrLen); REPORT_INPUT_ERROR("E10051", std::vector({"id", "length"}), std::vector({job_id_iter->second, @@ -95,8 +95,7 @@ Status GEInitializeImpl(const std::map &options) { std::string path_base = ge::GELib::GetPath(); auto ret = ErrorManager::GetInstance().Init(path_base); if (ret != SUCCESS) { - GELOGE(GE_CLI_INIT_FAILED, - "[Init][PathBase]Init failed when pass param path_base:%s", path_base.c_str()); + GELOGE(GE_CLI_INIT_FAILED, "[Init][PathBase]Init failed when pass param path_base:%s", path_base.c_str()); REPORT_CALL_ERROR("E19999", "Init failed when pass param path_base:%s", path_base.c_str()); return ret; } @@ -117,11 +116,9 @@ Status GEInitializeImpl(const std::map &options) { bool is_proto_init = manager->Initialize(option_tmp); GE_TIMESTAMP_END(GEInitialize, "GEInitialize::ManagerInitialize"); if (!is_proto_init) { - GELOGE(GE_CLI_INIT_FAILED, - "[Init][OpsProtoPath]Loading OpsProto lib plugin failed, OpsProtoPath:%s invalid.", + GELOGE(GE_CLI_INIT_FAILED, "[Init][OpsProtoPath]Loading OpsProto lib plugin failed, OpsProtoPath:%s invalid.", opsproto_path.c_str()); - REPORT_CALL_ERROR("E19999", "Loading OpsProto lib plugin failed, OpsProtoPath:%s invalid", - opsproto_path.c_str()); + 
REPORT_CALL_ERROR("E19999", "Loading OpsProto lib plugin failed, OpsProtoPath:%s invalid", opsproto_path.c_str()); return FAILED; } @@ -148,6 +145,22 @@ Status GEInitializeImpl(const std::map &options) { return FAILED; } + ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOther); + GELOGI("sessionManager initial."); + GE_TIMESTAMP_START(SessionManagerInitialize); + g_session_manager = MakeShared(); + if (g_session_manager == nullptr) { + GELOGE(GE_CLI_INIT_FAILED, "[Init][Create]SessionManager failed"); + return FAILED; + } + ret = g_session_manager->Initialize(options); + GE_TIMESTAMP_END(SessionManagerInitialize, "InnerInitialize::SessionManagerInitialize"); + if (ret != SUCCESS) { + GELOGE(ret, "[Init][SessionManager] GE session manager initial failed."); + REPORT_CALL_ERROR("E19999", "SessionManager initialize failed."); + return ret; + } + // 7.check return status, return if (!g_ge_initialized) { // Initialize success, first time calling initialize @@ -173,8 +186,7 @@ Status GEInitialize(const std::map &options) { for (auto &option : options) { if (option.first.GetString() == nullptr || option.second.GetString() == nullptr) { GELOGE(FAILED, "[Check][Param]Options invalid, first or second option is nullptr."); - REPORT_INNER_ERROR("E19999", "Check parameter's options invalid," - "the first or second option is nullptr."); + REPORT_INNER_ERROR("E19999", "Check parameter's options invalid, the first or second option is nullptr."); return FAILED; } std::string key = option.first.GetString(); @@ -217,6 +229,12 @@ Status GEFinalize() { ret = middle_ret; } } + + GELOGI("SessionManager finalization."); + if (g_session_manager != nullptr) { + (void)g_session_manager->Finalize(); // always success. 
+ } + middle_ret = TBEPluginManager::Instance().Finalize(); if (middle_ret != SUCCESS) { ret = middle_ret; @@ -251,28 +269,18 @@ std::string GEGetWarningMsg() { Session::Session(const std::map &options) { ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOther); GELOGT(TRACE_INIT, "Start to construct session."); - ErrorManager::GetInstance().GenWorkStreamIdDefault(); // check init status sessionId_ = 0; if (!g_ge_initialized) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, - "[Construct][Session]Failed because lack GEInitialize call before."); - REPORT_INNER_ERROR("E19999", - "Creating session failed because lack GEInitialize call before."); - return; - } - // call Initialize - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, - "[Construct][Session]Failed, GELib instance is nullptr or it is not InitFlag"); + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before."); + REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before."); return; } GELOGT(TRACE_RUNNING, "Creating session"); uint64_t session_id = 0; - Status ret = instance_ptr->SessionManagerObj().CreateSession(options, session_id); + Status ret = g_session_manager->CreateSession(options, session_id); GELOGT(TRACE_RUNNING, "Session id is %lu", session_id); // check return status, return, update session id if success @@ -288,32 +296,21 @@ Session::Session(const std::map &options) { Session::Session(const std::map &options) { ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOther); GELOGT(TRACE_INIT, "Session Constructor start"); - ErrorManager::GetInstance().GenWorkStreamIdDefault(); // check init status sessionId_ = 0; if (!g_ge_initialized) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, - "[Construct][Session]Failed because lack GEInitialize call before."); - 
REPORT_INNER_ERROR("E19999", - "Creating session failed because lack GEInitialize call before."); + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before."); + REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before."); return; } // call Initialize - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, - "[Construct][Session]Failed, the GELib instance is nullptr or is not InitFlag"); - return; - } - GELOGT(TRACE_RUNNING, "Creating session"); std::map str_options; for (auto &option : options) { if (option.first.GetString() == nullptr || option.second.GetString() == nullptr) { GELOGE(FAILED, "[Construct][Session]Failed, the first or second option is nullptr."); - REPORT_INNER_ERROR("E19999", "Creating session's options invalid," - "the first or second option is nullptr."); + REPORT_INNER_ERROR("E19999", "Creating session's options invalid, the first or second option is nullptr."); return; } std::string key = option.first.GetString(); @@ -321,7 +318,7 @@ Session::Session(const std::map &options) { str_options[key] = val; } uint64_t session_id = 0; - Status ret = instance_ptr->SessionManagerObj().CreateSession(str_options, session_id); + Status ret = g_session_manager->CreateSession(str_options, session_id); GELOGT(TRACE_RUNNING, "Session id is %lu", session_id); // check return status, return, update session id if success @@ -350,19 +347,12 @@ Session::~Session() { try { uint64_t session_id = sessionId_; // call DestroySession - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGW("GE is not yet initialized or is finalized."); - return; - } GELOGT(TRACE_RUNNING, "Session id is %lu", session_id); - GELOGT(TRACE_RUNNING, "Destroying session"); - ret = instance_ptr->SessionManagerObj().DestroySession(session_id); + 
ret = g_session_manager->DestroySession(session_id); } catch (google::protobuf::FatalException &e) { - GELOGE(GE_CLI_SESS_DESTROY_FAILED, "[Destruct][Session]Failed " - "because get fatalException."); + GELOGE(GE_CLI_SESS_DESTROY_FAILED, "[Destruct][Session]Failed because get fatalException."); REPORT_CALL_ERROR("E19999", "Destruct session failed, get fatal exception"); } @@ -377,9 +367,7 @@ Session::~Session() { // Add Graph Status Session::AddGraph(uint32_t graph_id, const Graph &graph) { - ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther); std::map options; - ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); return AddGraph(graph_id, graph, options); } @@ -388,20 +376,16 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, - "[Add][Graph]Failed because GELib instance is nullptr or it is not InitFlag."); - REPORT_INNER_ERROR("E19999", - "AddGraph Failed, GELib instance is nullptr or it is not InitFlag."); + if (!g_ge_initialized) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before."); + REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before."); return FAILED; } + GELOGD("Adding graph to session"); - Status ret = instance_ptr->SessionManagerObj().AddGraph(sessionId_, graph_id, graph, options); + Status ret = g_session_manager->AddGraph(sessionId_, graph_id, graph, options); if (ret != SUCCESS) { - GELOGE(ret, - "[Add][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", - ret, sessionId_, graph_id); + GELOGE(ret, "[Add][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", ret, sessionId_, graph_id); return FAILED; } GELOGD("AddGraph finished in Session."); @@ -409,37 +393,31 @@ Status Session::AddGraph(uint32_t graph_id, const 
Graph &graph, const std::map &options) { +Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map &options) { ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther); GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_); ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, - "[Add][Graph]Failed, the GELib instance is nullptr or is not InitFlag."); - REPORT_INNER_ERROR("E19999", - "AddGraph Failed, GELib instance is nullptr or it is not InitFlag."); + if (!g_ge_initialized) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before."); + REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before."); return FAILED; } + GELOGD("Adding graph to session"); std::map str_options; for (auto &option : options) { if (option.first.GetString() == nullptr || option.second.GetString() == nullptr) { GELOGE(FAILED, "[Add][Graph]Failed, the first or second option is nullptr."); - REPORT_INNER_ERROR("E19999", - "Add Graph Failed, the first or second option is nullptr."); + REPORT_INNER_ERROR("E19999", "Add Graph Failed, the first or second option is nullptr."); return FAILED; } std::string key = option.first.GetString(); std::string val = option.second.GetString(); str_options[key] = val; } - Status ret = instance_ptr->SessionManagerObj().AddGraph(sessionId_, graph_id, graph, str_options); + Status ret = g_session_manager->AddGraph(sessionId_, graph_id, graph, str_options); if (ret != SUCCESS) { - GELOGE(ret, - "[Add][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", - ret, sessionId_, graph_id); + GELOGE(ret, "[Add][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", ret, sessionId_, graph_id); 
return FAILED; } GELOGD("AddGraph finished in Session."); @@ -447,8 +425,6 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph, } Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph) { - ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther); - ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); std::map options; return AddGraphWithCopy(graph_id, graph, options); } @@ -459,24 +435,20 @@ Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph, ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther); GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_); ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, - "[Add][Graph]Failed, the GELib instance is nullptr or is not InitFlag."); - REPORT_INNER_ERROR("E19999", - "AddGraph Failed, GELib instance is nullptr or is not InitFlag."); + if (!g_ge_initialized) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before."); + REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before."); return FAILED; } + std::map str_options; for (auto it = options.begin(); it != options.end(); ++it) { str_options.insert({it->first.GetString(), it->second.GetString()}); } GELOGD("Adding graph to session"); - Status ret = instance_ptr->SessionManagerObj().AddGraphWithCopy(sessionId_, graph_id, graph, str_options); + Status ret = g_session_manager->AddGraphWithCopy(sessionId_, graph_id, graph, str_options); if (ret != SUCCESS) { - GELOGE(ret, - "[Add][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", - ret, sessionId_, graph_id); + GELOGE(ret, "[Add][Graph]Failed, error code:%u, 
session_id:%lu, graph_id:%u.", ret, sessionId_, graph_id); return FAILED; } GELOGD("AddGraph finished in Session."); @@ -487,29 +459,21 @@ Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph, Status Session::RemoveGraph(uint32_t graph_id) { ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther); GELOGT(TRACE_INIT, "Session RemoveGraph start"); - ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); // call RemoveGraph - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (!instance_ptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, - "[Remove][Graph]Failed, GELib instance is nullptr or is not InitFlag, " - "session_id %lu, graph_id %u", sessionId_, graph_id); - REPORT_INNER_ERROR("E19999", - "RemoveGraph Failed, GELib instance is nullptr or is not InitFlag, " - "session_id %lu, graph_id %u", sessionId_, graph_id); + if (!g_ge_initialized) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before."); + REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before."); return FAILED; } GELOGT(TRACE_RUNNING, "Removing Graph from session"); - Status ret = instance_ptr->SessionManagerObj().RemoveGraph(sessionId_, graph_id); + Status ret = g_session_manager->RemoveGraph(sessionId_, graph_id); // check return status, return if (ret != SUCCESS) { - GELOGE(ret, - "[Remove][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", - ret, sessionId_, graph_id); - REPORT_CALL_ERROR("E19999", "Remove graph failed, error code:%u, " - "session_id:%lu, graph_id:%u", ret, sessionId_, graph_id); + GELOGE(ret, "[Remove][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", ret, sessionId_, graph_id); + REPORT_CALL_ERROR("E19999", "Remove graph failed, error code:%u, session_id:%lu, graph_id:%u", + ret, sessionId_, graph_id); return FAILED; } GELOGT(TRACE_STOP, "Session RemoveGraph finished"); 
@@ -568,29 +532,21 @@ void PrintOutputResult(std::vector &outputs) { Status Session::RunGraph(uint32_t graph_id, const std::vector &inputs, std::vector &outputs) { ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther); GELOGT(TRACE_INIT, "Session RunGraph start"); - ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); - std::vector graph_inputs = inputs; - // call RunGraph - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, - "[Run][Graph]Failed, GELib instance is nullptr or is not InitFlag, " - "session_id %lu, graph_id %u", sessionId_, graph_id); - REPORT_INNER_ERROR("E19999", - "RunGraph Failed, GELib instance is nullptr or is not InitFlag, " - "session_id %lu, graph_id %u", sessionId_, graph_id); + if (!g_ge_initialized) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before."); + REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before."); return FAILED; } + + // call RunGraph GELOGT(TRACE_RUNNING, "Running Graph"); - Status ret = instance_ptr->SessionManagerObj().RunGraph(sessionId_, graph_id, graph_inputs, outputs); + Status ret = g_session_manager->RunGraph(sessionId_, graph_id, inputs, outputs); // check return status if (ret != SUCCESS) { - GELOGE(ret, - "[Run][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", - ret, sessionId_, graph_id); - REPORT_CALL_ERROR("E19999", "Remove graph failed, error code:%u, " - "session_id:%lu, graph_id:%u", ret, sessionId_, graph_id); + GELOGE(ret, "[Run][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", ret, sessionId_, graph_id); + REPORT_CALL_ERROR("E19999", "Run graph failed, error code:%u, session_id:%lu, graph_id:%u", + ret, sessionId_, graph_id); return FAILED; } @@ -609,30 +565,15 @@ Status Session::RunGraphWithStreamAsync(uint32_t 
void *stream, const s std::vector &outputs) { ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther); GELOGT(TRACE_INIT, "Start to run graph with stream async."); - ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, - "[Run][Graph]Run graph with stream async failed, the GELib instance is nullptr," - "session id = %lu, graph id = %u, stream = %p.", sessionId_, graph_id, stream); - REPORT_INNER_ERROR("E19999", - "Run graph with stream async failed, the GELib instance is nullptr" - "session id = %lu, graph id = %u, stream = %p.", sessionId_, graph_id, stream); - return FAILED; - } - if (!instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, - "[Run][Graph]Run graph with stream asyn failed, the GELib instance is not init," - "session id = %lu, graph id = %u, stream = %p.", sessionId_, graph_id, stream); - REPORT_INNER_ERROR("E19999", - "Run graph with stream asyn failed, the GELib instance is not init," - "session id = %lu, graph id = %u, stream = %p.", sessionId_, graph_id, stream); + if (!g_ge_initialized) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before."); + REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before."); return FAILED; } + GELOGT(TRACE_RUNNING, "Run Graph Run graph with stream async."); - Status ret = instance_ptr->SessionManagerObj().RunGraphWithStreamAsync(sessionId_, graph_id, stream, inputs, - outputs); + Status ret = g_session_manager->RunGraphWithStreamAsync(sessionId_, graph_id, stream, inputs, outputs); if (ret != SUCCESS) { GELOGE(ret, "[Run][Graph]Run graph with stream asyn Failed," "error code = %u, session id = %lu, graph id = %u, stream = %p.", ret, sessionId_, graph_id, stream); @@ -648,40 +589,46 @@ Status Session::RunGraphWithStreamAsync(uint32_t 
graph_id, void *stream, const s // Register Call Back Status Session::RegisterCallBackFunc(const std::string &key, const pCallBackFunc &callback) { ErrorManager::GetInstance().GenWorkStreamIdDefault(); - return ge::GELib::GetInstance()->SessionManagerObj().RegisterCallBackFunc(sessionId_, key, callback); + if (!g_ge_initialized) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before."); + REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before."); + return FAILED; + } + + return g_session_manager->RegisterCallBackFunc(sessionId_, key, callback); } Status Session::RegisterCallBackFunc(const char *key, const session::pCallBackFunc &callback) { ErrorManager::GetInstance().GenWorkStreamIdDefault(); + if (!g_ge_initialized) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before."); + REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before."); + return FAILED; + } + std::string str_key; if (key != nullptr) { str_key = key; } - return ge::GELib::GetInstance()->SessionManagerObj().RegisterCallBackFunc(sessionId_, str_key, callback); + return g_session_manager->RegisterCallBackFunc(sessionId_, str_key, callback); } // Build Graph Status Session::BuildGraph(uint32_t graph_id, const std::vector &inputs) { ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther); ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, - "[Build][Graph]Failed, the GELib instance is nullptr or is not InitFlag, " - "session_id %lu, graph_id %u", sessionId_, graph_id); - REPORT_INNER_ERROR("E19999", - "Build graph failed, the GELib instance is nullptr or is not InitFlag, " - "session_id %lu, graph_id %u", sessionId_, 
graph_id); + if (!g_ge_initialized) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before."); + REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before."); return FAILED; } + GELOGT(TRACE_RUNNING, "Building Graph"); - Status ret = instance_ptr->SessionManagerObj().BuildGraph(sessionId_, graph_id, inputs); + Status ret = g_session_manager->BuildGraph(sessionId_, graph_id, inputs); if (ret != SUCCESS) { - GELOGE(ret, - "[Build][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", - ret, sessionId_, graph_id); - REPORT_CALL_ERROR("E19999", "Build graph failed , error code:%u, " - "session_id:%lu, graph_id:%u", ret, sessionId_, graph_id); + GELOGE(ret, "[Build][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", ret, sessionId_, graph_id); + REPORT_CALL_ERROR("E19999", "Build graph failed, error code:%u, session_id:%lu, graph_id:%u", + ret, sessionId_, graph_id); return FAILED; } return SUCCESS; @@ -691,24 +638,18 @@ Status Session::BuildGraph(uint32_t graph_id, const std::vector Status Session::BuildGraph(uint32_t graph_id, const std::vector &inputs) { ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther); ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, - "[Build][Graph]Failed, the GELib instance is nullptr or is not InitFlag, " - "session_id %lu, graph_id %u", sessionId_, graph_id); - REPORT_INNER_ERROR("E19999", - "Build graph failed, the GELib instance is nullptr or is not InitFlag, " - "session_id %lu, graph_id %u", sessionId_, graph_id); + if (!g_ge_initialized) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before."); + REPORT_INNER_ERROR("E19999", "Creating session failed because lack 
GEInitialize call before."); return FAILED; } + GELOGT(TRACE_RUNNING, "Building Graph"); - Status ret = instance_ptr->SessionManagerObj().BuildGraph(sessionId_, graph_id, inputs); + Status ret = g_session_manager->BuildGraph(sessionId_, graph_id, inputs); if (ret != SUCCESS) { - GELOGE(ret, - "[Build][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", - ret, sessionId_, graph_id); - REPORT_CALL_ERROR("E19999", "Build graph failed , error code:%u, " - "session_id:%lu, graph_id:%u", ret, sessionId_, graph_id); + GELOGE(ret, "[Build][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", ret, sessionId_, graph_id); + REPORT_CALL_ERROR("E19999", "Build graph failed, error code:%u, session_id:%lu, graph_id:%u", + ret, sessionId_, graph_id); return FAILED; } return SUCCESS; @@ -719,26 +660,22 @@ Status Session::RunGraphAsync(uint32_t graph_id, const std::vector & RunAsyncCallback callback) { ErrorManager::GetInstance().SetStage(error_message::kModelExecute, error_message::kModelExecute); ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id); - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, - "[Run][Graph]RunGraphAsyncFailed, the GELib instance is nullptr or is not InitFlag, " - "session_id %lu, graph_id %u", sessionId_, graph_id); - REPORT_INNER_ERROR("E19999", - "RunGraphAsync Failed, the GELib instance is nullptr or is not InitFlag, " - "session_id %lu, graph_id %u", sessionId_, graph_id); + if (!g_ge_initialized) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before."); + REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before."); return FAILED; } + GELOGT(TRACE_RUNNING, "Run Graph Asynchronously"); GELOGW( "The callback function will not be checked. 
Please ensure that the implementation of the function is trusted."); - Status ret = ge::GELib::GetInstance()->SessionManagerObj().RunGraphAsync(sessionId_, graph_id, inputs, callback); + Status ret = g_session_manager->RunGraphAsync(sessionId_, graph_id, inputs, callback); if (ret != SUCCESS) { GELOGE(ret, "[Run][Graph]RunGraphAsync Failed, error code:%u, session_id:%lu, graph_id:%u.", ret, sessionId_, graph_id); - REPORT_CALL_ERROR("E19999", "RunGraphAsync Failed, error code:%u, session_id:%lu, " - "graph_id:%u", ret, sessionId_, graph_id); + REPORT_CALL_ERROR("E19999", "RunGraphAsync Failed, error code:%u, session_id:%lu, graph_id:%u", + ret, sessionId_, graph_id); return FAILED; } return SUCCESS; @@ -748,16 +685,14 @@ Status Session::RunGraphAsync(uint32_t graph_id, const std::vector & Status Session::GetVariables(const std::vector &var_names, std::vector &var_values) { ErrorManager::GetInstance().SetStage(error_message::kModelExecute, error_message::kModelExecute); ErrorManager::GetInstance().GenWorkStreamIdDefault(); - auto instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, - "[Get][Variables]Failed, the GELib instance is nullptr or is not InitFlag."); - REPORT_INNER_ERROR("E19999", - "GetVariables failed, the GELib instance is nullptr or is not InitFlag."); + if (!g_ge_initialized) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before."); + REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before."); return FAILED; } + GELOGT(TRACE_RUNNING, "Get Variables"); - Status ret = ge::GELib::GetInstance()->SessionManagerObj().GetVariables(sessionId_, var_names, var_values); + Status ret = g_session_manager->GetVariables(sessionId_, var_names, var_values); if (ret != SUCCESS) { GELOGE(ret, "[Get][Variables]Failed, error code:%u, session_id:%lu.", ret, sessionId_); return FAILED; @@ -769,14 +704,12 
@@ Status Session::GetVariables(const std::vector &var_names, std::vec Status Session::GetVariables(const std::vector &var_names, std::vector &var_values) { ErrorManager::GetInstance().SetStage(error_message::kModelExecute, error_message::kModelExecute); ErrorManager::GetInstance().GenWorkStreamIdDefault(); - auto instance_ptr = ge::GELib::GetInstance(); - if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, - "[Get][Variables]Failed, the GELib instance is nullptr or is not InitFlag."); - REPORT_INNER_ERROR("E19999", - "GetVariables failed, the GELib instance is nullptr or is not InitFlag."); + if (!g_ge_initialized) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before."); + REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before."); return FAILED; } + GELOGT(TRACE_RUNNING, "Get Variables"); std::vector str_var_names; for (auto &var_name : var_names) { @@ -787,17 +720,22 @@ Status Session::GetVariables(const std::vector &var_names, std::ve } str_var_names.emplace_back(var_name.GetString()); } - Status ret = ge::GELib::GetInstance()->SessionManagerObj().GetVariables(sessionId_, str_var_names, var_values); + Status ret = g_session_manager->GetVariables(sessionId_, str_var_names, var_values); if (ret != SUCCESS) { GELOGE(ret, "[Get][Variables]Failed, error code:%u, session_id:%lu.", ret, sessionId_); - REPORT_CALL_ERROR("E19999", "Get variables failed, error code:%u, session_id:%lu.", - ret, sessionId_); + REPORT_CALL_ERROR("E19999", "Get variables failed, error code:%u, session_id:%lu.", ret, sessionId_); return FAILED; } return SUCCESS; } bool Session::IsGraphNeedRebuild(uint32_t graph_id) { - return ge::GELib::GetInstance()->SessionManagerObj().IsGraphNeedRebuild(sessionId_, graph_id); + if (!g_ge_initialized) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before."); + 
REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before."); + return false; + } + + return g_session_manager->IsGraphNeedRebuild(sessionId_, graph_id); } } // namespace ge diff --git a/ge/common/dump/dump_properties.cc b/ge/common/dump/dump_properties.cc index 84bdb7bf..bc645f61 100644 --- a/ge/common/dump/dump_properties.cc +++ b/ge/common/dump/dump_properties.cc @@ -204,7 +204,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &DumpProperties: FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpProperties::SetDumpOptions() { if (enable_dump_ == kEnableFlag) { std::string dump_step; - if (GetContext().GetOption(OPTION_EXEC_DUMP_STEP, dump_step) == GRAPH_SUCCESS) { + if (GetContext().GetOption(OPTION_EXEC_DUMP_STEP, dump_step) == GRAPH_SUCCESS && !dump_step.empty()) { GE_CHK_STATUS_RET(CheckDumpStep(dump_step), "[Check][dump_step] failed."); GELOGI("Get dump step %s successfully", dump_step.c_str()); SetDumpStep(dump_step); diff --git a/ge/common/executor.h b/ge/common/executor.h new file mode 100644 index 00000000..7f1d7ef9 --- /dev/null +++ b/ge/common/executor.h @@ -0,0 +1,89 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef GE_COMMON_EXECUTOR_H +#define GE_COMMON_EXECUTOR_H + +#include "external/ge/ge_api_types.h" +#include "graph/ge_local_context.h" +#include "graph/manager/graph_manager_utils.h" + +namespace ge { +struct RunArgs { + GraphNodePtr graph_node; + GraphId graph_id; + uint64_t session_id; + struct error_message::Context error_context; + std::vector input_tensor; + GeRootModelPtr ge_root_model; + GEThreadLocalContext context; + RunAsyncCallback callback; +}; + +class Executor { + public: + /// + /// @ingroup ge + /// @brief Load mode from graph. + /// @param [in] GeRootModel: root model of graph compiled. + /// @param [in] GraphNode: node of graph. + /// @return Status result of function + /// + virtual Status LoadGraph(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) = 0; + + /// + /// @ingroup ge + /// @brief Unload mode. + /// @param [in] GeRootModel: root model of graph compiled. + /// @param [in] graph_id: graph identifier. + /// @return Status result of function + /// + virtual Status UnloadGraph(const GeRootModelPtr &ge_root_model, uint32_t graph_id) = 0; + + /// + /// @ingroup ge + /// @brief Push model execution params to queue. + /// @param [in] RunArgs of for model execution. + /// @return Status result of function + /// + virtual Status PushGraph(const RunArgs &args) = 0; + + /// + /// @ingroup ge + /// @brief Run graph for synchronize model. + /// @param [in] graph_node: node of graph. + /// @param [in] graph_id: graph identifier. + /// @param [in] inputs: input data for the graph running. + /// @param [out] outputs: output data of the graph running + /// @return Status result of function + /// + virtual Status RunGraph(const GraphNodePtr &graph_node, GraphId graph_id, + const std::vector &inputs, std::vector &outputs) = 0; + + /// + /// @ingroup ge + /// @brief Run graph for NN synchronize model. + /// @param [in] graph_node: node of graph. + /// @param [in] graph_id: graph identifier. 
+ /// @param [in] stream: Stream for model running. + /// @param [in] inputs: input data for the graph running. + /// @param [out] outputs: output data of the graph running + /// @return Status result of function + /// + virtual Status RunGraphWithStream(const GraphNodePtr &graph_node, GraphId graph_id, rtStream_t stream, + const std::vector &inputs, std::vector &outputs) = 0; +}; +} +#endif // GE_COMMON_EXECUTOR_H diff --git a/ge/ge_opt_info/ge_opt_info.h b/ge/ge_opt_info/ge_opt_info.h index 935dff25..5cc1063a 100644 --- a/ge/ge_opt_info/ge_opt_info.h +++ b/ge/ge_opt_info/ge_opt_info.h @@ -24,6 +24,7 @@ namespace ge { class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY GeOptInfo { public: GeOptInfo() = default; + ~GeOptInfo() = default; static Status SetOptInfo(); }; } // namespace ge diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index 505b1908..07355ab5 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -808,7 +808,7 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector return SUCCESS; } -Status GeGenerator::InferFormatForSingleOp(OpDescPtr &op_desc) { +Status GeGenerator::InferFormatForSingleOp(OpDescPtr &op_desc, Graph &graph) { GE_CHECK_NOTNULL(op_desc); if (OperatorFactoryImpl::GetInferFormatFunc(op_desc->GetType()) != nullptr) { auto node_op = ge::OperatorFactoryImpl::CreateOperator("node_op", op_desc->GetType()); @@ -832,7 +832,11 @@ Status GeGenerator::InferFormatForSingleOp(OpDescPtr &op_desc) { } node_op.BreakConnect(); } - auto op = OpDescUtils::CreateOperatorFromOpDesc(op_desc); + auto comp_graph = GraphUtils::GetComputeGraph(graph); + GE_CHECK_NOTNULL(comp_graph); + auto node = comp_graph->FindNode(op_desc->GetName()); + GE_CHECK_NOTNULL(node); + auto op = OpDescUtils::CreateOperatorFromNode(node); auto ret = op_desc->CallInferFormatFunc(op); if (ret != GRAPH_SUCCESS) { REPORT_INNER_ERROR("E19999", "call InferFormatFunc for single op:%s fail", @@ -879,7 +883,7 @@ Status 
GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in Graph graph; GE_CHK_STATUS(BuildSingleOpGraph(op_desc, inputs, outputs, name, graph), "[Build][Graph] for single op:%s fail.", op_desc->GetName().c_str()); - GE_CHK_STATUS_RET_NOLOG(InferFormatForSingleOp(op_desc)); + GE_CHK_STATUS_RET_NOLOG(InferFormatForSingleOp(op_desc, graph)); // 2. check engine type when compile online if (model_file_name == kFileNameSuffix) { diff --git a/ge/graph/common/local_context.cc b/ge/graph/common/local_context.cc index fa2f78e0..bd747021 100644 --- a/ge/graph/common/local_context.cc +++ b/ge/graph/common/local_context.cc @@ -16,13 +16,12 @@ #include "graph/common/local_context.h" -#include "framework/common/ge_inner_error_codes.h" #include "framework/common/debug/ge_log.h" -#include "framework/omg/omg_inner_types.h" namespace ge { namespace { thread_local OmgContext *omg_context = nullptr; +thread_local OmeContext *ome_context = nullptr; } void SetLocalOmgContext(OmgContext &context) { @@ -37,4 +36,18 @@ OmgContext &GetLocalOmgContext() { return domi::GetContext(); } } + +void SetLocalOmeContext(OmeContext &context) { + ome_context = &context; +} + +OmeContext &GetLocalOmeContext() { + if (ome_context != nullptr) { + return *ome_context; + } + + GELOGW("ome_context is nullptr."); + static OmeContext context; + return context; +} } diff --git a/ge/graph/common/local_context.h b/ge/graph/common/local_context.h index 4aa95855..751c6692 100644 --- a/ge/graph/common/local_context.h +++ b/ge/graph/common/local_context.h @@ -22,5 +22,22 @@ namespace ge { void SetLocalOmgContext(OmgContext &context); OmgContext &GetLocalOmgContext(); + + +struct OmeContext { + bool need_multi_batch = false; + std::string dynamic_node_type; + std::vector data_nodes; + std::vector getnext_nosink_nodes; + std::vector dynamic_shape_dims; + std::vector>> user_input_dims; + std::vector> user_real_input_dims; +}; + +GE_FUNC_VISIBILITY +void SetLocalOmeContext(OmeContext &context); + 
+GE_FUNC_VISIBILITY +OmeContext &GetLocalOmeContext(); } // namespace ge #endif // GE_GRAPH_COMMON_LOCAL_CONTEXT_H_ diff --git a/ge/graph/execute/graph_execute.cc b/ge/graph/execute/graph_execute.cc index 02d7d3ca..ba35e7c0 100755 --- a/ge/graph/execute/graph_execute.cc +++ b/ge/graph/execute/graph_execute.cc @@ -31,7 +31,6 @@ GraphExecutor::GraphExecutor() sync_run_mutex_(nullptr), condition_(nullptr), graph_run_listener_(nullptr), - graph_context_(nullptr), last_graph_id_(UINT32_MAX), malloc_flag_(false) {} @@ -79,16 +78,6 @@ Status GraphExecutor::SetCondition(std::mutex *mutex, std::condition_variable *c return SUCCESS; } -Status GraphExecutor::SetGraphContext(GraphContextPtr graph_context_ptr) { - if (graph_context_ptr == nullptr) { - REPORT_INNER_ERROR("E19999", "Check param graph_context_ptr nullptr"); - GELOGE(GE_GRAPH_PARAM_NULLPTR, "[Check][Param] input param graph_context_ptr is nullptr"); - return GE_GRAPH_PARAM_NULLPTR; - } - graph_context_ = graph_context_ptr; - return SUCCESS; -} - Status GraphExecutor::SetDynamicSize(uint32_t model_id, const std::vector &batch_num, int32_t dynamic_type) { auto model_manager = ge::ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); diff --git a/ge/graph/execute/graph_execute.h b/ge/graph/execute/graph_execute.h index 879a124c..b6d56dff 100755 --- a/ge/graph/execute/graph_execute.h +++ b/ge/graph/execute/graph_execute.h @@ -60,8 +60,6 @@ class GraphExecutor { Status SetCondition(std::mutex *mutex, std::condition_variable *cond, std::shared_ptr listener); - Status SetGraphContext(GraphContextPtr graph_context_ptr); - static Status SetDynamicSize(uint32_t model_id, const std::vector &batch_num, int32_t dynamic_type); void SetTrainFlag(bool is_train_graph); @@ -160,8 +158,6 @@ class GraphExecutor { // Run graph asynchronous call back listener std::shared_ptr graph_run_listener_; - GraphContextPtr graph_context_; - std::vector outputs_desc_; GraphId last_graph_id_; diff --git a/ge/graph/execute/model_executor.cc 
b/ge/graph/execute/model_executor.cc new file mode 100644 index 00000000..bcbc08e6 --- /dev/null +++ b/ge/graph/execute/model_executor.cc @@ -0,0 +1,558 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/execute/model_executor.h" + +#include "graph/ge_context.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/common/ge_call_wrapper.h" +#include "graph/common/local_context.h" +#include "graph/manager/graph_var_manager.h" +#include "graph/utils/tensor_adapter.h" +#include "graph/load/graph_loader.h" +#include "graph/load/model_manager/model_manager.h" +#include "common/math/math_util.h" +#include "common/formats/utils/formats_trans_utils.h" + +namespace { +constexpr int32_t kBase = 10; +constexpr uint8_t kNeverLoaded = 0; +} + +namespace ge { +/// +/// @ingroup ge +/// @brief graph executor init +/// @param [in] options user config params +/// @return Status result of function +/// +Status ModelExecutor::Initialize(const map &options, uint64_t session_id) { + graph_run_listener_ = MakeShared(sync_run_mutex_, condition_); + if (graph_run_listener_ == nullptr) { + REPORT_CALL_ERROR("E19999", "New GraphModelListener fail"); + GELOGE(MEMALLOC_FAILED, "[New][GraphModelListener] failed"); + return MEMALLOC_FAILED; + } + + session_id_ = session_id; + train_graph_flag_ = ParseTrainGraphFlag(); + thread_run_flag_.store(true); + run_thread_ = std::thread(&ModelExecutor::RunThread, 
this); + + init_flag_ = true; + return SUCCESS; +} + +/// +/// @ingroup ge +/// @brief graph executor finalize +/// @return Status result of function +/// +Status ModelExecutor::Finalize() { + if (!init_flag_) { + GELOGW("ModelExecutor has not been initialized."); + return SUCCESS; + } + + StopQueue(); + if (run_thread_.joinable()) { + run_thread_.join(); + } + + if (graph_executor_.FreeExecuteMemory() != SUCCESS) { + GELOGW("Graph executor FreeExecuteMemory failed, resources may not be released correctly."); + } + + ModelManager::GetInstance()->DestroyAicpuSession(session_id_); + return SUCCESS; +} + +// OPTION_GRAPH_RUN_MODE is supposed to be a session-level option, but it used to be set to global-level in the past. +// If can not parse from session, it can parse from global by GetContext(). +bool ModelExecutor::ParseTrainGraphFlag() { + string run_mode; + if (GetContext().GetOption(OPTION_GRAPH_RUN_MODE, run_mode) == SUCCESS && !run_mode.empty()) { + if (GraphRunMode(std::strtol(run_mode.c_str(), nullptr, kBase)) >= TRAIN) { + GELOGI("Graph train flag set."); + return true; + } + } + return false; +} + +void ModelExecutor::AddGraphNode(GraphId graph_id, const GraphNodePtr &graph_node) { + std::lock_guard lock(mutex_); + graph_nodes_.emplace(graph_id, graph_node); +} + +void ModelExecutor::RemoveGraphNode(GraphId graph_id) { + std::lock_guard lock(mutex_); + graph_nodes_.erase(graph_id); +} + +/// +/// @ingroup ge +/// @brief Load mode for graph. +/// @param [in] GeRootModel: root model of graph compiled. +/// @param [in] GraphNode: node of graph. +/// @return Status result of function +/// +Status ModelExecutor::LoadGraph(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) { + GE_CHECK_NOTNULL(graph_node); + if (ge_root_model == nullptr) { + return SUCCESS; + } + + UpdateLocalOmeContext(graph_node); + return graph_node->IsAsync() ? 
ModelLoadAsync(ge_root_model, graph_node) : ModelLoadSync(ge_root_model, graph_node); +} + +/// +/// @ingroup ge +/// @brief Unload mode for graph. +/// @param [in] GeRootModel: root model of graph compiled. +/// @param [in] graph_id: graph identifier. +/// @return Status result of function +/// +Status ModelExecutor::UnloadGraph(const GeRootModelPtr &ge_root_model, uint32_t graph_id) { + GE_CHECK_NOTNULL(ge_root_model); + rtError_t rt_ret = rtSetDevice(GetContext().DeviceId()); + if (rt_ret != RT_ERROR_NONE) { + GELOGW("[GraphExecutor] rtSetDevice failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(), graph_id); + return FAILED; + } + + RemoveGraphNode(graph_id); + Status ret = UnloadModel(ge_root_model, graph_id); + if (ret != SUCCESS) { + GELOGW("[GraphExecutor] unload model failed, graph_id=%u.", graph_id); + } + rt_ret = rtDeviceReset(GetContext().DeviceId()); + if (rt_ret != RT_ERROR_NONE) { + GELOGW("[GraphExecutor] rtDeviceReset failed, graphId=%u.", graph_id); + } + + return ret; +} + +Status ModelExecutor::UnloadModel(const GeRootModelPtr &ge_root_model, uint32_t graph_id) { + GE_CHECK_NOTNULL(ge_root_model); + for (size_t i = 0; i < ge_root_model->GetAllModelId().size(); ++i) { + uint32_t model_id = ge_root_model->GetAllModelId()[i]; + GELOGI("Unload model %u.", model_id); + Status ret = GraphLoader::UnloadModel(model_id); + if (ret != SUCCESS) { + GELOGE(ret, "[GraphExecutor] unload model failed, modelId=%u, graphId=%u.", model_id, graph_id); + return ret; + } + } + return SUCCESS; +} + +void ModelExecutor::StopQueue() { + thread_run_flag_.store(false); + run_args_q_.Stop(); +} + +void ModelExecutor::ReturnError(RunAsyncCallback callback, Status ret, const string &log) { + StopQueue(); + GELOGE(ret, "%s.", log.c_str()); + std::vector outputs; + if (callback != nullptr) { + callback(ret, outputs); + } +} + +void ModelExecutor::UpdateLocalOmeContext(const GraphNodePtr &graph_node) { + std::lock_guard lock(mutex_); + 
SetLocalOmeContext(graph_node->GetOmeContext()); +} + +/// +/// @ingroup ge +/// @brief Push model execution params to queue. +/// @param [in] RunArgs of for model execution. +/// @return Status result of function +/// +Status ModelExecutor::PushGraph(const RunArgs &args) { + return run_args_q_.Push(args) ? SUCCESS : FAILED; +} + +void ModelExecutor::RunThread() { + ErrorManager::GetInstance().SetStage(error_message::kModelExecute, error_message::kModelExecute); + if (prctl(PR_SET_NAME, ("GE_Run")) != 0) { + GELOGW("Set thread name failed."); + } + + RunArgs args; + while (thread_run_flag_) { + if (!run_args_q_.Pop(args)) { + continue; + } + + GELOGI("[RunThread] A new loop start, graph_id:%u.", args.graph_id); + ErrorManager::GetInstance().SetErrorContext(args.error_context); + GetContext().SetSessionId(args.session_id); + GetThreadLocalContext() = args.context; + UpdateLocalOmeContext(args.graph_node); + + // parse inputs.dims to vector> dynamic_dims + Status ret = ParseInputsDims(args.input_tensor); + if (ret != SUCCESS) { + ReturnError(args.callback, ret, "ParseInputsDims failed, thread exit."); + args.graph_node->Unlock(); + return; + } + + args.graph_node->UpdateLoadFlag(); + if (!args.graph_node->GetLoadFlag()) { + ErrorManager::GetInstance().SetStage(error_message::kModelLoad, error_message::kModelLoad); + args.ge_root_model->SetTrainFlag(train_graph_flag_); + ret = ModelLoadAsync(args.ge_root_model, args.graph_node); + if (ret != SUCCESS || args.ge_root_model == nullptr) { + StopQueue(); + ReturnError(args.callback, ret, "LoadGraphAsync failed, thread exit."); + args.graph_node->Unlock(); + return; + } + // control the times of graph loading in multi-thread scenario + args.graph_node->DecreaseLoadCount(); + args.graph_node->IncreaseLoadRecord(); + + args.graph_node->SetLoadFlag(true); + GELOGI("LoadGraph[%u], model[%u] success and set LoadFlag to true.", args.graph_node->GetGraphId(), + args.ge_root_model->GetModelId()); + } + + 
ErrorManager::GetInstance().SetStage(error_message::kModelExecute, error_message::kModelExecute); + if (train_graph_flag_) { + graph_executor_.SetTrainFlag(train_graph_flag_); + } + + ret = graph_executor_.ExecuteGraphAsync(args.graph_id, args.graph_node->GetGeRootModel(), + args.input_tensor, args.callback); + args.graph_node->SetRunFlag(false); + if (ret != SUCCESS) { + ReturnError(args.callback, ret, "ExecuteGraphAsync failed, thread exit."); + args.graph_node->Unlock(); + return; + } + args.graph_node->Unlock(); + GELOGI("[GraphExecutor] Run graph async success, graph_id=%u.", args.graph_id); + } +} + +/// +/// @ingroup ge +/// @brief Run graph for synchronize model. +/// @param [in] graph_node: node of graph. +/// @param [in] graph_id: graph identifier. +/// @param [in] inputs: input data for the graph running. +/// @param [out] outputs: output data of the graph running +/// @return Status result of function +/// +Status ModelExecutor::RunGraph(const GraphNodePtr &graph_node, GraphId graph_id, + const std::vector &inputs, std::vector &outputs) { + Status ret = graph_executor_.SetCondition(&sync_run_mutex_, &condition_, graph_run_listener_); + if (ret != SUCCESS) { + GELOGE(GE_GRAPH_RUNGRAPH_FAILED, "[Set][Condition] failed, graph_id = %u.", graph_id); + graph_node->SetRunFlag(false); + return GE_GRAPH_RUNGRAPH_FAILED; + } + + if (train_graph_flag_) { + graph_executor_.SetTrainFlag(train_graph_flag_); + } + ret = graph_executor_.ExecuteGraph(graph_id, graph_node->GetGeRootModel(), inputs, outputs); + + graph_node->SetRunFlag(false); + if (ret != SUCCESS) { + GELOGE(ret, "[Execute][Graph] failed, graph_id = %u.", graph_id); + return ret; + } + return SUCCESS; +} + +/// +/// @ingroup ge +/// @brief Run graph for NN synchronize model. +/// @param [in] graph_node: node of graph. +/// @param [in] graph_id: graph identifier. +/// @param [in] stream: Stream for model running. +/// @param [in] inputs: input data for the graph running. 
+/// @param [out] outputs: output data of the graph running +/// @return Status result of function +/// +Status ModelExecutor::RunGraphWithStream(const GraphNodePtr &graph_node, GraphId graph_id, rtStream_t stream, + const std::vector &inputs, std::vector &outputs) { + auto ret = graph_executor_.SetCondition(&sync_run_mutex_, &condition_, graph_run_listener_); + if (ret != SUCCESS) { + GELOGE(GE_GRAPH_RUNGRAPH_FAILED, "[Set][Condition] failed, graph id = %u, stream = %p.", graph_id, stream); + graph_node->SetRunFlag(false); + return GE_GRAPH_RUNGRAPH_FAILED; + } + + ret = graph_executor_.ExecuteGraphWithStream(graph_id, stream, graph_node->GetGeRootModel(), inputs, outputs); + graph_node->SetRunFlag(false); + graph_node->SetIsSpecificStream(false); + if (ret != SUCCESS) { + GELOGE(ret, "[Execute][Graph] With Stream failed, graph id = %u, stream = %p.", graph_id, stream); + return ret; + } + GELOGI("[Run][GraphWithStreamAsync] run graph success, graph id = %u, stream = %p.", graph_id, stream); + return SUCCESS; +} + +Status ModelExecutor::ModelLoadSync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) { + ge_root_model->SetIsSpecificStream(graph_node->IsSpecificStream()); + return ModelLoad(ge_root_model, graph_node, graph_run_listener_); +} + +Status ModelExecutor::ModelLoadAsync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) { + auto listener = MakeShared(); + GE_CHECK_NOTNULL(listener); + return ModelLoad(ge_root_model, graph_node, listener); +} + +Status ModelExecutor::ModelLoad(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node, + const std::shared_ptr &listener) { + ge_root_model->SetTrainFlag(train_graph_flag_); + bool is_unknown_shape = false; + GE_CHK_STATUS_RET(ge_root_model->CheckIsUnknownShape(is_unknown_shape)); + if (!is_unknown_shape) { + if (getenv(kEnvGeuseStaticMemory) != nullptr) { + GELOGI("[LoadGraph] GE_USE_STATIC_MEMORY is seted."); + } else { + auto root_graph = 
ge_root_model->GetRootGraph(); + GE_CHECK_NOTNULL(root_graph); + auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel(); + GeModelPtr ge_model = name_to_model[root_graph->GetName()]; + GE_CHK_STATUS_RET(CheckAndReleaseMemory(ge_model, graph_node)); + } + } + GE_TIMESTAMP_START(LoadModelOnline); + uint32_t model_id = INVALID_MODEL_ID; + Status ret = GraphLoader::LoadModelOnline(model_id, ge_root_model, listener); + GE_TIMESTAMP_EVENT_END(LoadModelOnline, "GraphLoader::LoadModelOnline"); + if (ret != SUCCESS) { + GELOGE(ret, "[Load][ModelOnline] Failed, model_id:%u", model_id); + graph_node->SetRunFlag(false); + return ret; + } + graph_node->SetLoadFlag(true); + ge_root_model->SetModelId(model_id); + graph_node->SetGeRootModel(ge_root_model); + AddGraphNode(graph_node->GetGraphId(), graph_node); + return SUCCESS; +} + +void ModelExecutor::ReleaseMemory(const GeModelPtr &ge_model, const GraphNodePtr &graph_node, + const std::vector &model_ids, uint32_t graph_id, uint64_t session_id) { + rtError_t rt_ret = rtSetDevice(GetContext().DeviceId()); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u", GetContext().DeviceId()); + GELOGE(RT_FAILED, "[Call][RtSetDevice] failed, device_id=%u.", GetContext().DeviceId()); + return; + } + for (auto model_id : model_ids) { + uint64_t max_memory_size = 0; + Status result = GraphLoader::GetMaxUsedMemory(model_id, max_memory_size); + if (result != SUCCESS) { + continue; + } + GELOGI("try to UnloadGraph[%u], model[%u] which MaxUsedMemory[%lu].", graph_id, model_id, max_memory_size); + if (model_ids.size() > 1) { + result = ge_model->GetSessionId(model_id, session_id); + if (result != SUCCESS) { + GELOGW("[GraphExecutor:] get session failed when dynamic memory, modelId=%u, graphId=%u.", model_id, + graph_id); + continue; + } + } + result = GraphLoader::DestroyAicpuKernel(session_id, model_id, 0); + if (result != SUCCESS) { + GELOGW("[GraphExecutor:] destroy aicpu kernel 
failed when dynamic memory, modelId=%u, graphId=%u.", model_id, + graph_id); + } + result = GraphLoader::UnloadModel(model_id); + if (result != SUCCESS) { + GELOGW("[GraphExecutor:] unload model failed, modelId=%u, graphId=%u.", model_id, graph_id); + } + GELOGI("UnloadGraph[%u], model[%u] success.", graph_id, model_id); + } + graph_node->SetLoadFlag(false); + // Allow model to be loaded again without adding graph again + graph_node->SetLoadCount(graph_node->GetLoadRecord()); + graph_node->SetLoadRecord(kNeverLoaded); + GeRootModelPtr ge_root_model = graph_node->GetGeRootModel(); + if (ge_root_model == nullptr) { + GELOGW("ge_root_model is null, graph_id:%u", graph_id); + return; + } + ge_root_model->ClearAllModelId(); + rt_ret = rtDeviceReset(GetContext().DeviceId()); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u", GetContext().DeviceId()); + GELOGE(RT_FAILED, "[Call][RtDeviceReset] failed, device_id:%u.", GetContext().DeviceId()); + return; + } +} + +Status ModelExecutor::CheckAndReleaseMemory(const GeModelPtr &ge_model, const GraphNodePtr &graph_node) { + GELOGI("graph_id[%u]", graph_node->GetGraphId()); + int64_t free_memory = 0; + Status result = GraphLoader::GetMemoryInfo(free_memory); + if (result != SUCCESS) { + return result; + } + + int64_t value = 0; + int64_t memory_size = AttrUtils::GetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, value) ? value : 0; + int64_t weight_size = AttrUtils::GetInt(ge_model, ATTR_MODEL_WEIGHT_SIZE, value) ? value : 0; + int64_t session_id = AttrUtils::GetInt(ge_model, MODEL_ATTR_SESSION_ID, value) ? 
value : 0; + + GELOGI("Graph[%u] need memory_size[%ld], weight_size[%ld], Device[%u] free_memory_size[%ld]", + graph_node->GetGraphId(), memory_size, weight_size, GetContext().DeviceId(), free_memory); + if (CheckInt64AddOverflow(memory_size, weight_size) != SUCCESS) { + REPORT_INNER_ERROR("E19999", "memory_size:%ld and weight_size:%ld will overflow after add, check invalid", + memory_size, weight_size); + GELOGE(INTERNAL_ERROR, "[Check][Param] memory_size:%ld and weight_size:%ld will overflow after add", + memory_size, weight_size); + return INTERNAL_ERROR; + } + if (free_memory >= (memory_size + weight_size)) { + return SUCCESS; + } + + std::lock_guard lock(mutex_); + for (const auto &it : graph_nodes_) { + auto graph_id = it.second->GetGraphId(); + auto model = it.second->GetGeRootModel(); + if (model == nullptr) { + continue; + } + auto model_id = model->GetModelId(); + auto model_ids = model->GetAllModelId(); + // unload model not release + bool is_unknown_shape = false; + GE_CHK_STATUS_RET(model->CheckIsUnknownShape(is_unknown_shape)); + if (is_unknown_shape) { + GELOGD("model_id[%u] graph_id[%u] is unknown model, not release memory", model_id, graph_id); + continue; + } + // not loaded,no need unload + if (!it.second->GetLoadFlag()) { + GELOGI("CheckAndReleaseMemory graph[%u] has not been loaded.", graph_id); + continue; + } + ReleaseMemory(ge_model, it.second, model_ids, graph_id, static_cast(session_id)); + } + + return SUCCESS; +} + +void ModelExecutor::ParseInputsDimsForData(const std::vector &input_tensor) { + GELOGD("Start parse input dims from data."); + for (size_t i = 0; i < input_tensor.size(); ++i) { + const TensorDesc &tensor_desc = input_tensor[i].GetTensorDesc(); + const Shape &shape = tensor_desc.GetShape(); + const auto &shape_dims = shape.GetDims(); + GELOGD("Input tensor dims is %s.", formats::JoinToString(shape_dims).c_str()); + GetLocalOmeContext().user_real_input_dims.emplace_back(shape_dims); + } +} + +Status 
ModelExecutor::ParseInputsDimsForGetNextNoSinkAndData(const vector &dynamic_nodes, + const std::vector &input_tensor) { + GELOGD("Start parse inputs dims when coexist data and getnext sink."); + for (size_t i = 0; i < dynamic_nodes.size(); ++i) { + auto op_desc = dynamic_nodes.at(i)->GetOpDesc(); + if (op_desc == nullptr) { + continue; + } + GeAttrValue::INT index = 0; + if (!(AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, index))) { + REPORT_CALL_ERROR("E19999", "Get Attr:%s from op:%s(%s) fail", ATTR_NAME_INDEX.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str()); + GELOGE(PARAM_INVALID, "[Get][Attr] %s from op:%s(%s) fail", ATTR_NAME_INDEX.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str()); + return PARAM_INVALID; + } + if (static_cast(index) > input_tensor.size()) { + REPORT_INNER_ERROR("E19999", "Attr:%s in op:%s(%s) value:%ld > param input_tensor.size:%zu, " + "check invalid", ATTR_NAME_INDEX.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str(), + index, input_tensor.size()); + GELOGE(PARAM_INVALID, "[Check][Param] Attr:%s in op:%s(%s) value:%ld > param input_tensor.size:%zu", + ATTR_NAME_INDEX.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), + index, input_tensor.size()); + return PARAM_INVALID; + } + + const TensorDesc &tensor_desc = input_tensor[i].GetTensorDesc(); + const Shape &shape = tensor_desc.GetShape(); + const auto &shape_dims = shape.GetDims(); + GELOGI("Shape dims of %zu data is %s.", index, formats::JoinToString(shape_dims).c_str()); + GetLocalOmeContext().user_real_input_dims.emplace_back(std::move(shape_dims)); + } + return SUCCESS; +} + +Status ModelExecutor::ParseInputsDims(const std::vector &input_tensor) { + GELOGI("Start parse input dims of %zu input tensor.", input_tensor.size()); + GetLocalOmeContext().user_real_input_dims.clear(); + if (GetLocalOmeContext().dynamic_node_type.empty()) { + return SUCCESS; + } + + const vector &data_nodes = GetLocalOmeContext().data_nodes; + const 
vector &getnext_nosink_nodes = GetLocalOmeContext().getnext_nosink_nodes; + GELOGD("Data nodes count is %zu, getnext nosink nodes count is %zu.", data_nodes.size(), + getnext_nosink_nodes.size()); + if (GetLocalOmeContext().dynamic_node_type == DATA) { + if (getnext_nosink_nodes.empty()) { + // just data or data+getnext_sink + ParseInputsDimsForData(input_tensor); + } else { + // data+getnext_nosink, but only need to get shape_dims of data + if (ParseInputsDimsForGetNextNoSinkAndData(data_nodes, input_tensor) != SUCCESS) { + GELOGE(PARAM_INVALID, "[Parse][Dims] from data failed, when data coexist with getnext nosink."); + return PARAM_INVALID; + } + } + } else { + if (getnext_nosink_nodes.empty()) { + // just getnext_sink or getnext_sink+data, need to get shape_dims from aicpu op + GELOGI("Need to get dims from aicpu op: GETDYNAMICDIMS."); + return SUCCESS; + } else { + if (data_nodes.empty()) { + // just getnext_nosink + ParseInputsDimsForData(input_tensor); + } else { + // getnext_nosink + data, but only need to get shape_dims of getnext_nosink + if (ParseInputsDimsForGetNextNoSinkAndData(getnext_nosink_nodes, input_tensor) != SUCCESS) { + GELOGE(PARAM_INVALID, "[Parse][Dims] from getnext nosink failed, when data coexist with getnext nosink"); + return PARAM_INVALID; + } + } + } + } + + GELOGI("Parse %zu inputs dims success.", GetLocalOmeContext().user_real_input_dims.size()); + return SUCCESS; +} +} // namespace ge diff --git a/ge/graph/execute/model_executor.h b/ge/graph/execute/model_executor.h new file mode 100644 index 00000000..f11441e9 --- /dev/null +++ b/ge/graph/execute/model_executor.h @@ -0,0 +1,140 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef GE_GRAPH_EXECUTE_MODEL_EXECUTOR_H +#define GE_GRAPH_EXECUTE_MODEL_EXECUTOR_H + +#include + +#include "common/executor.h" +#include "graph/execute/graph_execute.h" + +namespace ge { +class ModelExecutor : public Executor { + public: + /// + /// @ingroup ge + /// @brief graph executor init + /// @param [in] options user config params + /// @return Status result of function + /// + Status Initialize(const map &options, uint64_t session_id); + + /// + /// @ingroup ge + /// @brief graph executor finalize + /// @return Status result of function + /// + Status Finalize(); + + /// + /// @ingroup ge + /// @brief Load mode for graph. + /// @param [in] GeRootModel: root model of graph compiled. + /// @param [in] GraphNode: node of graph. + /// @return Status result of function + /// + Status LoadGraph(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node); + + /// + /// @ingroup ge + /// @brief Unload mode for graph. + /// @param [in] GeRootModel: root model of graph compiled. + /// @param [in] graph_id: graph identifier. + /// @return Status result of function + /// + Status UnloadGraph(const GeRootModelPtr &ge_root_model, uint32_t graph_id); + + /// + /// @ingroup ge + /// @brief Push model execution params to queue. + /// @param [in] RunArgs of for model execution. + /// @return Status result of function + /// + Status PushGraph(const RunArgs &args); + + /// + /// @ingroup ge + /// @brief Run graph for synchronize model. + /// @param [in] graph_node: node of graph. + /// @param [in] graph_id: graph identifier. 
+ /// @param [in] inputs: input data for the graph running. + /// @param [out] outputs: output data of the graph running + /// @return Status result of function + /// + Status RunGraph(const GraphNodePtr &graph_node, GraphId graph_id, + const std::vector &inputs, std::vector &outputs); + + /// + /// @ingroup ge + /// @brief Run graph for NN synchronize model. + /// @param [in] graph_node: node of graph. + /// @param [in] graph_id: graph identifier. + /// @param [in] stream: Stream for model running. + /// @param [in] inputs: input data for the graph running. + /// @param [out] outputs: output data of the graph running + /// @return Status result of function + /// + Status RunGraphWithStream(const GraphNodePtr &graph_node, GraphId graph_id, rtStream_t stream, + const std::vector &inputs, std::vector &outputs); + + private: + bool ParseTrainGraphFlag(); + + void AddGraphNode(GraphId graph_id, const GraphNodePtr &graph_node); + void RemoveGraphNode(GraphId graph_id); + + Status ModelLoadSync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node); + Status ModelLoadAsync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node); + Status ModelLoad(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node, + const std::shared_ptr &listener); + + Status UnloadModel(const GeRootModelPtr &ge_root_model, uint32_t graph_id); + + void ReleaseMemory(const GeModelPtr &ge_model, const GraphNodePtr &graph_node, const std::vector &model_ids, + uint32_t graph_id, uint64_t session_id); + Status CheckAndReleaseMemory(const GeModelPtr &ge_model, const GraphNodePtr &graph_node); + + void UpdateLocalOmeContext(const GraphNodePtr &graph_node); + + void RunThread(); + void StopQueue(); + void ReturnError(RunAsyncCallback callback, Status ret, const string &log); + + void ParseInputsDimsForData(const std::vector &input_tensor); + Status ParseInputsDimsForGetNextNoSinkAndData(const vector &dynamic_nodes, + const std::vector &input_tensor); + Status 
ParseInputsDims(const std::vector &input_tensor); + + bool init_flag_{false}; + bool train_graph_flag_{false}; + uint64_t session_id_{0}; + GraphExecutor graph_executor_; + + std::mutex mutex_; + std::map graph_nodes_; + + std::thread run_thread_; + std::atomic_bool thread_run_flag_{false}; + BlockingQueue run_args_q_; + + // for run graph synchronous return + std::mutex sync_run_mutex_; + std::condition_variable condition_; + // run graph synchronization call back listener + std::shared_ptr graph_run_listener_; +}; +} +#endif // GE_GRAPH_EXECUTE_MODEL_EXECUTOR_H \ No newline at end of file diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 7d82879f..9d86039a 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -1480,6 +1480,11 @@ Status DavinciModel::GetLabelGotoAddr(uint32_t label_index, rtMemType_t mem_type return SUCCESS; } +void DavinciModel::SetGlobalStep(void *global_step, uint64_t global_step_size) { + global_step_addr_ = global_step; + global_step_size_ = global_step_size; +} + /// @ingroup ge /// @brief LabelSet Op Initialize. /// @param [in] op_desc: LabelSet Op descriptor. 
@@ -1542,14 +1547,16 @@ Status DavinciModel::InitLabelSet(const OpDescPtr &op_desc) { } Status DavinciModel::InitVariable(const OpDescPtr &op_desc, map &variable_by_name) { - if (op_desc->GetName() == NODE_NAME_GLOBAL_STEP) { - const auto output_sizes = ModelUtils::GetOutputSize(op_desc); - if (!output_sizes.empty()) { - global_step_size_ = output_sizes[0]; - } - const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, op_desc); - if (!output_addrs.empty()) { - global_step_addr_ = output_addrs[0]; + if (!known_node_) { + if (op_desc->GetName() == NODE_NAME_GLOBAL_STEP) { + const auto output_sizes = ModelUtils::GetOutputSize(op_desc); + if (!output_sizes.empty()) { + global_step_size_ = output_sizes[0]; + } + const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, op_desc); + if (!output_addrs.empty()) { + global_step_addr_ = output_addrs[0]; + } } } @@ -4365,7 +4372,7 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map void *{ diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h index 1e964855..4ff36677 100755 --- a/ge/graph/load/model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -300,6 +300,7 @@ class DavinciModel { return op_list_.at(index); } + void SetGlobalStep(void *global_step, uint64_t global_step_size); void *GetGlobalStep() const { return global_step_addr_; } // get task info for profiling @@ -498,10 +499,6 @@ class DavinciModel { return exception_dumper_.DumpExceptionInfo(exception_infos); } - void SetKnownShapeGlobalStep(void *global_step) { - known_shape_global_step_ = global_step; - } - void DumperShrink() { data_dumper_.DumpShrink(); } @@ -1108,9 +1105,6 @@ class DavinciModel { vector output_descs_; vector output_formats_; - // known shape node for dump - void *known_shape_global_step_; - // op name to attrs mapping std::map>> op_name_to_attrs_; }; diff --git a/ge/graph/load/model_manager/model_manager.cc 
b/ge/graph/load/model_manager/model_manager.cc index 3c31014d..45540ba0 100755 --- a/ge/graph/load/model_manager/model_manager.cc +++ b/ge/graph/load/model_manager/model_manager.cc @@ -513,8 +513,7 @@ Status ModelManager::GetCurDynamicDims(const vector> &user_real_ } GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims).c_str()); bool cur_dynamic_dims_valid = false; - std::vector shape_strs = ge::StringUtils::Split(GetLocalOmgContext().dynamic_dims, ';'); - for (auto dynamic_dim : shape_strs) { + for (auto dynamic_dim : GetLocalOmeContext().dynamic_shape_dims) { if (dynamic_dim == formats::JoinToString(cur_dynamic_dims)) { cur_dynamic_dims_valid = true; break; @@ -556,10 +555,10 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector cur_dynamic_dims; - if (!GetLocalOmgContext().user_real_input_dims.empty()) { - if (GetCurDynamicDims(GetLocalOmgContext().user_real_input_dims, GetLocalOmgContext().user_input_dims, + if (!GetLocalOmeContext().user_real_input_dims.empty()) { + if (GetCurDynamicDims(GetLocalOmeContext().user_real_input_dims, GetLocalOmeContext().user_input_dims, cur_dynamic_dims) != SUCCESS) { GELOGE(INTERNAL_ERROR, "[Get][CurDynamicDims] [Train_Dynamic] Failed to Parse real_dynamic_dims."); return INTERNAL_ERROR; diff --git a/ge/graph/load/model_manager/task_info/memcpy_async_task_info.h b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.h index 728305ff..4ae03967 100755 --- a/ge/graph/load/model_manager/task_info/memcpy_async_task_info.h +++ b/ge/graph/load/model_manager/task_info/memcpy_async_task_info.h @@ -47,7 +47,7 @@ class MemcpyAsyncTaskInfo : public TaskInfo { uint64_t count_; uint32_t kind_; vector io_addrs_; - int64_t fixed_addr_offset_; + int64_t fixed_addr_offset_ = 0; DavinciModel *davinci_model_ = nullptr; uint32_t args_offset_ = 0; }; diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 04e0f51c..a3605ec2 100755 --- 
a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -129,8 +129,6 @@ const uint32_t kInitGraphCount = 1; const uint32_t kNotAdded = 0; const uint32_t kStartAdd = 1; const uint32_t kDoneAdded = 2; -const uint32_t kNeverLoaded = 0; -const size_t kAlignment = 64; bool IsTailingOptimization() { string is_tailing_optimization_option; @@ -164,26 +162,12 @@ ge::Status CheckFpCeilingMode() { } // namespace namespace ge { -GraphManager::GraphManager() - : thread_run_flag_(false), - graph_run_listener_(nullptr), - init_flag_(false) { -} - -Status GraphManager::Initialize(const std::map &options) { +Status GraphManager::Initialize(const std::map &options, Executor *executor) { ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOther); if (init_flag_) { GELOGW("[Initialize] GraphManager already initialized."); return SUCCESS; } - - // malloc - graph_run_listener_ = MakeShared(sync_run_mutex_, condition_); - if (graph_run_listener_ == nullptr) { - REPORT_CALL_ERROR("E19999", "New GraphModelListener fail"); - GELOGE(MEMALLOC_FAILED, "[New][GraphModelListener] failed"); - return MEMALLOC_FAILED; - } // graph context graph_context_ = MakeShared(); if (graph_context_ == nullptr) { @@ -211,31 +195,18 @@ Status GraphManager::Initialize(const std::map &options) { return ret; } - graph_map_.clear(); - cache_helper_map_.clear(); - graph_id_to_add_graph_cond_.clear(); - graph_count_.clear(); + executor_ = executor; init_flag_ = true; thread_run_flag_ = true; - prerun_thread_ = std::thread(GraphManager::PreRunThread, this); - run_thread_ = std::thread(GraphManager::RunThread, this); + prerun_thread_ = std::thread(&GraphManager::PreRunThread, this); return SUCCESS; } Status GraphManager::UnloadModel(GeRootModelPtr ge_root_model, uint32_t graph_id) { - Status ret = SUCCESS; - for (size_t i = 0; i < ge_root_model->GetAllModelId().size(); ++i) { - uint32_t model_id = ge_root_model->GetAllModelId()[i]; - GELOGI("Unload model %u.", 
model_id); - ret = GraphLoader::UnloadModel(model_id); - if (ret != SUCCESS) { - GELOGW("[GraphManager] unload model failed, modelId=%u, graphId=%u.", model_id, graph_id); - return ret; - } - } - return ret; + GE_CHECK_NOTNULL(executor_); + return executor_->UnloadGraph(ge_root_model, graph_id); } Status GraphManager::Finalize() { @@ -244,23 +215,13 @@ Status GraphManager::Finalize() { return SUCCESS; } - if (graph_executor_.FreeExecuteMemory() != SUCCESS) { - GELOGW("Graph executor FreeExecuteMemory failed, resources may not be released correctly."); - } - - StopQueue(this); - + StopQueue(); if (prerun_thread_.joinable()) { prerun_thread_.join(); } - if (run_thread_.joinable()) { - run_thread_.join(); - } // check graph whether running or not Status unload_model_ret = SUCCESS; - Status ret; - rtError_t rt_ret; for (auto iter = graph_map_.begin(); iter != graph_map_.end(); ++iter) { GraphNodePtr graph_node = iter->second; if (graph_node->GetRunFlag()) { @@ -271,22 +232,10 @@ Status GraphManager::Finalize() { // unload model auto ge_root_model = graph_node->GetGeRootModel(); if (ge_root_model != nullptr && ge_root_model->GetModelId() != INVALID_MODEL_ID && graph_node->GetLoadFlag()) { - rt_ret = rtSetDevice(GetContext().DeviceId()); - if (rt_ret != RT_ERROR_NONE) { - GELOGW("[GraphManager] rtSetDevice failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(), iter->first); - unload_model_ret = FAILED; - continue; - } - ret = UnloadModel(ge_root_model, iter->first); + Status ret = UnloadModel(ge_root_model, iter->first); if (ret != SUCCESS) { - GELOGW("[GraphManager] unload model failed, graph_id=%u.", iter->first); unload_model_ret = ret; - } - rt_ret = rtDeviceReset(GetContext().DeviceId()); - if (rt_ret != RT_ERROR_NONE) { - GELOGW("[GraphManager] rtDeviceReset failed, graphId=%u.", iter->first); - unload_model_ret = FAILED; - continue; + GELOGW("[GraphManager] unload model failed, graph_id=%u.", iter->first); } } @@ -1122,12 +1071,7 @@ Status 
GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: return ret; } } - ErrorManager::GetInstance().SetStage(error_message::kModelLoad, error_message::kModelLoad); - if (!graph_node->IsAsync()) { - ret = LoadGraph(ge_root_model, graph_node); - } else { - ret = LoadGraphAsync(ge_root_model, graph_node); - } + ret = LoadGraph(ge_root_model, graph_node); if (ret != SUCCESS) { GELOGE(ret, "[Load][Graph] Failed, graph_id:%u.", graph_node->GetGraphId()); return ret; @@ -1135,13 +1079,8 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: graph_node->SetBuildFlag(true); var_acc_ctrl_.SetGraphBuildEnd(graph_node->GetGraphId()); } else if (!graph_node->GetLoadFlag()) { - ErrorManager::GetInstance().SetStage(error_message::kModelLoad, error_message::kModelLoad); GeRootModelPtr ge_root_model_ptr = graph_node->GetGeRootModel(); - if (!graph_node->IsAsync()) { - ret = LoadGraph(ge_root_model_ptr, graph_node); - } else { - ret = LoadGraphAsync(ge_root_model_ptr, graph_node); - } + ret = LoadGraph(ge_root_model, graph_node); if (ret != SUCCESS) { GELOGE(ret, "[Load][Graph] Failed, graph_id:%u.", graph_node->GetGraphId()); return ret; @@ -1149,40 +1088,16 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: } return ret; } + Status GraphManager::LoadGraph(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) { GELOGI("[LoadGraph] run_graph_flag[%d], graph_id[%u]", options_.run_graph_flag, graph_node->GetGraphId()); - if (options_.run_graph_flag && ge_root_model != nullptr) { - ge_root_model->SetTrainFlag(GetTrainFlag()); - // synchronization run graph with model - std::shared_ptr model_listener = GetModelListener(); - ModelIdInfo model_id_info; - bool is_unknown_shape = false; - GE_CHK_STATUS_RET(ge_root_model->CheckIsUnknownShape(is_unknown_shape)); - if (!is_unknown_shape) { - if (getenv(kEnvGeuseStaticMemory) != nullptr) { - GELOGI("[LoadGraph] GE_USE_STATIC_MEMORY is seted."); - } 
else { - auto root_graph = ge_root_model->GetRootGraph(); - GE_CHECK_NOTNULL(root_graph); - auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel(); - GeModelPtr ge_model = name_to_model[root_graph->GetName()]; - GE_CHK_STATUS_RET(CheckAndReleaseMemory(ge_model, graph_node)); - } - } - ge_root_model->SetIsSpecificStream(graph_node->IsSpecificStream()); - GE_TIMESTAMP_START(LoadGraph); - Status ret = GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, model_listener); - GE_TIMESTAMP_EVENT_END(LoadGraph, "GraphManager::LoadGraph"); - if (ret != SUCCESS) { - GELOGE(ret, "[Load][Model] failed, ret:%d", ret); - graph_node->SetRunFlag(false); - return ret; - } - graph_node->SetLoadFlag(true); - ge_root_model->SetModelId(model_id_info.model_id); - graph_node->SetGeRootModel(ge_root_model); + if (!options_.run_graph_flag) { + return SUCCESS; } - return SUCCESS; + + ErrorManager::GetInstance().SetStage(error_message::kModelLoad, error_message::kModelLoad); + GE_CHECK_NOTNULL(executor_); + return executor_->LoadGraph(ge_root_model, graph_node); } Status GraphManager::LoadFromCache(const GraphNodePtr &graph_node, const ModelCacheHelperPtr &cache_helper, @@ -1272,45 +1187,14 @@ Status GraphManager::SaveCacheAfterBuild(uint32_t graph_id, ge::ComputeGraphPtr Status GraphManager::InnerRunGraph(GraphNodePtr &graph_node, const GraphId &graph_id, const std::vector &inputs, std::vector &outputs) { - Status ret = graph_executor_.SetCondition(&sync_run_mutex_, &condition_, graph_run_listener_); - if (ret != SUCCESS) { - GELOGE(GE_GRAPH_RUNGRAPH_FAILED, "[Set][Condition] failed, graph_id = %u.", graph_id); - graph_node->SetRunFlag(false); - return GE_GRAPH_RUNGRAPH_FAILED; - } - - if (GetTrainFlag()) { - GE_CHK_STATUS_RET(graph_executor_.SetGraphContext(GetGraphContext())); - graph_executor_.SetTrainFlag(options_.train_graph_flag); - } - ret = graph_executor_.ExecuteGraph(graph_id, graph_node->GetGeRootModel(), inputs, outputs); - - 
graph_node->SetRunFlag(false); - if (ret != SUCCESS) { - GELOGE(ret, "[Execute][Graph] failed, graph_id = %u.", graph_id); - return ret; - } - return SUCCESS; + GE_CHECK_NOTNULL(executor_); + return executor_->RunGraph(graph_node, graph_id, inputs, outputs); } Status GraphManager::InnerRunGraphWithStream(GraphNodePtr &graph_node, const GraphId &graph_id, rtStream_t stream, const std::vector &inputs, std::vector &outputs) { - auto ret = graph_executor_.SetCondition(&sync_run_mutex_, &condition_, graph_run_listener_); - if (ret != SUCCESS) { - GELOGE(GE_GRAPH_RUNGRAPH_FAILED, "[Set][Condition] failed, graph id = %u, stream = %p.", graph_id, stream); - graph_node->SetRunFlag(false); - return GE_GRAPH_RUNGRAPH_FAILED; - } - - ret = graph_executor_.ExecuteGraphWithStream(graph_id, stream, graph_node->GetGeRootModel(), inputs, outputs); - graph_node->SetRunFlag(false); - graph_node->SetIsSpecificStream(false); - if (ret != SUCCESS) { - GELOGE(ret, "[Execute][Graph] With Stream failed, graph id = %u, stream = %p.", graph_id, stream); - return ret; - } - GELOGI("[Run][GraphWithStreamAsync] run graph success, graph id = %u, stream = %p.", graph_id, stream); - return SUCCESS; + GE_CHECK_NOTNULL(executor_); + return executor_->RunGraphWithStream(graph_node, graph_id, stream, inputs, outputs); } Status GraphManager::RunGraphWithStreamAsync(const GraphId &graph_id, rtStream_t stream, uint64_t session_id, @@ -1665,38 +1549,18 @@ Status GraphManager::RemoveGraph(const GraphId &graph_id) { std::lock_guard lock(unload_model_mutex_); - Status middle_ret; - rtError_t rt_ret; var_acc_ctrl_.RemoveGraph(graph_id); RemoveGraphNode(graph_id); - RemoveModelCacheHelper(graph_id); auto ge_root_model = graph_node->GetGeRootModel(); if (CheckModelLoad(ge_root_model, graph_node->GetLoadFlag())) { - rt_ret = rtSetDevice(GetContext().DeviceId()); - if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, graph_id:%u", - GetContext().DeviceId(), 
graph_id); - GELOGE(RT_FAILED, "[Call][RtSetDevice] failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(), - graph_id); - return FAILED; - } - // same graph may be added for several times, different models were created separately, - // unload them respectively. - middle_ret = UnloadModel(ge_root_model, graph_id); + Status middle_ret = UnloadModel(ge_root_model, graph_id); if (middle_ret != SUCCESS) { REPORT_INNER_ERROR("E19999", "UnloadModel for graph:%u failed, check invalid", graph_id); GELOGE(middle_ret, "[Unload][Model] model failed, graph_id=%u.", graph_id); ret = middle_ret; } - rt_ret = rtDeviceReset(GetContext().DeviceId()); - if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, graph_id:%u", - GetContext().DeviceId(), graph_id); - GELOGE(RT_FAILED, "[Call][RtDeviceReset] failed, device_id:%u, graph_id:%u", GetContext().DeviceId(), graph_id); - ret = FAILED; - } } RemoveCompilerStages(graph_id); @@ -2120,8 +1984,6 @@ Status GraphManager::SummaryHandle(const GraphId &graph_id, std::vector &outputs) { GELOGI("[GraphManager] CheckpointHandle, outputsSize=%zu.", outputs.size()); - std::vector outputs_desc = graph_executor_.GetOutputsDesc(); - GELOGI("[GraphManager] CheckpointHandle, outputsDescSize=%zu.", outputs_desc.size()); std::map save_results; NodePtr netoutput = nullptr; @@ -2786,160 +2648,6 @@ void GraphManager::ChangeConstTypeWhenTraining(const ComputeGraphPtr &compute_gr } } -Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) { - GELOGI("[LoadGraphAsync] run_graph_flag[%d], graph_id[%u]", options_.run_graph_flag, graph_node->GetGraphId()); - if (options_.run_graph_flag && ge_root_model != nullptr) { - ge_root_model->SetTrainFlag(GetTrainFlag()); - // synchronization run graph with model - ModelIdInfo model_id_info; - bool is_unknown_shape = false; - GE_CHK_STATUS_RET(ge_root_model->CheckIsUnknownShape(is_unknown_shape)); - if 
(!is_unknown_shape) { - if (getenv(kEnvGeuseStaticMemory) != nullptr) { - GELOGI("[LoadGraphAsync] GE_USE_STATIC_MEMORY is seted."); - } else { - auto root_graph = ge_root_model->GetRootGraph(); - GE_CHECK_NOTNULL(root_graph); - auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel(); - GeModelPtr ge_model = name_to_model[root_graph->GetName()]; - GE_CHK_STATUS_RET(CheckAndReleaseMemory(ge_model, graph_node)); - } - } - GE_TIMESTAMP_START(LoadGraph); - auto listener = MakeShared(); - GE_CHECK_NOTNULL(listener); - Status ret = GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, listener); - GE_TIMESTAMP_EVENT_END(LoadGraph, "GraphManager::LoadGraphAsync"); - if (ret != SUCCESS) { - GELOGE(ret, "[Load][ModelOnline] Failed, model_id:%u", model_id_info.model_id); - graph_node->SetRunFlag(false); - return ret; - } - graph_node->SetLoadFlag(true); - ge_root_model->SetModelId(model_id_info.model_id); - graph_node->SetGeRootModel(ge_root_model); - } - return SUCCESS; -} - -void GraphManager::ReleaseMemory(const GeModelPtr &ge_model, GraphNodePtr &graph_node, - const std::vector &model_ids, uint32_t graph_id, uint64_t session_id) { - rtError_t rt_ret = rtSetDevice(GetContext().DeviceId()); - if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u", GetContext().DeviceId()); - GELOGE(RT_FAILED, "[Call][RtSetDevice] failed, device_id=%u.", GetContext().DeviceId()); - return; - } - for (auto model_id : model_ids) { - uint64_t max_memory_size = 0; - Status result = GraphLoader::GetMaxUsedMemory(model_id, max_memory_size); - if (result != SUCCESS) { - continue; - } - GELOGI("CheckAndReleaseMemory try to UnloadGraph[%u], model[%u] which MaxUsedMemory[%lu].", graph_id, model_id, - max_memory_size); - if (model_ids.size() > 1) { - result = ge_model->GetSessionId(model_id, session_id); - if (result != SUCCESS) { - GELOGW("[GraphManager:] get session failed when dynamic memory, modelId=%u, graphId=%u.", 
model_id, - graph_id); - continue; - } - } - result = GraphLoader::DestroyAicpuKernel(session_id, model_id, 0); - if (result != SUCCESS) { - GELOGW("[GraphManager:] destroy aicpu kernel failed when dynamic memory, modelId=%u, graphId=%u.", model_id, - graph_id); - } - result = GraphLoader::UnloadModel(model_id); - if (result != SUCCESS) { - GELOGW("[GraphManager:] unload model failed, modelId=%u, graphId=%u.", model_id, graph_id); - } - GELOGI("CheckAndReleaseMemory UnloadGraph[%u], model[%u] success.", graph_id, model_id); - } - graph_node->SetLoadFlag(false); - // Allow model to be loaded agagin without adding graph again - graph_node->SetLoadCount(graph_node->GetLoadRecord()); - graph_node->SetLoadRecord(kNeverLoaded); - GeRootModelPtr ge_root_model = graph_node->GetGeRootModel(); - if (ge_root_model == nullptr) { - GELOGW("ge_root_model is null, graph_id:%u", graph_id); - return; - } - ge_root_model->ClearAllModelId(); - rt_ret = rtDeviceReset(GetContext().DeviceId()); - if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u", GetContext().DeviceId()); - GELOGE(RT_FAILED, "[Call][RtDeviceReset] failed, device_id:%u.", GetContext().DeviceId()); - return; - } -} - -Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const GraphNodePtr &graph_node) { - GELOGI("CheckAndReleaseMemory graph_id[%u]", graph_node->GetGraphId()); - int64_t value = 0; - bool ret = ge::AttrUtils::GetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, value); - int64_t memory_size = ret ? value : 0; - ret = ge::AttrUtils::GetInt(ge_model, ATTR_MODEL_WEIGHT_SIZE, value); - int64_t weight_size = ret ? value : 0; - ret = ge::AttrUtils::GetInt(ge_model, MODEL_ATTR_SESSION_ID, value); - uint64_t session_id = ret ? 
value : 0; - - int64_t free_memory = 0; - Status result = GraphLoader::GetMemoryInfo(free_memory); - if (result != SUCCESS) { - return result; - } - - GELOGI( - "CheckAndReleaseMemory Graph[%u] need memory_size[%ld], weight_size[%ld]," - " Device[%u] free_memory_size[%ld]", - graph_node->GetGraphId(), memory_size, weight_size, GetContext().DeviceId(), free_memory); - if (ge::CheckInt64AddOverflow(memory_size, weight_size) != SUCCESS) { - REPORT_INNER_ERROR("E19999", "memory_size:%ld and weight_size:%ld will overflow after add, check invalid", - memory_size, weight_size); - GELOGE(INTERNAL_ERROR, "[Check][Param] memory_size:%ld and weight_size:%ld will overflow after add", - memory_size, weight_size); - return INTERNAL_ERROR; - } - if (free_memory >= (memory_size + weight_size)) { - return SUCCESS; - } - - std::lock_guard lock(unload_model_mutex_); - - std::map graph_map; - { - std::lock_guard lock(member_mutex_); - graph_map = graph_map_; - } - - for (auto &it : graph_map) { - auto graph_id = it.second->GetGraphId(); - auto model = it.second->GetGeRootModel(); - if (model == nullptr) { - continue; - } - auto model_id = model->GetModelId(); - auto model_ids = model->GetAllModelId(); - // unload model not release - bool is_unknown_shape = false; - GE_CHK_STATUS_RET(model->CheckIsUnknownShape(is_unknown_shape)); - if (is_unknown_shape) { - GELOGD("model_id[%u] graph_id[%u] is unknown model, not release memory", model_id, graph_id); - continue; - } - // not loaded,no need unload - if (!it.second->GetLoadFlag()) { - GELOGI("CheckAndReleaseMemory graph[%u] has not been loaded.", graph_id); - continue; - } - ReleaseMemory(ge_model, it.second, model_ids, graph_id, session_id); - } - - return SUCCESS; -} - Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, GraphId root_graph_id, const SubGraphInfoPtr &sub_graph_info_ptr, const std::string &root_graph_name, @@ -3069,14 +2777,14 @@ Status GraphManager::IncreBuild(const GraphNodePtr &graph_node, 
GeModelPtr &ge_m return FAILED; } -Status GraphManager::CheckIncreBuildAndPreRun(GraphManager *graph_manager, const PreRunArgs &args, +Status GraphManager::CheckIncreBuildAndPreRun(const PreRunArgs &args, GraphNodePtr &graph_node, GeRootModelPtr &ge_root_model) { - if (!graph_manager->IsGraphNeedBuild(graph_node)) { + if (!IsGraphNeedBuild(graph_node)) { ge_root_model = graph_node->GetGeRootModel(); return SUCCESS; } if (graph_node->GetBuildFlag()) { - ReturnError(graph_manager, args.callback, PARAM_INVALID, + ReturnError(args.callback, PARAM_INVALID, "The graph " + std::to_string(graph_node->GetGraphId()) + " need to re-build, you should remove it" " from GE first, then AddGraph again and rebuild it."); @@ -3084,55 +2792,53 @@ Status GraphManager::CheckIncreBuildAndPreRun(GraphManager *graph_manager, const } // check need incre build. GeModelPtr ge_model = nullptr; - if (graph_manager->IncreBuild(graph_node, ge_model) != SUCCESS) { + if (IncreBuild(graph_node, ge_model) != SUCCESS) { std::vector ge_inputs; for (const auto &item: args.input_tensor) { ge_inputs.emplace_back(TensorAdapter::AsGeTensor(item)); } - Status ret = graph_manager->PreRun(graph_node, ge_inputs, ge_root_model, args.session_id); + Status ret = PreRun(graph_node, ge_inputs, ge_root_model, args.session_id); // release rts generate context RtContextUtil::GetInstance().DestroyRtContexts(args.session_id, graph_node->GetGraphId()); if (ret != SUCCESS) { - ReturnError(graph_manager, args.callback, ret, "PreRun Failed."); + ReturnError(args.callback, ret, "PreRun Failed."); return ret; } } graph_node->SetBuildFlag(true); - graph_manager->var_acc_ctrl_.SetGraphBuildEnd(graph_node->GetGraphId()); + var_acc_ctrl_.SetGraphBuildEnd(graph_node->GetGraphId()); return SUCCESS; } -void GraphManager::PreRunThread(GraphManager *graph_manager) { +void GraphManager::PreRunThread() { if (prctl(PR_SET_NAME, ("GE_PreRun")) != 0) { GELOGW("Set thread name failed."); } PreRunArgs args; - while 
(graph_manager->thread_run_flag_) { - bool pop_status = graph_manager->prerun_args_q_.Pop(args); - if (!pop_status) { + while (thread_run_flag_) { + if (!prerun_args_q_.Pop(args)) { continue; } GELOGI("[PreRunThread] A new loop start, graph_id:%u.", args.graph_id); - ErrorManager::GetInstance().SetErrorContext(args.error_context); ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther); GetContext().SetSessionId(args.session_id); GetThreadLocalContext() = args.context; - graph_manager->UpdateLocalOmgContext(args.graph_id); + UpdateLocalOmgContext(args.graph_id); // find graph GraphNodePtr graph_node = nullptr; - Status ret = graph_manager->GetGraphNode(args.graph_id, graph_node); + Status ret = GetGraphNode(args.graph_id, graph_node); if (ret != SUCCESS) { - ReturnError(graph_manager, args.callback, GE_GRAPH_GRAPH_NODE_NULL, + ReturnError(args.callback, GE_GRAPH_GRAPH_NODE_NULL, "[RunGraph] graph not exist, graph_id=" + std::to_string(args.graph_id)); return; } // more than one graph owns same graph_id uint32_t count = 0; - if (graph_manager->GetGraphCount(args.graph_id, count) != SUCCESS) { + if (GetGraphCount(args.graph_id, count) != SUCCESS) { GELOGE(INTERNAL_ERROR, "[Get][GraphCount] failed, graph id:%u.", args.graph_id); return; } @@ -3142,7 +2848,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { // In online inference concurrency senario, graph_node is allowed to be locked for 'count' times graph_node->SetSemSize(count); graph_node->Lock(); - graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.error_context, + PushGraph(RunArgs( { graph_node, args.graph_id, args.session_id, args.error_context, args.input_tensor, graph_node->GetGeRootModel(), GetThreadLocalContext(), args.callback })); GELOGI("[PreRunThread] Loop end. 
Start to run with cached build model."); continue; @@ -3151,7 +2857,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { graph_node->Lock(); if (graph_node->GetRunFlag()) { - ReturnError(graph_manager, args.callback, GE_GRAPH_ALREADY_RUNNING, + ReturnError(args.callback, GE_GRAPH_ALREADY_RUNNING, "[RunGraph] graph already running, graph id=" + std::to_string(args.graph_id)); graph_node->Unlock(); return; @@ -3162,25 +2868,25 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { ComputeGraphPtr compute_graph_tmp = GraphUtils::GetComputeGraph(*(graph_node->GetGraph())); if (compute_graph_tmp == nullptr) { - ReturnError(graph_manager, args.callback, GE_GRAPH_GRAPH_NODE_NULL, + ReturnError(args.callback, GE_GRAPH_GRAPH_NODE_NULL, "[RunGraph] compute_graph_tmp is NULL, graph id = %u."); graph_node->Unlock(); return; } // when set incre build, save cache helper. - graph_manager->AddModelCacheHelperToMap(args.graph_id, args.session_id, compute_graph_tmp); + AddModelCacheHelperToMap(args.graph_id, args.session_id, compute_graph_tmp); std::vector ge_models; - if (graph_manager->options_.local_fmk_op_flag) { - graph_manager->GetCompilerStages(graph_node->GetGraphId()).optimizer.TranFrameOp(compute_graph_tmp); + if (options_.local_fmk_op_flag) { + GetCompilerStages(graph_node->GetGraphId()).optimizer.TranFrameOp(compute_graph_tmp); } // it will not execute graph preprocess, optimize, parition, build if the graph has built successful. 
GELOGI("Start for run graph async."); GeRootModelPtr ge_root_model = nullptr; - ret = CheckIncreBuildAndPreRun(graph_manager, args, graph_node, ge_root_model); + ret = CheckIncreBuildAndPreRun(args, graph_node, ge_root_model); if (ret != SUCCESS) { graph_node->SetRunFlag(false); if (!ge::Analyzer::GetInstance()->IsEnableNetAnalyzeDebug()) { @@ -3193,250 +2899,49 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { continue; } } - graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.error_context, + + PushGraph(RunArgs( { graph_node, args.graph_id, args.session_id, args.error_context, args.input_tensor, ge_root_model, GetThreadLocalContext(), args.callback })); GELOGI("[PreRunThread] Loop end."); } } -void GraphManager::ParseInputsDimsForData(const std::vector &input_tensor) { - GELOGD("Start parse input dims from data."); - for (size_t i = 0; i < input_tensor.size(); ++i) { - const TensorDesc &tensor_desc = input_tensor[i].GetTensorDesc(); - const Shape &shape = tensor_desc.GetShape(); - const auto &shape_dims = shape.GetDims(); - GELOGD("Input tensor dims is %s.", formats::JoinToString(shape_dims).c_str()); - GetLocalOmgContext().user_real_input_dims.emplace_back(shape_dims); +void GraphManager::PushGraph(const RunArgs &args) { + if (executor_ == nullptr) { + GELOGW("Just compile model, not support execute."); + return; } -} - -Status GraphManager::ParseInputsDimsForGetNexNosinkAndData(const vector &dynamic_nodes, - const std::vector &input_tensor) { - GELOGD("Start parse inputs dims when coexist data and getnext sink."); - for (size_t i = 0; i < dynamic_nodes.size(); ++i) { - auto op_desc = dynamic_nodes.at(i)->GetOpDesc(); - if (op_desc == nullptr) { - continue; - } - GeAttrValue::INT index = 0; - if (!(AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, index))) { - REPORT_CALL_ERROR("E19999", "Get Attr:%s from op:%s(%s) fail", ATTR_NAME_INDEX.c_str(), - op_desc->GetName().c_str(), op_desc->GetType().c_str()); - 
GELOGE(PARAM_INVALID, "[Get][Attr] %s from op:%s(%s) fail", ATTR_NAME_INDEX.c_str(), - op_desc->GetName().c_str(), op_desc->GetType().c_str()); - return PARAM_INVALID; - } - if (static_cast(index) > input_tensor.size()) { - REPORT_INNER_ERROR("E19999", "Attr:%s in op:%s(%s) value:%ld > param input_tensor.size:%zu, " - "check invalid", ATTR_NAME_INDEX.c_str(), - op_desc->GetName().c_str(), op_desc->GetType().c_str(), - index, input_tensor.size()); - GELOGE(PARAM_INVALID, "[Check][Param] Attr:%s in op:%s(%s) value:%ld > param input_tensor.size:%zu", - ATTR_NAME_INDEX.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), - index, input_tensor.size()); - return PARAM_INVALID; - } - const TensorDesc &tensor_desc = input_tensor[i].GetTensorDesc(); - const Shape &shape = tensor_desc.GetShape(); - const auto &shape_dims = shape.GetDims(); - GELOGI("Shape dims of %zu data is %s.", index, formats::JoinToString(shape_dims).c_str()); - GetLocalOmgContext().user_real_input_dims.emplace_back(std::move(shape_dims)); - } - return SUCCESS; + (void)executor_->PushGraph(args); } -Status GraphManager::ParseInputsDims(const std::vector &input_tensor) { - GELOGI("Start parse input dims of %zu input tensor.", input_tensor.size()); - GetLocalOmgContext().user_real_input_dims.clear(); - if (!GetLocalOmgContext().dynamic_node_type.empty()) { - vector data_nodes; - vector getnext_nosink_nodes; - data_nodes = GetLocalOmgContext().data_nodes; - getnext_nosink_nodes = GetLocalOmgContext().getnext_nosink_nodes; - GELOGD("Data nodes count is %zu, getnext nosink nodes count is %zu.", data_nodes.size(), - getnext_nosink_nodes.size()); - if (GetLocalOmgContext().dynamic_node_type == DATA) { - if (getnext_nosink_nodes.empty()) { - // just data or data+getnext_sink - ParseInputsDimsForData(input_tensor); - } else { - // data+getnext_nosink, but only need to get shape_dims of data - if (ParseInputsDimsForGetNexNosinkAndData(data_nodes, input_tensor) != SUCCESS) { - GELOGE(PARAM_INVALID, 
"[Parse][Dims] from data failed, when data coexist with getnext nosink."); - return PARAM_INVALID; - } - } - } else { - if (getnext_nosink_nodes.empty()) { - // just getnext_sink or getnext_sink+data, need to get shape_dims from aicpu op - GELOGI("Need to get dims from aicpu op: GETDYNAMICDIMS."); - return SUCCESS; - } else { - if (data_nodes.empty()) { - // just getnext_nosink - ParseInputsDimsForData(input_tensor); - } else { - // getnext_nosink + data, but only need to get shape_dims of getnext_nosink - if (ParseInputsDimsForGetNexNosinkAndData(getnext_nosink_nodes, input_tensor) != SUCCESS) { - GELOGE(PARAM_INVALID, "[Parse][Dims] from getnext nosink failed, when data coexist with getnext nosink"); - return PARAM_INVALID; - } - } - } - } - } - GELOGI("Parse %zu inputs dims success.", GetLocalOmgContext().user_real_input_dims.size()); - return SUCCESS; -} +void GraphManager::SetRunContext(const GraphNodePtr &graph_node) { + OmeContext ome_context; + ome_context.need_multi_batch = GetLocalOmgContext().need_multi_batch; + ome_context.dynamic_node_type = GetLocalOmgContext().dynamic_node_type; + ome_context.dynamic_shape_dims = StringUtils::Split(GetLocalOmgContext().dynamic_dims, ';'); + ome_context.user_input_dims = GetLocalOmgContext().user_input_dims; -void GraphManager::RunThread(GraphManager *graph_manager) { - ErrorManager::GetInstance().SetStage(error_message::kModelExecute, error_message::kModelExecute); - if (prctl(PR_SET_NAME, ("GE_Run")) != 0) { - GELOGW("Set thread name failed."); - } - - RunArgs args; - while (graph_manager->thread_run_flag_) { - bool pop_status = graph_manager->run_args_q_.Pop(args); - if (!pop_status) { - continue; - } - - GELOGI("[RunThread] A new loop start, graph_id:%u.", args.graph_id); - - ErrorManager::GetInstance().SetErrorContext(args.error_context); - GetContext().SetSessionId(args.session_id); - GetThreadLocalContext() = args.context; - graph_manager->UpdateLocalOmgContext(args.graph_id); - - Status ret; - // parse 
inputs.dims to vector> dynamic_dims - ret = graph_manager->ParseInputsDims(args.input_tensor); - if (ret != SUCCESS) { - ReturnError(graph_manager, args.callback, ret, "ParseInputsDims failed, thread exit."); - args.graph_node->Unlock(); - return; - } - - args.graph_node->UpdateLoadFlag(); - if (!args.graph_node->GetLoadFlag()) { - ErrorManager::GetInstance().SetStage(error_message::kModelLoad, error_message::kModelLoad); - args.ge_root_model->SetTrainFlag(graph_manager->GetTrainFlag()); - ret = graph_manager->LoadGraphAsync(args.ge_root_model, args.graph_node); - if (ret != SUCCESS || args.ge_root_model == nullptr) { - StopQueue(graph_manager); - ReturnError(graph_manager, args.callback, ret, "LoadGraphAsync failed, thread exit."); - args.graph_node->Unlock(); - return; - } - // control the times of graph loading in multi-thread scenario - args.graph_node->DecreaseLoadCount(); - args.graph_node->IncreaseLoadRecord(); - - args.graph_node->SetLoadFlag(true); - GELOGI("LoadGraph[%u], model[%u] success and set LoadFlag to true.", args.graph_node->GetGraphId(), - args.ge_root_model->GetModelId()); - } + ome_context.data_nodes = GetLocalOmgContext().data_nodes; + ome_context.getnext_nosink_nodes = GetLocalOmgContext().getnext_nosink_nodes; - ErrorManager::GetInstance().SetStage(error_message::kModelExecute, error_message::kModelExecute); - if (graph_manager->GetTrainFlag()) { - ret = graph_manager->graph_executor_.SetGraphContext(graph_manager->GetGraphContext()); - if (ret != SUCCESS) { - GELOGW("[GraphManager] SetGraphContext failed, graph_id=%u.", args.graph_id); - } - graph_manager->graph_executor_.SetTrainFlag(graph_manager->options_.train_graph_flag); - } + ome_context.user_real_input_dims = GetLocalOmgContext().user_real_input_dims; - ret = graph_manager->graph_executor_.ExecuteGraphAsync(args.graph_id, args.graph_node->GetGeRootModel(), - args.input_tensor, args.callback); - args.graph_node->SetRunFlag(false); - if (ret != SUCCESS) { - ReturnError(graph_manager, 
args.callback, ret, "ExecuteGraphAsync failed, thread exit."); - args.graph_node->Unlock(); - return; - } - args.graph_node->Unlock(); - GELOGI("[GraphManager] Run graph async success, graph_id=%u.", args.graph_id); - } + graph_node->SetOmeContext(ome_context); } -void GraphManager::StopQueue(GraphManager *graph_manager) { - if (graph_manager == nullptr) { - return; - } - - graph_manager->thread_run_flag_.store(false); - graph_manager->prerun_args_q_.Stop(); - graph_manager->run_args_q_.Stop(); +void GraphManager::StopQueue() { + thread_run_flag_.store(false); + prerun_args_q_.Stop(); } -void GraphManager::ReturnError(GraphManager *graph_manager, RunAsyncCallback callback, Status ret, const string &log) { - if (graph_manager == nullptr) { - return; - } - StopQueue(graph_manager); +void GraphManager::ReturnError(RunAsyncCallback callback, Status ret, const string &log) { + StopQueue(); GELOGE(ret, "%s.", log.c_str()); std::vector outputs; - callback(ret, outputs); -} - -void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_node, RunAsyncCallback callback, - Status ret, const string &log) { - std::vector outputs; - auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); - if (graph_manager == nullptr || compute_graph == nullptr) { - REPORT_INNER_ERROR("E19999", "Param graph_manager or compute_graph in graph_node is nullptr, check invalid"); - GELOGE(GRAPH_FAILED, "[Check][Param] compute graph or graph manager is nullptr"); - callback(GRAPH_FAILED, outputs); - return; - } - - for (const auto &node : compute_graph->GetAllNodes()) { - if (node->GetType() != "NetOutput") { - continue; - } - for (size_t i = 0; i < node->GetAllInDataAnchorsSize(); i++) { - auto input_desc = node->GetOpDesc()->MutableInputDesc(i); - GeShape ge_shape(input_desc->GetShape().GetDims()); - GeTensorDesc ge_tensor_desc; - ge_tensor_desc.SetShape(ge_shape); - GeTensor ge_tensor(ge_tensor_desc); - int64_t len = 1; - if (input_desc->GetShape().GetDims() 
!= std::vector({})) { - len = input_desc->GetShape().GetShapeSize(); - } - if (len < 0) { - REPORT_INNER_ERROR("E19999", "InputIndex:%zu ShapeSize:%ld of op:%s(%s) < 0, unknown shape is not support, " - "check invalid", i, len, - node->GetName().c_str(), node->GetType().c_str()); - GELOGE(GRAPH_FAILED, "[Check][Param] InputIndex:%zu ShapeSize:%ld of op:%s(%s) < 0, " - "unknown shape is not support", i, len, node->GetName().c_str(), node->GetType().c_str()); - callback(GRAPH_FAILED, outputs); - return; - } else if (len == 0) { - GELOGI("getted shape size is 0.Do process as empty tensor!"); - len = 1; - } - auto length = GetSizeInBytes(len, input_desc->GetDataType()); - auto aligned_ptr = MakeShared(length, kAlignment); - if (aligned_ptr == nullptr) { - REPORT_CALL_ERROR("E19999", "New AlignedPtr failed, len:%ld", length); - GELOGE(GRAPH_FAILED, "[Create][AlignedPtr] failed, len:%ld", length); - return; - } - ge_tensor.SetData(aligned_ptr, length); - ge::Tensor tensor = TensorAdapter::AsTensor(ge_tensor); - // To avoid global step too small and can not stop, totally set a bigger value - auto ptr = aligned_ptr->MutableGet(); - for (int64_t i = 0; i < length; i++) { - ptr[i] = 0x7F; // here stands for a positive max value - } - outputs.emplace_back(std::move(tensor)); - } + if (callback != nullptr) { + callback(ret, outputs); } - callback(SUCCESS, outputs); - return; } bool GraphManager::IsGraphNeedRebuild(uint32_t graph_id) { @@ -3649,6 +3154,7 @@ Status GraphManager::Build(const GraphNodePtr &graph_node, ComputeGraphPtr &comp GraphUtils::DumpGEGraph(compute_graph, "Build", is_always_dump); GraphUtils::DumpGEGraphToOnnx(*compute_graph, "Build"); + SetRunContext(graph_node); graph_node->SetGeRootModel(ge_root_model); return SUCCESS; } diff --git a/ge/graph/manager/graph_manager.h b/ge/graph/manager/graph_manager.h index 3475da6d..6773787c 100644 --- a/ge/graph/manager/graph_manager.h +++ b/ge/graph/manager/graph_manager.h @@ -31,7 +31,6 @@ #include 
"external/graph/types.h" #include "external/ge/ge_api_types.h" #include "graph/build/graph_builder.h" -#include "graph/execute/graph_execute.h" #include "graph/ge_local_context.h" #include "graph/load/graph_loader.h" #include "graph/manager/graph_manager_utils.h" @@ -41,11 +40,12 @@ #include "graph/preprocess/graph_preprocess.h" #include "graph/tuning_utils.h" #include "model/ge_model.h" +#include "common/executor.h" namespace ge { class GraphManager { public: - GraphManager(); + GraphManager() = default; ~GraphManager() = default; /// @@ -54,7 +54,7 @@ class GraphManager { /// @param [in] options user config params /// @return Status result of function /// - Status Initialize(const std::map &options); + Status Initialize(const std::map &options, Executor *executor = nullptr); /// /// @ingroup ge_graph @@ -113,7 +113,7 @@ class GraphManager { /// @param [out] outputs output data /// @return Status result of function /// - Status RunGraphWithStreamAsync(const GraphId &graph_id, rtStream_t stream, uint64_t session_id, + Status RunGraphWithStreamAsync(const GraphId &graph_id, rtStream_t stream, uint64_t session_id, const std::vector &inputs, std::vector &outputs); /// @@ -227,34 +227,18 @@ class GraphManager { RunAsyncCallback callback; }; - struct RunArgs { - GraphNodePtr graph_node; - GraphId graph_id; - uint64_t session_id; - struct error_message::Context error_context; - std::vector input_tensor; - GeRootModelPtr ge_root_model; - GEThreadLocalContext context; - RunAsyncCallback callback; - }; - void AddGraphNode(GraphId graph_id, const GraphNodePtr &graph_node); void RemoveGraphNode(GraphId graph_id); bool HasGraphNode(GraphId graph_id); Status GetGraphNode(const GraphId &graph_id, GraphNodePtr &out); - std::shared_ptr GetModelListener() const { return graph_run_listener_; } - static Status ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, GraphId root_graph_id, const SubGraphInfoPtr &sub_graph_info_ptr, const std::string &root_graph_name, uint64_t 
session_id, const struct error_message::Context &error_context, const GEThreadLocalContext &ge_context); - Status ParseInputsDims(const std::vector &input_tensor); - void ParseInputsDimsForData(const std::vector &input_tensor); - Status ParseInputsDimsForGetNexNosinkAndData(const vector &dynamic_nodes, - const std::vector &input_tensor); + Status RunCustomPass(const GraphNodePtr &graph_node); Status PreRun(const GraphNodePtr &graph_node, const std::vector &inputs, GeRootModelPtr &ge_root_model, uint64_t session_id = INVALID_SESSION_ID); @@ -350,10 +334,6 @@ class GraphManager { Status SubexpressionMigration(ComputeGraphPtr &compute_graph); - Status LoadGraphAsync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node); - - Status CheckAndReleaseMemory(const GeModelPtr &ge_model, const GraphNodePtr &graph_node); - bool CheckModelLoad(const GeRootModelPtr &ge_model, bool load_flag); Status LoadGraph(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node); @@ -368,12 +348,12 @@ class GraphManager { void RemoveModelCacheHelper(const GraphId &graph_id); ModelCacheHelperPtr FindModelCacheHelper(GraphId graph_id); - static void PreRunThread(GraphManager *graph_manager); - static void RunThread(GraphManager *graph_manager); - static void StopQueue(GraphManager *graph_manager); - static void ReturnError(GraphManager *graph_manager, RunAsyncCallback callback, Status ret, const string &log); - static void ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_node, RunAsyncCallback callback, - Status ret, const string &log); + void SetRunContext(const GraphNodePtr &graph_node); + void PushGraph(const RunArgs &args); + + void PreRunThread(); + void StopQueue(); + void ReturnError(RunAsyncCallback callback, Status ret, const string &log); void ChangeConstTypeWhenTraining(const ComputeGraphPtr &compute_graph); @@ -409,11 +389,7 @@ class GraphManager { CompilerStages &GetCompilerStages(GraphId graph_id); void RemoveCompilerStages(GraphId graph_id); 
- static Status CheckIncreBuildAndPreRun(GraphManager *graph_manager, const PreRunArgs &args, GraphNodePtr &graph_node, - GeRootModelPtr &ge_root_model); - - void ReleaseMemory(const GeModelPtr &ge_model, GraphNodePtr &graph_node, const std::vector &model_ids, - uint32_t graph_id, uint64_t session_id); + Status CheckIncreBuildAndPreRun(const PreRunArgs &args, GraphNodePtr &graph_node, GeRootModelPtr &ge_root_model); Status CheckRepeatAdd(uint32_t graph_id, bool &is_added); @@ -431,34 +407,25 @@ class GraphManager { static Status CheckGraphAdded(const GraphId &graph_id, const Graph &graph); - std::atomic_bool thread_run_flag_; + std::atomic_bool thread_run_flag_{false}; BlockingQueue prerun_args_q_{}; - BlockingQueue run_args_q_{}; std::thread prerun_thread_; - std::thread run_thread_; ComputeGraphPtr compute_graph_; std::map graph_map_; std::map cache_helper_map_; - // for run graph synchronous return - std::mutex sync_run_mutex_; - std::condition_variable condition_; - // run graph synchronization call back listener - std::shared_ptr graph_run_listener_; - // summary and checkpoint callback function list for ME, key is summary or checkpoint std::map &)>> me_callback_map_; std::map &)>> callback_map_; - bool init_flag_; - + bool init_flag_{false}; GraphManagerOptions options_; GraphContextPtr graph_context_ = nullptr; map omg_contexts_; map compiler_stages_; - GraphExecutor graph_executor_; + Executor *executor_{nullptr}; VarAccelerateCtrl var_acc_ctrl_; diff --git a/ge/graph/manager/graph_manager_utils.h b/ge/graph/manager/graph_manager_utils.h index 6ed76e57..9cec6b6d 100644 --- a/ge/graph/manager/graph_manager_utils.h +++ b/ge/graph/manager/graph_manager_utils.h @@ -33,6 +33,7 @@ #include "framework/common/debug/ge_log.h" #include "framework/common/ge_inner_error_codes.h" #include "graph/compute_graph.h" +#include "graph/common/local_context.h" #include "external/graph/graph.h" #include "graph/model.h" #include "model/ge_model.h" @@ -154,6 +155,9 @@ class 
GraphNode { bool GetRunFlag() const { return run_flag_; } void SetRunFlag(bool flag) { run_flag_ = flag; } + void SetOmeContext(const OmeContext &context) { context_ = context; } + OmeContext &GetOmeContext() { return context_; } + bool IsAsync() const { return async_; } void SetAsync(bool flag) { async_ = flag; } @@ -196,6 +200,8 @@ class GraphNode { bool run_flag_; std::vector subgraph_ptr_list_; + OmeContext context_; + GraphPtr graph_; ComputeGraphPtr compute_graph_; bool build_flag_; diff --git a/ge/graph/passes/mark_force_unknown_for_cond_pass.cc b/ge/graph/passes/mark_force_unknown_for_cond_pass.cc index aa36a43b..67b6c617 100644 --- a/ge/graph/passes/mark_force_unknown_for_cond_pass.cc +++ b/ge/graph/passes/mark_force_unknown_for_cond_pass.cc @@ -145,17 +145,63 @@ void MarkForceUnknownForCondPass::MarkUnknownForSwitch(const NodePtr &node, std: /// @return /// void MarkForceUnknownForCondPass::MarkUnknownForSwitch(const std::map> &switch_groups) { - for (auto it = switch_groups.begin(); it != switch_groups.end(); ++it) { - const auto &op_node = it->first; - const auto &op_desc = op_node->GetOpDesc(); - if (op_desc->HasAttr(ATTR_NAME_CONTROL_FLOW_GROUP)) { - continue; + // Step 0: no group assigned. 
such as: + // Merge1{id=0, group=} => {Switch1{id=1, group=}, Switch2{id=2, group=}} + // Merge2{id=3, group=} => {Switch1{id=1, group=}, Switch3{id=4, group=}} + // Merge3{id=5, group=} => {Switch4{id=6, group=}, Switch5{id=7, group=}} + // Merge4{id=8, group=} => {Switch1{id=1, group=}, Switch5{id=7, group=}} + std::map unique_groups; + const auto get_group_index = [&unique_groups](const NodePtr &merge, const std::vector &switch_group) { + int64_t group_index = merge->GetOpDesc()->GetId(); + std::set group_ids{group_index}; + for (const auto &node : switch_group) { + if (AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, group_index)) { + GELOGI("[%s] Get group from [%s], index[%ld]", merge->GetName().c_str(), node->GetName().c_str(), group_index); + group_ids.insert(group_index); + } + } + + const auto it = unique_groups.find(group_index); + if (it != unique_groups.end()) { + group_index = it->second; } - int64_t group_index = op_desc->GetId(); - SetControlFlowGroup(op_node, group_index); - for (const auto &n : it->second) { - SetControlFlowGroup(n, group_index); + for (auto id : group_ids) { + unique_groups[id] = group_index; + } + + return group_index; + }; + + const auto set_group_index = [](const NodePtr &merge, const std::vector &switch_group, int64_t group_index) { + SetControlFlowGroup(merge, group_index); + for (const auto &node : switch_group) { + SetControlFlowGroup(node, group_index); + } + }; + + // Step 1: Set group index to merge, if switch already has group, use assigned group. 
+ // Merge1{id=0, group=0} => {Switch1{id=1, group=0}, Switch2{id=2, group=0}} + // Merge2{id=3, group=0} => {Switch1{id=1, group=0}, Switch3{id=4, group=0}} + // Merge3{id=5, group=5} => {Switch4{id=6, group=5}, Switch5{id=7, group=5}} + // Merge4{id=8, group=0} => {Switch1{id=1, group=0}, Switch5{id=7, group=0}} + for (const auto group : switch_groups) { + int64_t group_index = get_group_index(group.first, group.second); + set_group_index(group.first, group.second, group_index); + } + + // Step 2: Adjust crossed merge group for unique group. + // Merge1{id=0, group=0} => {Switch1{id=1, group=0}, Switch2{id=2, group=0}} + // Merge2{id=3, group=0} => {Switch1{id=1, group=0}, Switch3{id=4, group=0}} + // Merge3{id=5, group=0} => {Switch4{id=6, group=0}, Switch5{id=7, group=0}} + // Merge4{id=8, group=0} => {Switch1{id=1, group=0}, Switch5{id=7, group=0}} + for (const auto group : switch_groups) { + int64_t group_index = -1; + (void)AttrUtils::GetInt(group.first->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, group_index); + + const auto it = unique_groups.find(group_index); + if (it != unique_groups.end() && it->first != it->second) { + set_group_index(group.first, group.second, it->second); } } } diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index bc8646e7..d7f33b4b 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1756,8 +1756,8 @@ Status GraphPrepare::CtrlFlowPreProcess() { PassManager graph_pass; // After InferShape Mark v1 control flow for unknown shape. 
- auto mark_force_unknown_pass = new (std::nothrow) MarkForceUnknownForCondPass; - GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::MarkForceUnknownForCondPass", mark_force_unknown_pass)); + GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::MarkForceUnknownForCondPass", + new (std::nothrow) MarkForceUnknownForCondPass)); GE_CHK_STATUS_RET(graph_pass.Run(compute_graph_)); return SUCCESS; diff --git a/ge/hybrid/executor/hybrid_model_pipeline_executor.cc b/ge/hybrid/executor/hybrid_model_pipeline_executor.cc index b5e66628..57ba20d4 100644 --- a/ge/hybrid/executor/hybrid_model_pipeline_executor.cc +++ b/ge/hybrid/executor/hybrid_model_pipeline_executor.cc @@ -188,6 +188,7 @@ HybridModelPipelineExecutor::HybridModelPipelineExecutor(HybridModel *model, uin config_.num_executors = kNumExecutors; config_.num_stages = model_->GetRootGraphItem()->NumGroups(); config_.device_id = device_id_; + config_.iteration_end = 0; } Status StageExecutor::InitExecutionContext() { diff --git a/ge/hybrid/model/node_item.cc b/ge/hybrid/model/node_item.cc index 77bd8efd..f66d4638 100644 --- a/ge/hybrid/model/node_item.cc +++ b/ge/hybrid/model/node_item.cc @@ -25,7 +25,7 @@ namespace ge { namespace hybrid { namespace { const uint8_t kMaxTransCount = 3; -const uint32_t kTransOpIoSize = 1; +const uint8_t kTransOpIoSize = 1; const char *const kAttrNameOriginalFusionGraph = "_original_fusion_graph"; const char *const kNodeTypeRetVal = "_RetVal"; const std::set kControlOpTypes{ @@ -47,7 +47,7 @@ bool IsEnterFeedNode(NodePtr node) { // For: Enter -> TransData -> Cast -> node for (uint8_t i = 0; i < kMaxTransCount; ++i) { if (kEnterOpTypes.count(NodeUtils::GetNodeType(node)) > 0) { - GELOGD("Node[%u] is Enter feed node.", node->GetName().c_str()); + GELOGD("Node[%s] is Enter feed node.", node->GetName().c_str()); return true; } diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index b34cc0c6..fe9bba9a 100644 --- 
a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -372,9 +372,6 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { // update op args by tiling info block_dim_ = tiling_info.GetBlockDim(); clear_atomic_ = tiling_info.GetClearAtomic(); - std::vector workspaces; - tiling_info.GetAllWorkspaces(workspaces); - op_desc->SetWorkspaceBytes(workspaces); tiling_data_ = tiling_info.GetAllTilingData().str(); tiling_key_ = tiling_info.GetTilingKey(); @@ -417,6 +414,11 @@ Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) GE_CHK_STATUS_RET(optiling::OpParaCalculateV2(*node, tiling_info), "[Invoke][OpParaCalculate]Failed calc tiling data of node %s.", node->GetName().c_str()); + // Only non atomic task need update workspace + auto op_desc = node->GetOpDesc(); + std::vector workspaces; + tiling_info.GetAllWorkspaces(workspaces); + op_desc->SetWorkspaceBytes(workspaces); GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc index 4db223e0..8b3c691f 100755 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc @@ -136,8 +136,7 @@ Status KnownNodeTask::Init(TaskContext &context) { Status KnownNodeTask::InitDavinciModel(const HybridModel &model, TensorBuffer *weight_buffer) { GELOGD("[Init][DavinciModel] start"); davinci_model_->InitRuntimeParams(); - GE_CHK_STATUS_RET(davinci_model_->InitVariableMem(), - "[Init][VariableMem] failed"); + GE_CHK_STATUS_RET(davinci_model_->InitVariableMem(), "[Init][VariableMem] failed"); int32_t device_id = 0; GE_CHK_RT_RET(rtGetDevice(&device_id)); davinci_model_->SetDeviceId(static_cast(device_id)); @@ -145,8 +144,6 @@ Status KnownNodeTask::InitDavinciModel(const HybridModel 
&model, TensorBuffer *w auto dump_properties = DumpManager::GetInstance().GetDumpProperties(model.GetSessionId()); if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) { davinci_model_->SetDumpProperties(dump_properties); - void *global_step = model.GetGlobalStep(); - davinci_model_->SetKnownShapeGlobalStep(global_step); } void *weight = nullptr; @@ -182,6 +179,21 @@ Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) cons return SUCCESS; } +Status KnownNodeExecutor::SetDaviciModel(const HybridModel &model, const NodePtr &node, + std::shared_ptr &davinci_model) const { + // set known node flag as true + davinci_model->SetKnownNode(true); + davinci_model->SetId(model.GetModelId()); + davinci_model->SetDumpModelName(model.GetModelName()); + davinci_model->SetOmName(model.GetOmName()); + void *global_step = model.GetGlobalStep(); + GE_CHECK_NOTNULL(global_step); + davinci_model->SetGlobalStep(global_step, sizeof(int64_t)); + // set model id as root node's node id + davinci_model->SetSubModelId(node->GetOpDesc()->GetId()); + return SUCCESS; +} + Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr &task) const { GELOGI("[%s] KnownNodeExecutor::LoadTask in.", node->GetName().c_str()); @@ -199,13 +211,7 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node std::shared_ptr davinci_model = MakeShared(0, nullptr); GE_CHECK_NOTNULL(davinci_model); - // set known node flag as true - davinci_model->SetKnownNode(true); - davinci_model->SetId(model.GetModelId()); - davinci_model->SetDumpModelName(model.GetModelName()); - davinci_model->SetOmName(model.GetOmName()); - // set model id as root node's node id - davinci_model->SetSubModelId(node->GetOpDesc()->GetId()); + GE_CHK_STATUS_RET_NOLOG(SetDaviciModel(model, node, davinci_model)); GELOGD("KnownNodeExecutor::LoadTask node id %ld.", node->GetOpDesc()->GetId()); GE_CHK_STATUS_RET(davinci_model->Assign(ge_model), @@ 
-241,8 +247,7 @@ Status KnownNodeExecutor::ParseAttrForAllocatingOutputs(NodeItem &node_item, Com GE_CHECK_NOTNULL(net_output_desc); std::map connected_inputs; std::map data_indices; - GE_CHK_STATUS_RET(GetDataNodes(graph, data_indices), - "[%s] Failed to get data node indices", + GE_CHK_STATUS_RET(GetDataNodes(graph, data_indices), "[%s] Failed to get data node indices", node_item.NodeName().c_str()); for (const auto &in_data_anchor : net_output_node->GetAllInDataAnchors()) { auto out_data_anchor = in_data_anchor->GetPeerOutAnchor(); diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h index 11cda846..37b5a3d8 100644 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h @@ -59,6 +59,8 @@ class KnownNodeExecutor : public NodeExecutor { const NodePtr &node, GeModelPtr &ge_model, ComputeGraphPtr &graph); + Status SetDaviciModel(const HybridModel &model, const NodePtr &node, + std::shared_ptr &davinci_model) const; }; } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.h b/ge/hybrid/node_executor/hccl/hccl_node_executor.h index b020208d..757f7593 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.h +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.h @@ -62,7 +62,7 @@ class RdmaNodeTask : public NodeTask { int32_t local_index_ = 0; std::mutex hccl_mutex_; std::condition_variable cond_; - bool skip_flag_; + bool skip_flag_ = false; }; diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index b34871a9..132d4680 100644 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -160,18 +160,6 @@ Status GELib::InnerInitialize(const map &options) { return initOpsBuilderStatus; } - ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOther); - GELOGI("sessionManager initial."); - GE_TIMESTAMP_START(SessionManagerInitialize); - 
Status initSmStatus = sessionManager_.Initialize(options); - GE_TIMESTAMP_END(SessionManagerInitialize, "InnerInitialize::SessionManagerInitialize"); - if (initSmStatus != SUCCESS) { - GELOGE(initSmStatus, "[Init][SessionManager] GE session manager initial failed."); - REPORT_CALL_ERROR("E19999", "SessionManager initialize failed."); - RollbackInit(); - return initSmStatus; - } - GELOGI("Start to initialize HostCpuEngine"); GE_TIMESTAMP_START(HostCpuEngineInitialize); Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize(); @@ -454,12 +442,6 @@ Status GELib::Finalize() { GELOGW("engineManager finalize failed"); final_state = mid_state; } - GELOGI("sessionManager finalization."); - mid_state = sessionManager_.Finalize(); - if (mid_state != SUCCESS) { - GELOGW("sessionManager finalize failed"); - final_state = mid_state; - } GELOGI("opsBuilderManager finalization."); mid_state = OpsKernelBuilderManager::Instance().Finalize(); @@ -539,9 +521,6 @@ void GELib::RollbackInit() { if (opsManager_.init_flag_) { (void)opsManager_.Finalize(); } - if (sessionManager_.init_flag_) { - (void)sessionManager_.Finalize(); - } MemManager::Instance().Finalize(); HostMemManager::Instance().Finalize(); VarManagerPool::Instance().Destory(); diff --git a/ge/init/gelib.h b/ge/init/gelib.h index eb367578..5e66be51 100644 --- a/ge/init/gelib.h +++ b/ge/init/gelib.h @@ -22,7 +22,13 @@ #include #include "engine_manager/dnnengine_manager.h" #include "opskernel_manager/ops_kernel_manager.h" -#include "session/session_manager.h" +#include "graph/tuning_utils.h" +#include "graph/operator_factory.h" +#include "graph/ge_local_context.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/anchor_utils.h" +#include "graph/manager/graph_var_manager.h" #include "framework/common/ge_inner_error_codes.h" #include "framework/common/ge_types.h" @@ -53,9 +59,6 @@ class GE_FUNC_VISIBILITY GELib { // get OpsKernelManager object 
OpsKernelManager &OpsKernelManagerObj() { return opsManager_; } - // get SessionManager object - SessionManager &SessionManagerObj() { return sessionManager_; } - // get Initial flag bool InitFlag() const { return init_flag_; } @@ -90,7 +93,6 @@ class GE_FUNC_VISIBILITY GELib { DNNEngineManager engineManager_; OpsKernelManager opsManager_; - SessionManager sessionManager_; std::mutex status_mutex_; bool init_flag_ = false; Options options_; diff --git a/ge/offline/main.cc b/ge/offline/main.cc index bc3b823d..a50ff931 100755 --- a/ge/offline/main.cc +++ b/ge/offline/main.cc @@ -1150,9 +1150,9 @@ domi::Status GenerateSingleOp(const std::string& json_file_path) { if (ret != SUCCESS) { DOMI_LOGE("Compile op failed. ge ret = %u, op index = %d", ret, index); ret = domi::FAILED; - break; + } else { + GELOGI("Compile op success. op index = %d, output = %s", index, output_path.c_str()); } - GELOGI("Compile op success. op index = %d, output = %s", index, output_path.c_str()); index += 1; } diff --git a/ge/session/inner_session.cc b/ge/session/inner_session.cc index 58b78f41..fcb9d233 100755 --- a/ge/session/inner_session.cc +++ b/ge/session/inner_session.cc @@ -30,7 +30,6 @@ #include "graph/ge_global_options.h" #include "graph/ge_local_context.h" #include "graph/common/local_context.h" -#include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph/manager/graph_mem_manager.h" #include "graph/utils/tensor_adapter.h" @@ -124,7 +123,7 @@ Status InnerSession::Initialize() { GE_CHK_STATUS_RET(dump_properties.InitByOptions(), "Init dump properties failed."); GE_CHK_STATUS_RET(AddDumpProperties(dump_properties), "[Add][DumpProperties] failed."); - ret = graph_manager_.Initialize(options_); + ret = InnerInitialize(); if (ret != SUCCESS) { GELOGE(ret, "[Init][GraphManager] failed, InnerSession:%lu.", session_id_); REPORT_CALL_ERROR("E19999", "GraphManager initialize failed, InnerSession:%lu.", session_id_); @@ -136,7 +135,7 @@ Status 
InnerSession::Initialize() { if (ret != SUCCESS) { GELOGE(ret, "[Set][MemoryMallocSize] failed."); REPORT_CALL_ERROR("E19999", "VarManager SetMemoryMallocSize failed, InnerSession:%lu.", session_id_); - (void)graph_manager_.Finalize(); + (void)InnerFinalize(); GE_CHK_STATUS(RemoveDumpProperties(), "[Remove][DumpProperties] failed."); GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); return ret; @@ -162,14 +161,13 @@ Status InnerSession::Finalize() { return SUCCESS; } UpdateThreadContext(std::map{}); - Status ret = graph_manager_.Finalize(); + Status ret = InnerFinalize(); if (ret != SUCCESS) { // Subsequent code execution is required, so no return is required GELOGE(ret, "[Finalize][GraphManager] failed, InnerSession:%lu.", session_id_); REPORT_CALL_ERROR("E19999", "GraphManager Finalize failed, InnerSession:%lu.", session_id_); } - ModelManager::GetInstance()->DestroyAicpuSession(session_id_); init_flag_ = false; // release var memory GELOGI("VarManager free var memory."); @@ -188,6 +186,44 @@ Status InnerSession::Finalize() { return ret; } +Status InnerSession::InnerInitialize() { + Status ret = model_executor_.Initialize(options_, session_id_); + if (ret != SUCCESS) { + GELOGE(ret, "[Init][GraphExecutor] failed, InnerSession:%lu.", session_id_); + REPORT_CALL_ERROR("E19999", "GraphExecutor initialize failed, InnerSession:%lu.", session_id_); + GE_CHK_STATUS(RemoveDumpProperties(), "[Remove][DumpProperties] failed."); + return ret; + } + + ret = graph_manager_.Initialize(options_, &model_executor_); + if (ret != SUCCESS) { + GELOGE(ret, "[Init][GraphManager] failed, InnerSession:%lu.", session_id_); + REPORT_CALL_ERROR("E19999", "GraphManager initialize failed, InnerSession:%lu.", session_id_); + GE_CHK_STATUS(RemoveDumpProperties(), "[Remove][DumpProperties] failed."); + return ret; + } + + return SUCCESS; +} + +Status InnerSession::InnerFinalize() { + Status ret = graph_manager_.Finalize(); + if (ret != SUCCESS) { + // Subsequent code execution is 
required, so no return is required + GELOGE(ret, "[Finalize][GraphManager] failed, InnerSession:%lu.", session_id_); + REPORT_CALL_ERROR("E19999", "GraphManager Finalize failed, InnerSession:%lu.", session_id_); + } + + ret = model_executor_.Finalize(); + if (ret != SUCCESS) { + // Subsequent code execution is required, so no return is required + GELOGE(ret, "[Finalize][GraphExecutor] failed, InnerSession:%lu.", session_id_); + REPORT_CALL_ERROR("E19999", "GraphExecutor Finalize failed, InnerSession:%lu.", session_id_); + } + + return SUCCESS; +} + Status InnerSession::GetVariable(const std::string &name, Tensor &val) { UpdateThreadContext(std::map{}); return graph_manager_.GetVariable(name, val); diff --git a/ge/session/inner_session.h b/ge/session/inner_session.h index 35fe4692..afc273ac 100644 --- a/ge/session/inner_session.h +++ b/ge/session/inner_session.h @@ -23,6 +23,7 @@ #include "framework/common/ge_types.h" #include "external/ge/ge_api_types.h" #include "graph/manager/graph_manager.h" +#include "graph/execute/model_executor.h" namespace ge { class InnerSession { @@ -82,10 +83,14 @@ class InnerSession { void SetRtSocVersion(); private: + Status InnerInitialize(); + Status InnerFinalize(); + bool init_flag_; uint64_t session_id_; std::map options_; GraphManager graph_manager_; + ModelExecutor model_executor_; std::mutex resource_mutex_; // AddGraph, RemoveGraph and Finalize use void UpdateThreadContext(const std::map &options); void UpdateThreadContext(uint32_t graph_id); diff --git a/ge/session/session_manager.cc b/ge/session/session_manager.cc index fdf37d06..486dfd58 100755 --- a/ge/session/session_manager.cc +++ b/ge/session/session_manager.cc @@ -20,7 +20,6 @@ #include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" #include "graph/ge_context.h" -#include "graph/load/model_manager/model_manager.h" #include "graph/manager/util/rt_context_util.h" using std::map; @@ -105,10 +104,6 @@ Status SessionManager::DestroySession(SessionId 
session_id) { return GE_SESSION_NOT_EXIST; } - if (ModelManager::GetInstance() != nullptr) { - ModelManager::GetInstance()->DestroyAicpuSession(session_id); - } - // Unified destruct rt_context RtContextUtil::GetInstance().DestroyRtContexts(session_id); diff --git a/ge/session/session_manager.h b/ge/session/session_manager.h index 4c3429c2..4a0b9d66 100644 --- a/ge/session/session_manager.h +++ b/ge/session/session_manager.h @@ -31,9 +31,26 @@ namespace ge { using SessionPtr = std::shared_ptr; class SessionManager { - friend class GELib; - public: + SessionManager() = default; + + ~SessionManager() = default; + + /// + /// @ingroup ge_session + /// @brief initialize session manager + /// @param [in] options session manager config options + /// @return Status result of function + /// + Status Initialize(const std::map &options); + + /// + /// @ingroup ge_session + /// @brief finalize session manager + /// @return Status result of function + /// + Status Finalize(); + /// /// @ingroup ge_session /// @brief create session @@ -181,25 +198,6 @@ class SessionManager { bool IsGraphNeedRebuild(SessionId session_id, uint32_t graph_id); private: - SessionManager() = default; - - ~SessionManager() = default; - - /// - /// @ingroup ge_session - /// @brief initialize session manager - /// @param [in] options session manager config options - /// @return Status result of function - /// - Status Initialize(const std::map &options); - - /// - /// @ingroup ge_session - /// @brief finalize session manager - /// @return Status result of function - /// - Status Finalize(); - bool HasSession(SessionId session_id); Status GetNextSessionId(SessionId &next_session_id); diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 7aad3e8f..acf80afa 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -46,7 +46,12 @@ namespace { const size_t kDataOutputNum = 1; const uint32_t kInputIndexOfData = 0; const uint32_t kOutputIndexOfData = 
0; +const size_t kNumTaskWithAtomicAddrCleanTask = 2; +const size_t kNumTaskWithMemCpyTask = 2; constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; +const char *const kEngineNameAiCore = "AIcoreEngine"; +const char *const kEngineNameAiCpu = "aicpu_ascend_kernel"; +const char *const kEngineNameAiCpuTf = "aicpu_tf_kernel"; Status CheckHostMem(const std::vector &dependencies, const NodePtr &node, bool &is_host_mem) { auto op_desc = node->GetOpDesc(); @@ -395,7 +400,7 @@ void SingleOpModel::ParseArgTable(OpTask *task, SingleOp &op) { } } } - + Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task) { GE_CHECK_NOTNULL(task); auto task_type = static_cast(task_def.type()); @@ -408,7 +413,7 @@ Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask * return ACL_ERROR_GE_INTERNAL_ERROR; } - auto *tbe_task = new (std::nothrow) TbeOpTask(); + std::unique_ptr tbe_task(new (std::nothrow) TbeOpTask()); if (tbe_task == nullptr) { GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][TbeOpTask]failed."); REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for new TbeOpTask."); @@ -418,12 +423,41 @@ Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask * auto builder = TbeTaskBuilder(model_name_, iter->second, task_def); auto ret = builder.BuildTask(*tbe_task, model_params_); if (ret != SUCCESS) { - delete tbe_task; - tbe_task = nullptr; + GELOGE(ret, "[Build][TbeOpTask]failed."); + REPORT_INNER_ERROR("E19999", "[Build][TbeOpTask]failed."); return ret; } - *task = tbe_task; + *task = tbe_task.release(); + return SUCCESS; +} + +Status SingleOpModel::BuildAtomicTask(const domi::TaskDef &task_def, AtomicAddrCleanOpTask **task) { + GE_CHECK_NOTNULL(task); + const auto &context = task_def.kernel().context(); + auto iter = op_list_.find(context.op_index()); + if (iter == op_list_.end()) { + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Check][Param:TaskDef]op desc not found. 
op index = %u", context.op_index()); + REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for op desc not found. op index = %u", context.op_index()); + return ACL_ERROR_GE_INTERNAL_ERROR; + } + + std::unique_ptr atomic_task(new (std::nothrow) AtomicAddrCleanOpTask()); + if (atomic_task == nullptr) { + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][AtomicAddrCleanOpTask]failed."); + REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for new AtomicAddrCleanOpTask."); + return ACL_ERROR_GE_MEMORY_ALLOCATION; + } + + auto builder = AtomicAddrCleanTaskBuilder(model_name_, iter->second, task_def); + auto ret = builder.BuildTask(*atomic_task, model_params_); + if (ret != SUCCESS) { + GELOGE(ret, "[Build][AtomicAddrCleanOpTask]failed."); + REPORT_INNER_ERROR("E19999", "[Build][AtomicAddrCleanOpTask]failed."); + return ret; + } + + *task = atomic_task.release(); return SUCCESS; } @@ -536,9 +570,29 @@ Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, auto compute_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph()); GE_CHECK_NOTNULL(compute_graph); single_op.compute_graph_ = compute_graph; - if (tbe_tasks_.size() > 0) { - const auto &task_def = tbe_tasks_[0]; + + if (node_tasks_.size() != 1) { + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]Node size must be 1, but get %zu.", node_tasks_.size()); + REPORT_INNER_ERROR("E19999", "[Check][Size]Node size must be 1, but get %zu.", node_tasks_.size()); + return ACL_ERROR_GE_PARAM_INVALID; + } + + auto iter = node_tasks_.begin(); + auto node = iter->first; + const auto &task_defs = iter->second; + if (task_defs.size() <= 0 || task_defs.size() > kNumTaskWithAtomicAddrCleanTask) { + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]Node size must be 1, but get %zu.", node_tasks_.size()); + REPORT_INNER_ERROR("E19999", "[Check][Size]task_defs size must be 1 or 2, but get %zu.", task_defs.size()); + return ACL_ERROR_GE_PARAM_INVALID; + } + + GE_CHECK_NOTNULL(node); + auto op_desc = 
node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + const auto &lib_name = op_desc->GetOpKernelLibName(); + if (lib_name == kEngineNameAiCore) { GELOGD("Building TBE task."); + const auto &task_def = task_defs.back(); TbeOpTask *tbe_task = nullptr; GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); tbe_task->SetModelArgs(model_name_, model_id_); @@ -546,46 +600,52 @@ Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, GELOGD("tiling buffer is not nullptr."); tbe_task->stream_resource_ = stream_resource; } + if (task_defs.size() == kNumTaskWithAtomicAddrCleanTask) { + const auto &atomic_task_def = task_defs.front(); + AtomicAddrCleanOpTask *atomic_task = nullptr; + GE_CHK_STATUS_RET_NOLOG(BuildAtomicTask(atomic_task_def, &atomic_task)); + GE_CHK_STATUS_RET_NOLOG(atomic_task->InitAtomicAddrCleanIndices()); + tbe_task->SetAtomicAddrCleanTask(atomic_task); + } single_op.op_task_.reset(tbe_task); - } else if (aicpu_tasks_.size() > 0) { - const auto &task_def = aicpu_tasks_[0]; - auto task_type = static_cast(task_def.type()); - if (task_type == RT_MODEL_TASK_KERNEL) { - GELOGD("Building AICPU_CC task"); - AiCpuCCTask *task = nullptr; - uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; - GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id); - GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id)); - if (task->GetUnknownType() == DEPEND_COMPUTE) { - if (aicpu_tasks_.size() < 2) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Task]The copy task of the fourth operator was not found."); - REPORT_INNER_ERROR("E19999", "The copy task of the fourth operator was not found."); - return ACL_ERROR_GE_PARAM_INVALID; - } - const TaskDef ©_task_def = aicpu_tasks_[1]; - GE_CHK_STATUS_RET_NOLOG(task->SetMemCopyTask(copy_task_def.kernel())); + } else if (lib_name == kEngineNameAiCpu) { + const auto &task_def = task_defs[0]; + GELOGD("Building AICPU_CC task"); + 
AicpuCCTask *task = nullptr; + uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; + GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id); + GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id)); + if (aicpu_task->GetUnknownType() == DEPEND_COMPUTE) { + if (task_defs.size() < kNumTaskWithMemCpyTask) { + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Task]The copy task of the fourth operator was not found."); + REPORT_INNER_ERROR("E19999", "The copy task of the fourth operator was not found."); + return ACL_ERROR_GE_PARAM_INVALID; } - task->SetModelArgs(model_name_, model_id_); - single_op.op_task_.reset(task); - } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { - GELOGD("Building AICPU_TF task"); - AiCpuTask *aicpu_task = nullptr; - uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; - GELOGI("Build dynamic singleOp TfTask, kernel_id = %lu", dynamic_singleop_kernel_id); - GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, dynamic_singleop_kernel_id)); - if (aicpu_task->GetUnknownType() == DEPEND_COMPUTE) { - if (aicpu_tasks_.size() < 2) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Task]The copy task of the fourth operator was not found."); - REPORT_INNER_ERROR("E19999", "The copy task of the fourth operator was not found."); - return ACL_ERROR_GE_PARAM_INVALID; - } - const TaskDef ©_task_def = aicpu_tasks_[1]; - GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex())); + const TaskDef ©_task_def = task_defs[1]; + GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel())); + } + task->SetModelArgs(model_name_, model_id_); + single_op.op_task_.reset(task); + } else if (lib_name == kEngineNameAiCpuTf) { + const auto &task_def = task_defs[0]; + GELOGD("Building AICPU_TF task"); + AiCpuTask *aicpu_task = nullptr; + uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; + GELOGI("Build dynamic singleOp TfTask, kernel_id 
= %lu", dynamic_singleop_kernel_id); + GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, dynamic_singleop_kernel_id)); + if (aicpu_task->GetUnknownType() == DEPEND_COMPUTE) { + if (task_defs.size() < kNumTaskWithMemCpyTask) { + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Task]The copy task of the fourth operator was not found."); + REPORT_INNER_ERROR("E19999", "The copy task of the fourth operator was not found."); + return ACL_ERROR_GE_PARAM_INVALID; } - aicpu_task->SetModelArgs(model_name_, model_id_); - single_op.op_task_.reset(aicpu_task); + const TaskDef ©_task_def = task_defs[1]; + GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex())); } + aicpu_task->SetModelArgs(model_name_, model_id_); + single_op.op_task_.reset(aicpu_task); } + return SUCCESS; } @@ -594,9 +654,7 @@ Status SingleOpModel::NeedHybridModel(GeModelPtr &ge_model, bool &need_hybrid_mo bool is_host_mem = false; GE_CHK_STATUS_RET(CheckInferDepend(ge_model, is_infer_depend, is_host_mem), "[Check][InferDepend] failed."); bool need_d2h_cpy = is_infer_depend && !is_host_mem; - bool aicpu_multi_task = tbe_tasks_.size() >= 1 && aicpu_tasks_.size() >= 1; - bool aicore_multi_task = tbe_tasks_.size() > 1; - need_hybrid_model = need_d2h_cpy || aicore_multi_task || aicpu_multi_task; + need_hybrid_model = need_d2h_cpy || node_tasks_.size() > 1; return SUCCESS; } @@ -610,31 +668,27 @@ Status SingleOpModel::ParseTasks() { GELOGI("[%s] Task[%d], type = [%u], DebugString = [%s]", model_name_.c_str(), i, task_def.type(), task_def.DebugString().c_str()); auto task_type = static_cast(task_def.type()); + uint32_t op_index = 0; if (task_type == RT_MODEL_TASK_KERNEL) { - const auto &kernel_def = task_def.kernel(); - const auto &context = kernel_def.context(); - auto kernel_type = static_cast(context.kernel_type()); - if (kernel_type == ccKernelType::TE) { - tbe_tasks_.emplace_back(task_def); - } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == 
ccKernelType::CUST_AI_CPU) { - aicpu_tasks_.emplace_back(task_def); - } else { - GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, - "[Check][Param:TaskDef]Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", - context.kernel_type()); - REPORT_INNER_ERROR("E19999", - "BuildModelTaskKernel fail for got:%u not supported, Only TBE, AI_CPU, CUST_AI_CPU kernel are supported.", - context.kernel_type()); - return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID; - } - } else if (task_type == RT_MODEL_TASK_ALL_KERNEL) { - tbe_tasks_.emplace_back(task_def); + op_index = task_def.kernel().context().op_index(); } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { - aicpu_tasks_.emplace_back(task_def); + op_index = task_def.kernel_ex().op_index(); + } else if (task_type == RT_MODEL_TASK_ALL_KERNEL) { + op_index = task_def.kernel_with_handle().context().op_index(); } else { - // skip GELOGD("Skip task type: %d", static_cast(task_type)); + continue; + } + GELOGD("op_index = %u, task_type = %d", op_index, task_type); + + auto iter = op_list_.find(op_index); + if (iter == op_list_.end()) { + GELOGE(INTERNAL_ERROR, "[Find][Node]Failed to get node by op_index = %u", op_index); + REPORT_INNER_ERROR("E19999", "Failed to get node by op_index = %u.", op_index); + return INTERNAL_ERROR; } + auto &node = iter->second; + node_tasks_[node].emplace_back(task_def); } return SUCCESS; } diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index c3060115..22ee11b2 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -69,6 +69,7 @@ class SingleOpModel { Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); Status BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &dynamic_single_op); Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); + Status BuildAtomicTask(const domi::TaskDef &task_def, AtomicAddrCleanOpTask **task); Status BuildKernelExTask(const domi::KernelExDef &kernel_def, 
AiCpuTask **task, uint64_t kernel_id); Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, AiCpuCCTask **task, uint64_t kernel_id); @@ -79,9 +80,7 @@ class SingleOpModel { Status NeedHybridModel(GeModelPtr &ge_model, bool &flag); Status ParseTasks(); - std::vector tbe_tasks_; - std::vector aicpu_tasks_; - + std::map> node_tasks_; std::string model_name_; uint32_t model_id_ = 0; const void *ori_model_data_; diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index d78a2439..03fc3cb1 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -27,7 +27,6 @@ #include "common/formats/formats.h" #include "common/math/math_util.h" #include "framework/common/debug/log.h" -#include "register/op_tiling.h" #include "runtime/rt.h" #include "single_op/task/build_task_utils.h" @@ -222,19 +221,26 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) { return SUCCESS; } -Status TbeOpTask::UpdateRunInfo() { - // invoke OpParaCalculate - GELOGD("Start to invoke OpParaCalculate."); - optiling::utils::OpRunInfo run_info(0, true, 0); +Status TbeOpTask::CalcTilingInfo(optiling::utils::OpRunInfo &run_info) { auto ret = optiling::OpParaCalculateV2(*node_, run_info); if (ret != GRAPH_SUCCESS) { GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Invoke][OpParaCalculate] failed, ret = %u.", ret); REPORT_INNER_ERROR("E19999", "invoke OpParaCalculate failed, ret = %u.", ret); return ACL_ERROR_GE_INTERNAL_ERROR; } + return SUCCESS; +} + +Status TbeOpTask::UpdateRunInfo() { + // invoke OpParaCalculate + GELOGD("Start to invoke OpParaCalculate."); + optiling::utils::OpRunInfo run_info(0, true, 0); + GE_CHK_STATUS_RET(CalcTilingInfo(run_info), "[Calc][TilingInfo]failed."); + block_dim_ = run_info.GetBlockDim(); tiling_data_ = run_info.GetAllTilingData().str(); tiling_key_ = run_info.GetTilingKey(); + clear_atomic_ = run_info.GetClearAtomic(); run_info.GetAllWorkspaces(run_info_workspaces_); GELOGD("Done invoking OpParaCalculate successfully. 
block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_, tiling_data_.size(), tiling_key_); @@ -262,7 +268,6 @@ Status TbeOpTask::UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc dst_tensor.SetShape(GeShape(std::move(storage_shape))); dst_tensor.SetOriginShape(src_tensor.GetShape()); } - return SUCCESS; } @@ -346,6 +351,17 @@ Status TbeOpTask::AllocateWorkspaces(const vector &workspace_sizes) { return SUCCESS; } +Status TbeOpTask::CheckAndExecuteAtomic(const vector &input_desc, + const vector &input_buffers, + vector &output_desc, + vector &output_buffers, + rtStream_t stream) { + if (clear_atomic_ && atomic_task_ != nullptr) { + return atomic_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream); + } + return SUCCESS; +} + Status TbeOpTask::UpdateTilingArgs(rtStream_t stream) { size_t args_size = input_num_ + output_num_ + workspaces_.size(); if (tiling_buffer_ != nullptr) { @@ -433,6 +449,8 @@ Status TbeOpTask::LaunchKernel(const vector &input_desc, GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc)); GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo()); GE_CHK_STATUS_RET(AllocateWorkspaces(run_info_workspaces_), "[Allocate][Workspaces] failed."); + GE_CHK_STATUS_RET(CheckAndExecuteAtomic(input_desc, input_buffers, output_desc, output_buffers, stream), + "[Execute][AtomicTask] failed."); GE_CHK_STATUS_RET(UpdateTilingArgs(stream), "[Update][TilingArgs] failed."); GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); @@ -463,6 +481,85 @@ void TbeOpTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { } } +Status AtomicAddrCleanOpTask::UpdateNodeByShape(const vector &input_desc, + const vector &output_desc) { + return SUCCESS; +} + +Status AtomicAddrCleanOpTask::UpdateIoAddr(const vector &inputs, const vector &outputs) { + uintptr_t *arg_base = reinterpret_cast(args_.get()); + for (auto atomic_output_index : atomic_output_indices_) { + if (atomic_output_index >= 
static_cast(outputs.size())) { + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Update][Args] failed, atomic index must smaller then data size."); + REPORT_INNER_ERROR("E19999", "[Update][Args] failed, atomic index must smaller then data size."); + return ACL_ERROR_GE_PARAM_INVALID; + } + auto &output_buffer = outputs[atomic_output_index]; + *arg_base++ = reinterpret_cast(output_buffer.data); + + auto tensor_desc = op_desc_->MutableOutputDesc(atomic_output_index); + int64_t size = 0; + graphStatus graph_status = TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, size); + if (graph_status != GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Get tensor size in bytes failed!"); + GELOGE(graph_status, "[Get][TensorMemorySize] In Bytes failed!"); + return FAILED; + } + TensorUtils::SetSize(*tensor_desc, size); + } + return SUCCESS; +} + +Status AtomicAddrCleanOpTask::UpdateTilingArgs(rtStream_t stream) { + if (tiling_buffer_ != nullptr) { + GELOGD("[%s] Start to copy tiling info. size = %zu", node_->GetName().c_str(), tiling_data_.size()); + GE_CHK_RT_RET(rtMemcpyAsync(tiling_buffer_, max_tiling_size_, tiling_data_.data(), tiling_data_.size(), + RT_MEMCPY_HOST_TO_DEVICE_EX, stream)); + uintptr_t *arg_base = reinterpret_cast(args_.get()); + size_t idx = atomic_output_indices_.size(); + arg_base[idx] = reinterpret_cast(tiling_buffer_); + } + return SUCCESS; +} + +Status AtomicAddrCleanOpTask::CalcTilingInfo(optiling::utils::OpRunInfo &run_info) { + auto ret = optiling::OpAtomicCalculateV2(*node_, run_info); + if (ret != GRAPH_SUCCESS) { + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Invoke][OpAtomicCalculate] failed, ret = %u.", ret); + REPORT_INNER_ERROR("E19999", "invoke OpAtomicCalculate failed, ret = %u.", ret); + return ACL_ERROR_GE_INTERNAL_ERROR; + } + return SUCCESS; +} + +Status AtomicAddrCleanOpTask::InitAtomicAddrCleanIndices() { + GELOGD("[%s] Start to setup AtomicAddrClean task.", op_desc_->GetName().c_str()); + std::vector atomic_output_indices; + (void) 
ge::AttrUtils::GetListInt(op_desc_, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_indices); + if (atomic_output_indices.empty()) { + GELOGE(INTERNAL_ERROR, "[Check][Size][%s] atomic_output_indices must not be empty.", op_desc_->GetName().c_str()); + REPORT_INNER_ERROR("E19999", "[%s] atomic_output_indices must not be empty.", op_desc_->GetName().c_str()); + return INTERNAL_ERROR; + } + + size_t max_arg_size = tiling_buffer_ == nullptr ? arg_size_ : arg_size_ - 1; + if (atomic_output_indices.size() > max_arg_size) { + GELOGE(INTERNAL_ERROR, "[Check][Size][%s] atomic_output_indices invalid. atomic_output_indices size is %zu," + "arg size is %zu.", op_desc_->GetName().c_str(), atomic_output_indices.size(), arg_size_); + REPORT_INNER_ERROR("E19999", "[%s] atomic_output_indices invalid. atomic_output_indices size is %zu," + "arg size is %zu.", op_desc_->GetName().c_str(), atomic_output_indices.size(), arg_size_); + return INTERNAL_ERROR; + } + + for (auto output_index : atomic_output_indices) { + GELOGD("[%s] Adding output index [%ld]", op_desc_->GetName().c_str(), output_index); + GE_CHECK_GE(output_index, 0); + GE_CHECK_LE(output_index, INT32_MAX); + atomic_output_indices_.emplace_back(static_cast(output_index)); + } + return SUCCESS; +} + AiCpuBaseTask::~AiCpuBaseTask() { if (ext_info_addr_dev_ != nullptr) { (void)rtFree(ext_info_addr_dev_); diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index 248ccf97..e6142f4e 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -89,6 +89,7 @@ class TbeOpTask : public OpTask { void SetKernelArgs(std::unique_ptr &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc); void SetKernelWithHandleArgs(std::unique_ptr &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle); + void SetAtomicAddrCleanTask(OpTask *task) { atomic_task_.reset(task); } Status UpdateRunInfo() override; Status SetArgIndex(); @@ -100,38 
+101,63 @@ class TbeOpTask : public OpTask { const std::string &GetTaskType() const override; void SetHandle(void *handle); + protected: + NodePtr node_; + std::unique_ptr args_; + size_t arg_size_ = 0; + void *tiling_buffer_ = nullptr; + uint32_t max_tiling_size_ = 0; + std::string tiling_data_; + size_t input_num_; // include const input + size_t output_num_; + private: friend class SingleOpModel; friend class TbeTaskBuilder; static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor); - Status UpdateNodeByShape(const vector &input_desc, - const vector &output_desc); Status AllocateWorkspaces(const std::vector &workspace_sizes); - Status UpdateTilingArgs(rtStream_t stream); Status DoLaunchKernel(rtStream_t stream); - Status UpdateIoAddr(const vector &inputs, const vector &outputs); + Status CheckAndExecuteAtomic(const vector &input_desc, + const vector &input_buffers, + vector &output_desc, + vector &output_buffers, + rtStream_t stream); + virtual Status UpdateNodeByShape(const vector &input_desc, + const vector &output_desc); + virtual Status UpdateTilingArgs(rtStream_t stream); + virtual Status UpdateIoAddr(const vector &inputs, const vector &outputs); + virtual Status CalcTilingInfo(optiling::utils::OpRunInfo &run_info); const void *stub_func_ = nullptr; - std::unique_ptr args_; - size_t arg_size_ = 0; void *sm_desc_ = nullptr; std::string stub_name_; - StreamResource *stream_resource_ = nullptr; - void *tiling_buffer_ = nullptr; - uint32_t max_tiling_size_ = 0; - std::string tiling_data_; + std::vector run_info_workspaces_; std::vector workspaces_; - NodePtr node_; uint32_t tiling_key_ = 0; + bool clear_atomic_ = false; void* handle_ = nullptr; std::string original_kernel_key_; std::string node_info_; std::vector arg_index_; // data index in args - size_t input_num_; // include const input - size_t output_num_; + + std::unique_ptr atomic_task_; +}; + +class AtomicAddrCleanOpTask : public TbeOpTask { + public: + Status 
InitAtomicAddrCleanIndices(); + + private: + Status UpdateNodeByShape(const vector &input_desc, + const vector &output_desc) override; + Status UpdateIoAddr(const vector &inputs, const vector &outputs) override; + Status UpdateTilingArgs(rtStream_t stream) override; + Status CalcTilingInfo(optiling::utils::OpRunInfo &run_info) override; + std::vector atomic_output_indices_; + }; class AiCpuBaseTask : public OpTask { @@ -280,7 +306,7 @@ class MemcpyAsyncTask : public OpTask { friend class SingleOpModel; friend class RtsKernelTaskBuilder; - uintptr_t addresses_[kAddressNum]; + uintptr_t addresses_[kAddressNum] = {0}; size_t dst_max_; size_t count_; rtMemcpyKind_t kind_; diff --git a/ge/single_op/task/tbe_task_builder.cc b/ge/single_op/task/tbe_task_builder.cc index c1bafed8..017dac25 100644 --- a/ge/single_op/task/tbe_task_builder.cc +++ b/ge/single_op/task/tbe_task_builder.cc @@ -29,15 +29,8 @@ namespace ge { namespace { constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; constexpr char const *kAttrOpParamSize = "op_para_size"; +constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; std::mutex g_reg_mutex; - -inline void GetKernelName(const OpDescPtr &op_desc, std::string &kernel_name) { - (void)AttrUtils::GetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); -} - -inline TBEKernelPtr GetTbeKernel(const OpDescPtr &op_desc) { - return op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); -} } // namespace KernelHolder::KernelHolder(const char *stub_func, std::shared_ptr kernel_bin) @@ -96,7 +89,15 @@ TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &nod task_def_(task_def), kernel_def_(task_def.kernel()), kernel_def_with_handle_(task_def.kernel_with_handle()), - stub_name_(model_name + "/" + node->GetName() + "_tvmbin") {} + model_name_(model_name) {} + +TBEKernelPtr TbeTaskBuilder::GetTbeKernel(const OpDescPtr &op_desc) const { + return 
op_desc->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); +} + +void TbeTaskBuilder::GetKernelName(const OpDescPtr &op_desc, std::string &kernel_name) const { + (void)AttrUtils::GetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); +} Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, const SingleOpModelParam ¶m) const { @@ -124,7 +125,7 @@ Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bi Status TbeTaskBuilder::DoRegisterMeta(void *bin_handle) { std::string meta_data; - (void)AttrUtils::GetStr(op_desc_, TVM_ATTR_NAME_METADATA, meta_data); + (void)AttrUtils::GetStr(op_desc_, GetKeyForTvmMetaData(), meta_data); GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str()); if (!meta_data.empty()) { auto rt_ret = rtMetadataRegister(bin_handle, meta_data.c_str()); @@ -307,6 +308,15 @@ Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m return SUCCESS; } +Status TbeTaskBuilder::InitKernelArgs(void *arg_addr, size_t arg_size, const SingleOpModelParam ¶m) { + // copy args + std::vector tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); + void *src_addr = reinterpret_cast(tensor_device_addr_vec.data()); + uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); + GE_CHK_RT_RET(rtMemcpy(arg_addr, arg_size, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST)); + return SUCCESS; +} + Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc) { auto task_type = static_cast(task_def_.type()); bool is_task_all_kernel = (task_type == RT_MODEL_TASK_ALL_KERNEL); @@ -331,12 +341,7 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & kernel_def_with_handle_.context() : kernel_def_.context(); const auto *args_offset_tmp = reinterpret_cast(context.args_offset().data()); uint16_t offset = *args_offset_tmp; - - // copy args - std::vector 
tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); - void *src_addr = reinterpret_cast(tensor_device_addr_vec.data()); - uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); - GE_CHK_RT_RET(rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST)); + GE_CHK_STATUS_RET_NOLOG(InitKernelArgs(args.get() + offset, arg_size - offset, param)); if (is_task_all_kernel) { task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc, @@ -367,8 +372,15 @@ Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶ } auto task_type = static_cast(task_def_.type()); - ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? RegisterKernelWithHandle(task, param) : - RegisterKernel(task, param); + if (task_type == RT_MODEL_TASK_ALL_KERNEL) { + stub_name_ = model_name_ + "/" + node_->GetName() + "_tvmbin"; + ret = RegisterKernelWithHandle(task, param); + } else { + const domi::KernelDef &kernel_def = task_def_.kernel(); + stub_name_ = model_name_ + "/" + kernel_def.stub_func() + "_tvmbin"; + ret = RegisterKernel(task, param); + } + task.SetHandle(handle_); if (ret != SUCCESS) { return ret; @@ -397,8 +409,8 @@ Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) { GELOGD("Start alloc tiling data of node %s.", op_desc_->GetName().c_str()); int64_t max_size = -1; - (void)AttrUtils::GetInt(op_desc_, kAttrOpParamSize, max_size); - GELOGD("Got op param size by key: %s, ret = %ld", kAttrOpParamSize, max_size); + (void)AttrUtils::GetInt(op_desc_, GetKeyForOpParamSize(), max_size); + GELOGD("Got op param size by key: %s, ret = %ld", GetKeyForOpParamSize().c_str(), max_size); if (max_size < 0) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Get][Int] %s Invalid op_param_size: %ld.", op_desc_->GetName().c_str(), max_size); @@ -439,4 +451,32 @@ Status TbeTaskBuilder::GetMagic(uint32_t &magic) const { return 
SUCCESS; } +std::string TbeTaskBuilder::GetKeyForOpParamSize() const { + return kAttrOpParamSize; +} + +std::string TbeTaskBuilder::GetKeyForTvmMetaData() const { + return TVM_ATTR_NAME_METADATA; +} + +Status AtomicAddrCleanTaskBuilder::InitKernelArgs(void *args_addr, size_t arg_size, const SingleOpModelParam ¶m) { + return SUCCESS; +} + +std::string AtomicAddrCleanTaskBuilder::GetKeyForOpParamSize() const { + return kAttrAtomicOpParamSize; +} + +std::string AtomicAddrCleanTaskBuilder::GetKeyForTvmMetaData() const { + return ATOMIC_ATTR_TVM_METADATA; +} + +void AtomicAddrCleanTaskBuilder::GetKernelName(const OpDescPtr &op_desc, std::string &kernel_name) const { + (void)AttrUtils::GetStr(op_desc, op_desc->GetName() + "_atomic_kernelname", kernel_name); +} + +TBEKernelPtr AtomicAddrCleanTaskBuilder::GetTbeKernel(const OpDescPtr &op_desc) const { + return op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_TBE_KERNEL, TBEKernelPtr()); +} + } // namespace ge diff --git a/ge/single_op/task/tbe_task_builder.h b/ge/single_op/task/tbe_task_builder.h index 6252feea..06d17901 100755 --- a/ge/single_op/task/tbe_task_builder.h +++ b/ge/single_op/task/tbe_task_builder.h @@ -90,10 +90,17 @@ class HandleRegistry { class TbeTaskBuilder { public: TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def); - ~TbeTaskBuilder() = default; + virtual ~TbeTaskBuilder() = default; Status BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m); + protected: + virtual std::string GetKeyForOpParamSize() const; + virtual std::string GetKeyForTvmMetaData() const; + virtual TBEKernelPtr GetTbeKernel(const OpDescPtr &op_desc) const; + virtual void GetKernelName(const OpDescPtr &op_desc, std::string &kernel_name) const; + virtual Status InitKernelArgs(void *args_addr, size_t arg_size, const SingleOpModelParam ¶m); + private: Status InitTilingInfo(TbeOpTask &task); Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); @@ -114,9 
+121,24 @@ class TbeTaskBuilder { const domi::TaskDef &task_def_; const domi::KernelDef &kernel_def_; const domi::KernelDefWithHandle &kernel_def_with_handle_; - const std::string stub_name_; + const std::string model_name_; + std::string stub_name_; void *handle_ = nullptr; }; + +class AtomicAddrCleanTaskBuilder : public TbeTaskBuilder { + public: + AtomicAddrCleanTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def) + : TbeTaskBuilder(model_name, node, task_def) {} + ~AtomicAddrCleanTaskBuilder() override = default; + + protected: + std::string GetKeyForOpParamSize() const override; + std::string GetKeyForTvmMetaData() const override; + TBEKernelPtr GetTbeKernel(const OpDescPtr &op_desc) const override; + void GetKernelName(const OpDescPtr &op_desc, std::string &kernel_name) const override; + Status InitKernelArgs(void *args_addr, size_t arg_size, const SingleOpModelParam ¶m) override; +}; } // namespace ge #endif // GE_SINGLE_OP_TASK_TBE_TASK_BUILDER_H_ diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h index ee51d29d..5da5a593 100644 --- a/inc/framework/generator/ge_generator.h +++ b/inc/framework/generator/ge_generator.h @@ -106,7 +106,7 @@ class GE_FUNC_VISIBILITY GeGenerator { bool CheckNoAicore(const ComputeGraphPtr &graph); void RemoveConst(const vector &inputs, vector &outputs); Status CheckForSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs); - Status InferFormatForSingleOp(OpDescPtr &op_desc); + Status InferFormatForSingleOp(OpDescPtr &op_desc, Graph &graph); using GeRootModelPtr = std::shared_ptr; Status SetModelNameForDump(const GeRootModelPtr &ge_root_model); diff --git a/metadef b/metadef index 3e14f92d..84e7ab39 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 3e14f92d47abc9a2e703be2171f047553f7597e0 +Subproject commit 84e7ab39b0daf7ca2b2f5549e3279647da7875e2 diff --git a/parser b/parser index 4151e330..ffd94df4 160000 --- 
a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 4151e33028c518057289b569b36cd4069af362a4 +Subproject commit ffd94df471f7dd2b1928cc8d27e43e7210aaa7e7 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3dd94051..f5dab366 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -15,7 +15,7 @@ project(tests CXX C) find_package(Threads) -add_subdirectory(depends/cce) + add_subdirectory(depends/slog) add_subdirectory(depends/mmpa) add_subdirectory(depends/runtime) diff --git a/tests/depends/cce/CMakeLists.txt b/tests/depends/cce/CMakeLists.txt deleted file mode 100644 index 05fa8133..00000000 --- a/tests/depends/cce/CMakeLists.txt +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright 2019-2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -#cmake_minimum_required(VERSION 2.8) - -project(STUB_CCE) - -set(CMAKE_CXX_STANDARD 11) - -include_directories(${GE_CODE_DIR}/inc) -include_directories(${GE_CODE_DIR}/inc/framework) -include_directories(${GE_CODE_DIR}/metadef/inc/graph) -include_directories(${GE_CODE_DIR}/inc/external) -include_directories(${GE_CODE_DIR}/metadef/inc/external) -include_directories(${GE_CODE_DIR}/metadef/inc/external/graph) -include_directories(${GE_CODE_DIR}/metadef) -include_directories(${GE_CODE_DIR}/metadef/inc) -include_directories(${GE_CODE_DIR}/metadef/graph) -include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc) -include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/cce) -include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/ops) -include_directories(${CMAKE_BINARY_DIR}) -include_directories(${CMAKE_BINARY_DIR}/proto/ge) -set(PROTO_LIST - "${GE_CODE_DIR}/metadef/proto/om.proto" - "${GE_CODE_DIR}/metadef/proto/ge_ir.proto" - "${GE_CODE_DIR}/metadef/proto/task.proto" -) - -protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) - -set(SRCS - "${GE_CODE_DIR}/metadef/graph/ge_attr_define.cc" - "${GE_CODE_DIR}/metadef/graph/anchor.cc" - "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" - "${GE_CODE_DIR}/metadef/graph/buffer.cc" - "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" - "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" - "${GE_CODE_DIR}/metadef/graph/graph.cc" - "${GE_CODE_DIR}/metadef/graph/model.cc" - "${GE_CODE_DIR}/metadef/graph/model_serialize.cc" - "${GE_CODE_DIR}/metadef/graph/node.cc" - "${GE_CODE_DIR}/metadef/graph/op_desc.cc" - "${GE_CODE_DIR}/metadef/graph/operator.cc" - "${GE_CODE_DIR}/metadef/graph/operator_factory.cc" - "${GE_CODE_DIR}/metadef/graph/operator_factory_impl.cc" - "${GE_CODE_DIR}/metadef/graph/tensor.cc" - "${GE_CODE_DIR}/metadef/graph/detail/attributes_holder.cc" - "${GE_CODE_DIR}/metadef/graph/utils/anchor_utils.cc" - 
"${GE_CODE_DIR}/metadef/graph/utils/graph_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/dumper/ge_graph_dumper.cc" - "${GE_CODE_DIR}/metadef/graph/utils/node_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/op_desc_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/type_utils.cc" - "${GE_CODE_DIR}/metadef/ops/op_imp.cpp" - "${GE_CODE_DIR}/metadef/graph/shape_refiner.cc" - "${GE_CODE_DIR}/metadef/graph/ge_tensor.cc" - "${GE_CODE_DIR}/metadef/graph/opsproto/opsproto_manager.cc" -) -add_library(cce_ge_stub SHARED src/cce_stub.cc ${PROTO_SRCS} ${PROTO_HDRS}) - -target_compile_definitions(cce_ge_stub PRIVATE - google=ascend_private -) - -target_link_libraries(cce_ge_stub - $ - -Wl,--no-as-needed - ascend_protobuf - -Wl,--as-needed - c_sec -) - -add_library(cce_stub SHARED ${SRCS} ${PROTO_SRCS} ${PROTO_HDRS}) - -target_compile_definitions(cce_stub PRIVATE - google=ascend_private -) - -target_link_libraries(cce_stub PRIVATE - $ - -Wl,--no-as-needed - ascend_protobuf - -Wl,--as-needed - c_sec -) diff --git a/tests/depends/cce/src/cce_stub.cc b/tests/depends/cce/src/cce_stub.cc deleted file mode 100644 index 03df3d0c..00000000 --- a/tests/depends/cce/src/cce_stub.cc +++ /dev/null @@ -1,576 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include - -#include "cce/optimizer/fusion_engine.h" -#include "common/op/attr_value_util.h" -#include "graph/utils/tensor_utils.h" -#include "graph/utils/graph_utils.h" - -using namespace cce; -using namespace std; -using namespace ge; -using namespace fusion; - -uint64_t global_mem_base = 0; - -namespace cce { -#define DIM_MAX_SIZE 8 -static const uint32_t C0 = 16; -struct tagCcPad {}; -struct tagCcConvolution {}; - -struct tagCcLRN {}; - -struct tagCcFasterRcnnProposal {}; -struct tagCcRoiAlign {}; -struct tagCcBatchNorm {}; -struct tagCcDetectpostprocess {}; - -struct tagCcSsdDetectionOutput {}; - -struct tagCcRefinedetDetectionOutput {}; - -struct tagCcMsrGenerateRpnProposals {}; - -struct tagCcFilter { - vector dims; -}; - -struct tagCcTensor { - ccTensorFormat_t format; - ccDataType_t data_type; - uint32_t dim_cnt; - int32_t real_dim_cnt; - uint32_t data_size; - int32_t dim_buf[DIM_MAX_SIZE]; - int32_t stride_buf[DIM_MAX_SIZE]; -}; - -typedef struct tagCcPooling { - ccPoolingMode_t mode; - ccPaddingMode_t pad_mode; - ccNanPropagation_t max_pooling_nan_opt; - uint32_t dim_cnt; - int32_t window_dim[6]; - int32_t padding[6]; - int32_t stride[6]; -} ccPooling_t; - -struct tagCcActivation {}; - -struct tagCcFasterRcnnDetectionOutput {}; -struct tagCcSpatialTransformer {}; - -struct tagCcPower {}; -struct tagCcResizeBilinear {}; -struct tagCcSsdNormalize {}; -struct tagCcSsdPostProcessor {}; -struct tagCcSsdPriorBox {}; -struct tagCcPsRoiPooling {}; - -struct tagMsrFastRcnnPredictions {}; -struct tagCcPRelu {}; -struct tagCcStridedSlice {}; - -struct tagCcStridedSliceAttrs {}; - -struct tagCcRnn {}; - -struct tagCcArgmaxmin {}; - -typedef struct tagCcLog { - ccDataType_t data_type; - uint32_t param_cnt; -} ccLog_t; -typedef struct tagCcLog *ccLogDescriptor_t; - -struct tagCcPadV2 {}; - -ccStatus_t ccGetPadV2OutputDim(const ccTensorDescriptor_t x_desc, const ccPadV2Descriptor_t pad_desc, int32_t *dim_cnt, - int32_t 
dim[], int32_t dim_len) { - *dim_cnt = 4; - dim[0] = 1; - dim[1] = 2; - dim[2] = 2; - dim[3] = 3; - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccPadV2Forward(ccHandle_t handle, const ccPadV2Descriptor_t pad_desc, const void *alpha, - const ccTensorDescriptor_t x_desc, const void *x, const void *beta, - const ccTensorDescriptor_t output_desc, void *output) { - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccCreatePadV2Descriptor(ccPadV2Descriptor_t *pad_desc) { return CC_STATUS_SUCCESS; } - -ccStatus_t ccDestroyPadV2Descriptor(ccPadV2Descriptor_t *pad_desc) { return CC_STATUS_SUCCESS; } - -ccStatus_t ccSetKernelOpMap(ccHandle_t handle) { return CC_STATUS_SUCCESS; } - -ccStatus_t ccDataDumpForward(ccHandle_t handle, const void *buffer, const uint64_t buf_len, const uint32_t task_index) { - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccSetPadV2Descriptor(ccPadV2Descriptor_t pad_desc, const int32_t pad_shape_cnt, - const int32_t pad_shape_low[], const int32_t pad_shape_high[], - const ccPadMode_t pad_mode, const void *pad_value, const ccDataType_t pad_value_type) { - return CC_STATUS_SUCCESS; -} - -struct tagCcYoloDetectionOutput { - ccYoloVersion_t yolo_version; - uint32_t net_h; - uint32_t net_w; - uint32_t post_top_k; - uint32_t classes; - float nms_threshold; - float iou_thre_decay; - float coor_scale_factor; - bool relative; - float obj_threshold; - float cls_threshold; - uint32_t bias_num; - float *bias; -}; - -struct tagCcYoloRegion {}; - -struct tagCcEltwise {}; - -struct tagCcHashTableLookup {}; - -struct tagCcEmbeddingAttnDecoder {}; -struct tagNonMaxSuppression {}; - -struct tagCcArcSinCos {}; -struct tagCcPow {}; -struct tagCcConcatFive2Four_t {}; -struct tagCcConcatFour2Five_t {}; - -ccStatus_t ccCreatePowDescriptor(ccPowDescriptor_t *pow_desc) { - *pow_desc = new tagCcPow(); - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccSetPowDescriptor(ccPowDescriptor_t pow_desc, ccDataType_t data_type, uint32_t param_cnt) { - return CC_STATUS_SUCCESS; -} - -ccStatus_t 
ccDestroyPowDescriptor(ccPowDescriptor_t *pow_desc) { - if (nullptr == pow_desc) { - return CC_STATUS_BAD_PARAM; - } - - delete *pow_desc; - *pow_desc = 0; - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccPowForward(ccHandle_t handle, const ccPowDescriptor_t pow_desc, const void *pow_param, const void *alpha, - const ccTensorDescriptor_t x_desc, const void *x, const ccTensorDescriptor_t y_desc, - const void *y, const void *beta, const ccTensorDescriptor_t z_desc, void *z) { - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccLogicalOrForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t x_desc, const void *x, - const ccTensorDescriptor_t y_desc, const void *y, const void *beta, - const ccTensorDescriptor_t output_desc, void *output) { - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccCompareForward(ccHandle_t handle, ccCompareType_t compare_type, const void *alpha, - const ccTensorDescriptor_t x_desc, const void *x, const ccTensorDescriptor_t y_desc, - const void *y, const void *beta, const ccTensorDescriptor_t output_desc, void *output) { - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccGetCompareOutputDim(const ccTensorDescriptor_t x_desc, const ccTensorDescriptor_t y_desc, int32_t *dim_cnt, - int32_t *dim, int32_t dim_len) { - *dim_cnt = 4; - dim[0] = 1; - dim[1] = 1; - dim[2] = 1; - dim[3] = 1; - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccArcTanForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t x_desc, const void *x, - const void *beta, const ccTensorDescriptor_t y_desc, void *y) { - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccAtanhForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t x_desc, const void *x, - const void *beta, const ccTensorDescriptor_t y_desc, void *y) { - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccIsDepthwiseHighPerformance(int32_t input_n, int32_t input_c, int32_t input_h, int32_t input_w, - int32_t filter_n, int32_t filter_c, int32_t filter_h, int32_t filter_w, - int32_t dilation_h, 
int32_t dilation_w, int32_t pad_h_head, int32_t pad_h_tail, - int32_t pad_w_head, int32_t pad_w_tail, int32_t stride_h, int32_t stride_w, - int32_t group_num, bool &is_high_performance, bool is_quant, - ccDataType_t input_data_type, ccDataType_t output_data_type) { - is_high_performance = true; - return CC_STATUS_SUCCESS; -} - -struct tagCcSpaceToBatch {}; - -struct tagCcBatchToSpace {}; - -struct tagCcResizeNearestNeighbor {}; - -ccStatus_t ccGetStream(ccHandle_t handle, rtStream_t *stream_id) { return CC_STATUS_SUCCESS; } - -ccStatus_t ccGetRtVersion(uint32_t *count) { return CC_STATUS_SUCCESS; } - -ccStatus_t ccDestroyTensorDescriptor(ccTensorDescriptor_t *tensor_desc) { - if (nullptr == tensor_desc) { - return CC_STATUS_BAD_PARAM; - } - delete *tensor_desc; - *tensor_desc = 0; - return CC_STATUS_SUCCESS; -} -ccStatus_t ccDestroyFilterDescriptor(ccFilterDescriptor_t *filter_desc) { - delete *filter_desc; - *filter_desc = 0; - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccGetFilterSizeInBytes(const ccFilterDescriptor_t filter_desc, uint32_t *size) { - *size = filter_desc->dims[0] * filter_desc->dims[1] * filter_desc->dims[2] * filter_desc->dims[3] * sizeof(float); - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccTransFilter(const ccFilterDescriptor_t w_desc, const void *w, ccFilterDescriptor_t y_desc, void *y, - uint32_t y_size_in_bytes) { - y = const_cast(w); - - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccCreateTensorDescriptor(ccTensorDescriptor_t *tensor_desc) { - *tensor_desc = new tagCcTensor(); - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccSetTensor4dDescriptor(ccTensorDescriptor_t tensor_desc, ccTensorFormat_t format, ccDataType_t data_type, - int32_t n, int32_t c, int32_t h, int32_t w) { - if (CC_TENSOR_NHWC == format) { - tensor_desc->dim_buf[0] = n; - tensor_desc->dim_buf[1] = h; - tensor_desc->dim_buf[2] = w; - tensor_desc->dim_buf[3] = c; - } else { - tensor_desc->dim_buf[0] = n; - tensor_desc->dim_buf[1] = c; - tensor_desc->dim_buf[2] = h; - 
tensor_desc->dim_buf[3] = w; - } - tensor_desc->dim_cnt = 4; - tensor_desc->data_type = data_type; - tensor_desc->format = format; - tensor_desc->data_size = n * c * h * w * sizeof(data_type); - return CC_STATUS_SUCCESS; -} -ccStatus_t ccGetTensorSizeInBytes(const ccTensorDescriptor_t tensor_desc, uint32_t *size) { - if ((NULL == tensor_desc) || (NULL == size)) { - return CC_STATUS_BAD_PARAM; - } - *size = tensor_desc->data_size; - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccGetTensorMemorySizeInBytes(const ccTensorDescriptor_t tensor_desc, uint32_t *size) { - *size = tensor_desc->data_size; - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccCreateFilterDescriptor(ccFilterDescriptor_t *filter_desc) { - *filter_desc = new tagCcFilter(); - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccSetFilter4dDescriptor(ccFilterDescriptor_t filter_desc, ccTensorFormat_t format, ccDataType_t data_type, - int32_t k, int32_t c, int32_t h, int32_t w) { - filter_desc->dims.push_back(k); - filter_desc->dims.push_back(c); - filter_desc->dims.push_back(h); - filter_desc->dims.push_back(w); - - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccSetFilterFractalDescriptor(ccFilterDescriptor_t filter_desc, ccTensorFormat_t format, - ccDataType_t data_type, int32_t k, int32_t c, int32_t h, int32_t w) { - filter_desc->dims.push_back(k); - filter_desc->dims.push_back(c); - filter_desc->dims.push_back(h); - filter_desc->dims.push_back(w); - - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccSetStream(ccHandle_t handle, rtStream_t stream_id) { return CC_STATUS_SUCCESS; } -ccStatus_t ccCreatePoolingMaskDescriptor(ccTensorDescriptor_t *pooling_mask_desc) { - *pooling_mask_desc = new tagCcTensor(); - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccSetPoolingMaskTensorDescriptor(ccTensorDescriptor_t tensor_desc, ccTensorFormat_t format, - ccDataType_t data_type, int32_t n, int32_t c, int32_t h, int32_t w, - int32_t window_h, int32_t window_w) { - return CC_STATUS_SUCCESS; -} - -ccStatus_t 
ccSetFilter6dDescriptor(ccTensorDescriptor_t filter_desc, ccTensorFormat_t format, ccDataType_t data_type, - int32_t c1, int32_t h, int32_t w, int32_t n, int32_t co, int32_t c0) { - return CC_STATUS_SUCCESS; -} - -/// @ingroup dnn -/// @brief get the format and dimcnt of GeTensor -/// @param [in] tensor_desc descriptor of tensor -/// @param [in|out] format point to format -/// @return ccStatus_t -ccStatus_t ccGetTensorFormat(const ccTensorDescriptor_t tensor_desc, ccTensorFormat_t *format) { - *format = tensor_desc->format; - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccTransTensor(const ccTensorDescriptor_t x_desc, const void *x, const ccTensorDescriptor_t y_desc, void *y, - uint32_t y_size_in_bytes) { - return CC_STATUS_SUCCESS; -} -void cceSysInit() {} - -bool compilerStubFree() { return true; } - -bool compilerStubInit() { return true; } - -ccStatus_t ccSetInt8Filter4dDescriptor(ccFilterDescriptor_t filter_desc, ccTensorFormat_t format, - ccDataType_t data_type, int32_t k, int32_t c, int32_t h, int32_t w, - ccDataType_t output_data_type) { - filter_desc->dims.push_back(k); - filter_desc->dims.push_back(c); - filter_desc->dims.push_back(h); - filter_desc->dims.push_back(w); - - return CC_STATUS_SUCCESS; -} -ccStatus_t ccSetTensorNdDescriptor(ccTensorDescriptor_t tensor_desc, ccDataType_t data_type, int32_t dim_cnt, - int32_t dimA[]) { - tensor_desc->data_type = data_type; - tensor_desc->data_size = sizeof(data_type); - for (int32_t i = 0; i < dim_cnt; i++) { - tensor_desc->data_size = tensor_desc->data_size * dimA[i]; - } - tensor_desc->format = CC_TENSOR_ND; - return CC_STATUS_SUCCESS; -} - -ccStatus_t CceProfilingConfig(const char *target, const char *job_ctx, uint32_t flag) { return CC_STATUS_SUCCESS; } -ccStatus_t ccSetTensorRealDimCnt(ccTensorDescriptor_t tensor_desc, int32_t real_dim_cnt) { - if (tensor_desc != NULL && tensor_desc != nullptr) { - tensor_desc->real_dim_cnt = real_dim_cnt; - } - return CC_STATUS_SUCCESS; -} - -ccStatus_t 
ccGetTensorRealDimCnt(ccTensorDescriptor_t tensor_desc, int32_t *real_dim_cnt) { - *real_dim_cnt = tensor_desc->real_dim_cnt; - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccSetQuantizeFactors(ccQuantizeDescriptor_t quantize_info, ccScaleValueMode_t scale_val_mode, - const uint16_t *scale, const uint16_t *offset, const uint8_t *offset_pad) { - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccSetReQuantizeFactors(ccQuantizeDescriptor_t quantize_info, ccScaleValueMode_t scale_val_mode, - const uint16_t *scale_rq, const uint16_t *next_layer_offset, - const int32_t *offset_w) { - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccSetDeQuantizeFactors(ccQuantizeDescriptor_t quantize_info, ccScaleValueMode_t scale_val_mode, - const uint16_t *scale_dq, const int32_t *offset_w) { - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccSetQuantizeAlgoAndScaleType(ccQuantizeDescriptor_t quantize_info, ccQuantizeAlgo_t quant_algo, - ccScaleType_t scale_type, bool relu_flag) { - return CC_STATUS_SUCCESS; -} -ccStatus_t ccPrintTimeStat() { return CC_STATUS_SUCCESS; } -ccStatus_t ccSetModelId(ccHandle_t handle, uint32_t model_id) { return CC_STATUS_SUCCESS; } - -ccStatus_t ccGetKernelContext(rtStream_t stream_id, ccOpContext &op_context) { - if (stream_id == nullptr) { - op_context.kernelType = ccKernelType::TE; - } else { - op_context.kernelType = ccKernelType::CCE_AI_CORE; - op_context.opId = 1; - op_context.kernelFuncId = 1; - op_context.isFlowtable = true; - op_context.opCount = 1; - op_context.opIndex2[0] = 0; - } - - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccUpdateKernelArgs(ccOpContext &op_context, uint64_t data_base_addr, uint64_t weight_base_addr, - uint64_t variable_base_addr, void *args_addr, uint64_t args_size, void *l2ctrl_addr) { - return CC_STATUS_SUCCESS; -} -ccStatus_t ccGetKernelArgsAddrs(ccOpContext &op_context, void *args_addr, uint64_t args_size, void *l2ctrl_addr, - std::vector &op_addrs_info) { - // cce - ccOpAddrsInfo tmp_op_addrs_info; - uint64_t tmp_input = 
(uint64_t)global_mem_base; - tmp_op_addrs_info.addrPos = &tmp_input; - tmp_op_addrs_info.addrData = tmp_input; - op_addrs_info.push_back(tmp_op_addrs_info); - - uint64_t tmp_output = (uint64_t)(global_mem_base + 5476352); - tmp_op_addrs_info.addrPos = &tmp_output; - tmp_op_addrs_info.addrData = tmp_output; - op_addrs_info.push_back(tmp_op_addrs_info); - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccSetKernelArgs(std::vector &date_info) { return CC_STATUS_SUCCESS; } -} // namespace cce -// ccFusion no namespace -ccStatus_t ccFusionStart(ccHandle_t handle, uint32_t graph_id, uint32_t init_flag, CceFusionMemCfg_t mem_cfg) { - return CC_STATUS_SUCCESS; -} - -//???ccFusion ????namespace cce?? -ccStatus_t ccFusionStart(ccHandle_t handle, uint32_t graph_id, uint32_t init_flag, uint32_t addr_change_flag) { - return CC_STATUS_SUCCESS; -} - -ccStatus_t ccFusionEnd(ccHandle_t handle, uint32_t graph_id) { return CC_STATUS_SUCCESS; } - -ccStatus_t ccFusionTaskEnd(ccHandle_t handle, uint32_t graph_id) { return CC_STATUS_SUCCESS; } - -ccStatus_t ccKernelLaunchRepeat(ccHandle_t handle) { return CC_STATUS_SUCCESS; } - -ccStatus_t ccKernelDelete(ccHandle_t handle) { return CC_STATUS_SUCCESS; } - -ccStatus_t cce::ccSetTensorFormat(cce::tagCcTensor *, cce::tagCcTensorFormat) { return CC_STATUS_SUCCESS; } - -namespace fusion { -uint32_t BufferFusion(std::shared_ptr, std::shared_ptr, bool) { return 0; } - -uint32_t BufferFusionTrain(std::shared_ptr, std::shared_ptr) { return 0; } - -uint32_t GraphFusionTrain(ge::ComputeGraphPtr orig_graph, ge::ComputeGraphPtr fusion_graph) { return 0; } -} // namespace fusion -namespace fusion { -using namespace ge; - -uint32_t Fusion(ComputeGraphPtr model_graph, ComputeGraphPtr fusion_graph, kScopeNodeMap_t &te_fusion_map) { - OpDescPtr op_def_a = std::make_shared(); - op_def_a->SetName("reduction_nd"); - op_def_a->SetType("reduction_nd"); - - GeTensorDescPtr v_input_desc = std::make_shared(); - op_def_a->AddInputDesc(*v_input_desc); - - vector v_input; 
- v_input.push_back(0); - op_def_a->SetInputOffset(v_input); - - GeTensorDesc input_desc = op_def_a->GetInputDesc(0); - input_desc.SetFormat(FORMAT_NCHW); - input_desc.SetDataType(DT_FLOAT); - input_desc.SetShape(GeShape({1, 3, 5, 5})); - ge::TensorUtils::SetSize(input_desc, 192); - ge::TensorUtils::SetRealDimCnt(input_desc, 4); - - GeTensorDescPtr output_desc = std::make_shared(); - op_def_a->AddOutputDesc(*output_desc); - - output_desc->SetFormat(FORMAT_NCHW); - output_desc->SetDataType(DT_FLOAT); - output_desc->SetShape(GeShape({1, 3, 5})); - ge::TensorUtils::SetSize(*output_desc, 96); - ge::TensorUtils::SetRealDimCnt(*output_desc, 3); - - OpDescPtr op_def_b = std::make_shared(); - op_def_b->SetName("transdata_1"); - op_def_b->SetType("TransData"); - - int stream_num = 1; - int flag = 0; - - NodePtr node_a = fusion_graph->AddNode(op_def_a); - NodePtr node_b = fusion_graph->AddNode(op_def_b); - - GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); - int32_t a = 1; - int32_t b = 2; - - AttrUtils::SetInt(op_def_a, "fusion_scope", a); - AttrUtils::SetInt(op_def_b, "fusion_scope", b); - - vector node_list1; - node_list1.push_back(node_a); - vector node_list2; - node_list2.push_back(node_b); - te_fusion_map[1] = node_list1; - te_fusion_map[2] = node_list2; - - return FUSION_STATUS_SUCCESS; -} - -uint32_t FusionTaskBuild(cce::ccHandle_t cc_handle, ge::ComputeGraphPtr fusion_graph, ge::Buffer &buffer, - ModelRes &model_res, std::vector &task_def_list_) { - TaskDef task_def_temp; - task_def_list_.push_back(task_def_temp); - - return FUSION_STATUS_SUCCESS; -} -uint32_t GraphFusion(ge::ComputeGraphPtr orig_graph, ge::ComputeGraphPtr fusion_graph) { - *fusion_graph = *orig_graph; - return FUSION_STATUS_SUCCESS; -} - -void FusionTaskBuildComplete(std::vector cc_handle_list) { return; } - -} // namespace fusion - -ccStatus_t cce::ccSetTensorDescriptorQuantizeParam(ccTensorDescriptor_t tensor_desc, - const ccVecQuantizePara_t *vec_quantize_para) { - 
return CC_STATUS_SUCCESS; -} - -ccStatus_t cce::ccSetAllOffsetQuantizeFactors(ccQuantizeDescriptor_t quantize_info, const uint8_t *offset_w, - const uint8_t *offset_d, const uint16_t *scale_req, - const uint16_t *offset_d_next) { - return CC_STATUS_SUCCESS; -} diff --git a/tests/depends/cce/src/op_kernel_registry.cc b/tests/depends/cce/src/op_kernel_registry.cc deleted file mode 100644 index 5ccd1391..00000000 --- a/tests/depends/cce/src/op_kernel_registry.cc +++ /dev/null @@ -1,29 +0,0 @@ -#include "register/op_kernel_registry.h" - -namespace ge { -class OpKernelRegistry::OpKernelRegistryImpl { - -}; - -OpKernelRegistry::OpKernelRegistry() { -} - -OpKernelRegistry::~OpKernelRegistry() { - -} - -bool OpKernelRegistry::IsRegistered(const std::string &op_type) { - return false; -} - -std::unique_ptr OpKernelRegistry::CreateHostCpuOp(const std::string &op_type) { - return nullptr; -} - -void OpKernelRegistry::RegisterHostCpuOp(const std::string &op_type, CreateFn create_fn) { -} - -HostCpuOpRegistrar::HostCpuOpRegistrar(const char *op_type, HostCpuOp *(*create_fn)()) { - -} -} // namespace ge \ No newline at end of file diff --git a/tests/ut/common/graph/CMakeLists.txt b/tests/ut/common/graph/CMakeLists.txt index ccf9ce5e..8da69c14 100644 --- a/tests/ut/common/graph/CMakeLists.txt +++ b/tests/ut/common/graph/CMakeLists.txt @@ -61,53 +61,21 @@ set(UT_FILES "testcase/ge_graph/ge_model_unittest.cc" ) -set(SRC_FILES - "${GE_CODE_DIR}/metadef/graph/option/ge_local_context.cc" - "${GE_CODE_DIR}/metadef/graph/option/ge_context.cc" - "${GE_CODE_DIR}/metadef/graph/anchor.cc" - "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" - "${GE_CODE_DIR}/metadef/graph/attr_value.cc" - "${GE_CODE_DIR}/metadef/graph/buffer.cc" - "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" - "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" - "${GE_CODE_DIR}/metadef/graph/ge_attr_define.cc" - "${GE_CODE_DIR}/metadef/graph/graph.cc" - "${GE_CODE_DIR}/metadef/graph/gnode.cc" - 
"${GE_CODE_DIR}/metadef/graph/ascend_string.cc" - "${GE_CODE_DIR}/metadef/graph/model.cc" - "${GE_CODE_DIR}/metadef/graph/model_serialize.cc" - "${GE_CODE_DIR}/metadef/graph/node.cc" - "${GE_CODE_DIR}/metadef/graph/op_desc.cc" - "${GE_CODE_DIR}/metadef/graph/operator.cc" - "${GE_CODE_DIR}/metadef/graph/operator_factory.cc" - "${GE_CODE_DIR}/metadef/graph/operator_factory_impl.cc" - "${GE_CODE_DIR}/metadef/graph/tensor.cc" - "${GE_CODE_DIR}/metadef/graph/types.cc" - "${GE_CODE_DIR}/metadef/graph/ge_tensor.cc" - "${GE_CODE_DIR}/metadef/graph/shape_refiner.cc" - "${GE_CODE_DIR}/metadef/graph/format_refiner.cc" - "${GE_CODE_DIR}/metadef/graph/inference_context.cc" - "${GE_CODE_DIR}/metadef/graph/detail/attributes_holder.cc" - "${GE_CODE_DIR}/metadef/graph/utils/anchor_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/graph_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/dumper/ge_graph_dumper.cc" - "${GE_CODE_DIR}/metadef/graph/utils/node_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/op_desc_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/type_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/ge_ir_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/tensor_utils.cc" - "${GE_CODE_DIR}/metadef/ops/op_imp.cpp" - "${GE_CODE_DIR}/metadef/graph/opsproto/opsproto_manager.cc" - "${GE_CODE_DIR}/metadef/graph/runtime_inference_context.cc" - "${GE_CODE_DIR}/metadef/graph/ref_relation.cc" - "${GE_CODE_DIR}/metadef/third_party/transformer/src/transfer_shape_according_to_format.cc" - "${GE_CODE_DIR}/metadef/third_party/transformer/src/axis_util.cc" - "${GE_CODE_DIR}/metadef/third_party/transformer/src/expand_dimension.cc" - "${GE_CODE_DIR}/metadef/graph/utils/transformer_utils.cc" -) +FILE(GLOB_RECURSE GRAPH_SRC_FILES_DEPTH0 ${GE_CODE_DIR}/metadef/graph/*.cc) +FILE(GLOB_RECURSE GRAPH_SRC_FILES_DEPTH1 ${GE_CODE_DIR}/metadef/graph/*/*.cc) +FILE(GLOB_RECURSE GRAPH_SRC_FILES_DEPTH2 ${GE_CODE_DIR}/metadef/graph/*/*/*.cc) + +AUX_SOURCE_DIRECTORY(${GE_CODE_DIR}/metadef/ops GRAPH_OPS_SRC_FILES) 
+AUX_SOURCE_DIRECTORY(${GE_CODE_DIR}/metadef/third_party/transformer/src TRANSFORMER_SRC_FILES) -#add_executable(ut_libgraph ${UT_FILES} ${SRC_FILES} ${PROTO_SRCS} ${PROTO_HDRS}) -add_executable(ut_libgraph ${UT_FILES} ${SRC_FILES} ${PROTO_SRCS} ${PROTO_HDRS}) +add_executable(ut_libgraph ${UT_FILES} + ${GRAPH_SRC_FILES_DEPTH0} + ${GRAPH_SRC_FILES_DEPTH1} + ${GRAPH_SRC_FILES_DEPTH2} + ${GRAPH_OPS_SRC_FILES} + ${TRANSFORMER_SRC_FILES} + ${PROTO_SRCS} ${PROTO_HDRS} +) target_compile_options(ut_libgraph PRIVATE -g --coverage -fprofile-arcs -ftest-coverage diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index d8fcd6c3..42fa6128 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -20,6 +20,7 @@ set(CMAKE_CXX_STANDARD 11) set(PROTO_LIST "${GE_CODE_DIR}/metadef/proto/om.proto" "${GE_CODE_DIR}/metadef/proto/ge_ir.proto" + "${GE_CODE_DIR}/metadef/proto/task.proto" "${GE_CODE_DIR}/metadef/proto/ge_api.proto" "${GE_CODE_DIR}/metadef/proto/insert_op.proto" "${GE_CODE_DIR}/metadef/proto/dump_task.proto" @@ -69,62 +70,16 @@ include_directories(${CMAKE_BINARY_DIR}) include_directories(${CMAKE_BINARY_DIR}/proto/ge) include_directories(${CMAKE_BINARY_DIR}/proto/ge/proto) -set(GRAPH_SRC_FILES - "${GE_CODE_DIR}/metadef/graph/option/ge_local_context.cc" - "${GE_CODE_DIR}/metadef/graph/option/ge_context.cc" - "${GE_CODE_DIR}/metadef/graph/ge_attr_define.cc" - "${GE_CODE_DIR}/metadef/graph/anchor.cc" - "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" - "${GE_CODE_DIR}/metadef/graph/attr_value.cc" - "${GE_CODE_DIR}/metadef/graph/buffer.cc" - "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" - "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" - "${GE_CODE_DIR}/metadef/graph/graph.cc" - "${GE_CODE_DIR}/metadef/graph/gnode.cc" - "${GE_CODE_DIR}/metadef/graph/ascend_string.cc" - "${GE_CODE_DIR}/metadef/graph/inference_context.cc" - "${GE_CODE_DIR}/metadef/graph/shape_refiner.cc" - "${GE_CODE_DIR}/metadef/graph/model.cc" - 
"${GE_CODE_DIR}/metadef/graph/model_serialize.cc" - "${GE_CODE_DIR}/metadef/graph/node.cc" - "${GE_CODE_DIR}/metadef/graph/runtime_inference_context.cc" - "${GE_CODE_DIR}/metadef/graph/op_desc.cc" - "${GE_CODE_DIR}/metadef/third_party/transformer/src/transfer_shape_according_to_format.cc" - "${GE_CODE_DIR}/metadef/third_party/transformer/src/axis_util.cc" - "${GE_CODE_DIR}/metadef/third_party/transformer/src/expand_dimension.cc" - "${GE_CODE_DIR}/metadef/graph/operator.cc" - "${GE_CODE_DIR}/metadef/graph/operator_factory.cc" - "${GE_CODE_DIR}/metadef/graph/operator_factory_impl.cc" - "${GE_CODE_DIR}/metadef/graph/ge_tensor.cc" - "${GE_CODE_DIR}/metadef/graph/ref_relation.cc" - "${GE_CODE_DIR}/metadef/graph/tensor.cc" - "${GE_CODE_DIR}/metadef/graph/types.cc" - "${GE_CODE_DIR}/metadef/graph/detail/attributes_holder.cc" - "${GE_CODE_DIR}/metadef/graph/utils/anchor_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/graph_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/dumper/ge_graph_dumper.cc" - "${GE_CODE_DIR}/metadef/graph/utils/ge_ir_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/node_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/op_desc_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/tensor_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/type_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/transformer_utils.cc" - "${GE_CODE_DIR}/metadef/graph/debug/graph_debug.cc" - "${GE_CODE_DIR}/metadef/graph/opsproto/opsproto_manager.cc" - "${GE_CODE_DIR}/metadef/ops/op_imp.cpp" - "${GE_CODE_DIR}/metadef/register/register.cpp" - "${GE_CODE_DIR}/metadef/register/register_pass.cpp" - "${GE_CODE_DIR}/metadef/register/op_kernel_registry.cpp" - "${GE_CODE_DIR}/metadef/register/auto_mapping_util.cpp" - "${GE_CODE_DIR}/metadef/register/tensor_assign.cpp" - "${GE_CODE_DIR}/metadef/register/register_format_transfer.cc" - "${GE_CODE_DIR}/metadef/graph/format_refiner.cc" - "${GE_CODE_DIR}/metadef/register/ops_kernel_builder_registry.cc" - "${GE_CODE_DIR}/metadef/register/op_tiling.cpp" 
- "${GE_CODE_DIR}/metadef/graph/utils/tuning_utils.cc" - "${GE_CODE_DIR}/metadef/register/op_tiling_registry.cpp" - "${GE_CODE_DIR}/metadef/register/op_tiling_registry_impl.cpp" -) + +#### GRAPH_SRC_FILES #### +FILE(GLOB_RECURSE GRAPH_SRC_FILES_DEPTH0 ${GE_CODE_DIR}/metadef/graph/*.cc) +FILE(GLOB_RECURSE GRAPH_SRC_FILES_DEPTH1 ${GE_CODE_DIR}/metadef/graph/*/*.cc) +FILE(GLOB_RECURSE GRAPH_SRC_FILES_DEPTH2 ${GE_CODE_DIR}/metadef/graph/*/*/*.cc) + +AUX_SOURCE_DIRECTORY(${GE_CODE_DIR}/metadef/ops GRAPH_OPS_SRC_FILES) +AUX_SOURCE_DIRECTORY(${GE_CODE_DIR}/metadef/register GRAPH_REGISTER_SRC_FILES) +AUX_SOURCE_DIRECTORY(${GE_CODE_DIR}/metadef/third_party/transformer/src TRANSFORMER_SRC_FILES) + set(PARSER_SRC_FILES "${GE_CODE_DIR}/parser/parser/common/op_map.cc" @@ -134,6 +89,7 @@ set(PARSER_SRC_FILES "${GE_CODE_DIR}/parser/parser/common/model_saver.cc" "${GE_CODE_DIR}/parser/parser/common/parser_types.cc" "${GE_CODE_DIR}/parser/parser/common/parser_inner_ctx.cc" + "${GE_CODE_DIR}/parser/parser/tensorflow/iterator_fusion_pass.cc" ) set(COMMON_SRC_FILES @@ -155,20 +111,12 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/init/gelib.cc" "${GE_CODE_DIR}/ge/engine_manager/dnnengine_manager.cc" "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_manager.cc" - "${GE_CODE_DIR}/ge/session/session_manager.cc" "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_builder_manager.cc" - "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" "${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc" "${GE_CODE_DIR}/ge/common/profiling/ge_profiling.cc" "${GE_CODE_DIR}/ge/graph/manager/host_mem_manager.cc" - "${GE_CODE_DIR}/ge/graph/manager/memory_api.cc" - "${GE_CODE_DIR}/ge/session/inner_session.cc" + "${GE_CODE_DIR}/ge/graph/manager/memory_api.cc" "${GE_CODE_DIR}/ge/graph/manager/util/rt_context_util.cc" - "${GE_CODE_DIR}/ge/graph/execute/graph_execute.cc" - "${GE_CODE_DIR}/ge/graph/preprocess/graph_preprocess.cc" - "${GE_CODE_DIR}/ge/hybrid/hybrid_davinci_model_stub.cc" - 
"${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model.cc" - "${GE_CODE_DIR}/ge/graph/load/model_manager/data_inputer.cc" "${GE_CODE_DIR}/ge/common/dump/dump_properties.cc" "${GE_CODE_DIR}/ge/common/helper/model_helper.cc" "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" @@ -178,128 +126,16 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/model/ge_root_model.cc" "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" - "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" - "${GE_CODE_DIR}/ge/graph/manager/graph_manager.cc" "${GE_CODE_DIR}/ge/common/dump/dump_server.cc" - "${GE_CODE_DIR}/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc" "${GE_CODE_DIR}/ge/graph/preprocess/multi_batch_copy_graph.cc" "${GE_CODE_DIR}/ge/graph/optimize/mem_rw_conflict_optimize.cc" - "${GE_CODE_DIR}/ge/graph/passes/pass_manager.cc" - "${GE_CODE_DIR}/ge/graph/passes/resource_pair_add_control_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/resource_pair_remove_control_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/pass_utils.cc" - "${GE_CODE_DIR}/ge/graph/passes/base_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/bitcast_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/constant_folding_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/aicpu_constant_folding_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/reshape_remove_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/reshape_recovery_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/transop_breadth_fusion_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/transop_depth_fusion_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/same_transdata_breadth_fusion_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/transop_without_reshape_fusion_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/compile_nodes_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/variable_prepare_op_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/variable_ref_delete_op_pass.cc" - 
"${GE_CODE_DIR}/ge/graph/passes/variable_ref_useless_control_out_delete_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/subgraph_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/data_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/net_output_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/replace_transshape_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/constant_fuse_same_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/fuse_data_nodes_with_common_input_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/print_op_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/no_use_reshape_remove_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/iterator_op_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/input_output_connection_identify_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/atomic_addr_clean_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/mark_same_addr_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/mark_graph_unknown_status_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/mark_agnostic_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/dimension_compute_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/dimension_adjust_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/get_original_format_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/shape_operate_op_remove_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/assert_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/dropout_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/infer_base_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/infershape_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/infer_value_range_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/unused_const_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/permute_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/ctrl_edge_transfer_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/end_of_sequence_add_control_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/stop_gradient_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/prevent_gradient_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/identity_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/ref_identity_delete_op_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/placeholder_with_default_pass.cc" - 
"${GE_CODE_DIR}/ge/graph/passes/snapshot_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/guarantee_const_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/var_is_initialized_op_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/parallel_concat_start_op_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/folding_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/cast_translate_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/prune_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/merge_to_stream_merge_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/merge_input_memcpy_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/switch_to_stream_switch_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/mark_force_unknown_for_cond_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/attach_stream_label_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/multi_batch_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/multi_batch_clone_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/subexpression_migration_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/subgraph_const_migration_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/unused_args_clean_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/next_iteration_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/control_trigger_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/cond_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/cond_remove_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/for_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/enter_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/assign_remove_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/inplace_support_check_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/addn_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/common_subexpression_elimination_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/transop_symmetry_elimination_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/save_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/switch_dead_branch_elimination.cc" - "${GE_CODE_DIR}/ge/graph/passes/switch_logic_remove_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/switch_data_edges_bypass.cc" - "${GE_CODE_DIR}/ge/graph/passes/merge_pass.cc" - 
"${GE_CODE_DIR}/ge/graph/passes/variable_op_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/cast_remove_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/transpose_transdata_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/hccl_memcpy_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/hccl_continuous_memcpy_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/flow_ctrl_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/global_step_insert_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/link_gen_mask_nodes_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/replace_with_empty_const_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/hccl_group_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/hccl_tailing_optimization_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/memcpy_addr_async_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/set_input_output_offset_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/parallel_group_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/buffer_pool_memory_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/mark_node_unknown_shape_pass.cc" "${GE_CODE_DIR}/ge/model/ge_model.cc" "${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc" - "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" - "${GE_CODE_DIR}/ge/graph/load/model_manager/zero_copy_offset.cc" - "${GE_CODE_DIR}/ge/graph/load/model_manager/zero_copy_task.cc" - "${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc" - "${GE_CODE_DIR}/ge/graph/load/model_manager/aipp_utils.cc" - "${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc" "${GE_CODE_DIR}/ge/common/kernel_store.cc" "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" "${GE_CODE_DIR}/ge/common/auth/file_saver.cc" "${GE_CODE_DIR}/ge/graph/manager/util/debug.cc" "${GE_CODE_DIR}/ge/common/debug/memory_dumper.cc" - "${GE_CODE_DIR}/ge/graph/manager/graph_context.cc" "${GE_CODE_DIR}/ge/graph/load/graph_loader.cc" "${GE_CODE_DIR}/ge/graph/optimize/graph_optimize.cc" 
"${GE_CODE_DIR}/ge/graph/build/graph_builder.cc" @@ -314,13 +150,10 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/partition/dynamic_shape_partition.cc" "${GE_CODE_DIR}/ge/graph/optimize/summary_optimize.cc" "${GE_CODE_DIR}/ge/ir_build/option_utils.cc" - "${GE_CODE_DIR}/ge/graph/preprocess/insert_op/ge_aipp_op.cc" - "${GE_CODE_DIR}/ge/graph/preprocess/multi_batch_options.cc" "${GE_CODE_DIR}/ge/graph/build/model_builder.cc" "${GE_CODE_DIR}/ge/graph/build/run_context.cc" "${GE_CODE_DIR}/ge/graph/build/stream_graph_optimizer.cc" "${GE_CODE_DIR}/ge/graph/build/task_generator.cc" - "${GE_CODE_DIR}/ge/graph/partition/graph_partition.cc" "${GE_CODE_DIR}/ge/graph/partition/engine_place.cc" "${GE_CODE_DIR}/ge/graph/build/stream_allocator.cc" "${GE_CODE_DIR}/ge/graph/build/memory/memory_assigner.cc" @@ -347,10 +180,10 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/manager/graph_mem_manager.cc" "${GE_CODE_DIR}/ge/common/dump/dump_op.cc" "${GE_CODE_DIR}/ge/common/model_saver.cc" - "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" "${GE_CODE_DIR}/ge/common/ge/datatype_util.cc" "${GE_CODE_DIR}/ge/ge_local_engine/engine/host_cpu_engine.cc" "${GE_CODE_DIR}/ge/session/omg.cc" + "${GE_CODE_DIR}/ge/common/thread_pool.cc" "${GE_CODE_DIR}/ge/ge_opt_info/ge_opt_info.cc" ) @@ -374,57 +207,25 @@ set(COMMON_FORMAT_SRC_FILES "${GE_CODE_DIR}/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc" "${GE_CODE_DIR}/ge/common/formats/utils/formats_trans_utils.cc" "${GE_CODE_DIR}/ge/graph/manager/util/hcom_util.cc" - "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" ) -set(GRAPH_OPTIMIZE_COMMON_SRC_FILES - "${GE_CODE_DIR}/ge/graph/optimize/graph_optimize.cc" - "${GE_CODE_DIR}/ge/graph/optimize/summary_optimize.cc" - "${GE_CODE_DIR}/ge/graph/optimize/mem_rw_conflict_optimize.cc" -) - - set(GRAPH_PREPARE_COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/preprocess/graph_preprocess.cc" "${GE_CODE_DIR}/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc" 
"${GE_CODE_DIR}/ge/graph/preprocess/insert_op/ge_aipp_op.cc" - #"${GE_CODE_DIR}/ge/graph/preprocess/insert_op/base_insert_op.cc" -) - -set(GRAPH_PARTITION_COMMON_SRC_FILES - "${GE_CODE_DIR}/ge/graph/partition/graph_partition.cc" - "${GE_CODE_DIR}/ge/plugin/engine/dnnengines.cc" - "${GE_CODE_DIR}/ge/graph/partition/engine_place.cc" -) - -set(GRAPH_LOAD_COMMON_SRC_FILES - "${GE_CODE_DIR}/ge/graph/load/graph_loader.cc" - "${GE_CODE_DIR}/ge/graph/manager/graph_manager_utils.cc" - "${GE_CODE_DIR}/ge/graph/manager/graph_mem_allocator.cc" - "${GE_CODE_DIR}/ge/graph/manager/graph_var_manager.cc" - "${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" - "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" - "${GE_CODE_DIR}/ge/graph/manager/session_scope_mem_allocator.cc" - "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" - "${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" - "${GE_CODE_DIR}/ge/graph/manager/graph_mem_manager.cc" - "${GE_CODE_DIR}/ge/common/thread_pool.cc" + "${GE_CODE_DIR}/ge/graph/preprocess/multi_batch_options.cc" ) -set(DISTINCT_GRAPH_LOAD_SRC_FILES - "${GE_CODE_DIR}/ge/graph/manager/util/hcom_util.cc" - "${GE_CODE_DIR}/ge/graph/manager/util/debug.cc" - "${GE_CODE_DIR}/ge/common/properties_manager.cc" - "${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc" - "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" - "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" - "${GE_CODE_DIR}/ge/common/util.cc" +set(GRAPH_DAVINCI_MODEL_SRC_FILES + "${GE_CODE_DIR}/ge/graph/load/model_manager/aipp_utils.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/data_dumper.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/data_inputer.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/davinci_model.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/zero_copy_offset.cc" + 
"${GE_CODE_DIR}/ge/graph/load/model_manager/zero_copy_task.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/task_info.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/event_record_task_info.cc" @@ -446,44 +247,24 @@ set(DISTINCT_GRAPH_LOAD_SRC_FILES "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/ffts_task_info.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" - "${GE_CODE_DIR}/ge/model/ge_model.cc" - "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" - "${GE_CODE_DIR}/ge/common/debug/memory_dumper.cc" - "${GE_CODE_DIR}/ge/executor/ge_executor.cc" - "${GE_CODE_DIR}/ge/common/auth/file_saver.cc" + "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" "${GE_CODE_DIR}/ge/graph/manager/model_manager/event_manager.cc" ) set(GRAPH_EXECUTE_COMMON_SRC_FILES - "${GE_CODE_DIR}/ge/graph/execute/graph_execute.cc" - "${GE_CODE_DIR}/ge/graph/manager/graph_manager.cc" - "${GE_CODE_DIR}/ge/graph/manager/graph_context.cc" - "${GE_CODE_DIR}/ge/graph/manager/util/rt_context_util.cc" - "${GE_CODE_DIR}/ge/ge_opt_info/ge_opt_info.cc" - "${GE_CODE_DIR}/ge/graph/manager/graph_context.h" + "${GE_CODE_DIR}/ge/hybrid/hybrid_davinci_model_stub.cc" ) set(GRAPH_BUILD_COMMON_SRC_FILES - "${GE_CODE_DIR}/ge/graph/build/graph_builder.cc" - "${GE_CODE_DIR}/ge/graph/build/task_generator.cc" + "${GE_CODE_DIR}/ge/graph/manager/graph_manager.cc" "${GE_CODE_DIR}/ge/client/ge_api.cc" "${GE_CODE_DIR}/ge/session/inner_session.cc" "${GE_CODE_DIR}/ge/session/session_manager.cc" - "${GE_CODE_DIR}/ge/engine_manager/dnnengine_manager.cc" + "${GE_CODE_DIR}/ge/graph/execute/model_executor.cc" + "${GE_CODE_DIR}/ge/graph/execute/graph_execute.cc" + "${GE_CODE_DIR}/ge/plugin/engine/dnnengines.cc" "${GE_CODE_DIR}/ge/plugin/engine/engine_manage.cc" - 
"${GE_CODE_DIR}/ge/graph/build/logical_stream_allocator.cc" - "${GE_CODE_DIR}/ge/graph/build/stream_allocator.cc" - "${GE_CODE_DIR}/ge/graph/build/memory/block_mem_assigner.cc" - "${GE_CODE_DIR}/ge/graph/build/memory/binary_block_mem_assigner.cc" - "${GE_CODE_DIR}/ge/graph/build/memory/hybrid_mem_assigner.cc" - "${GE_CODE_DIR}/ge/graph/build/memory/max_block_mem_assigner.cc" - "${GE_CODE_DIR}/ge/model/ge_model.cc" - "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" - "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" - "${GE_CODE_DIR}/ge/common/thread_pool.cc" - "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" - "${GE_CODE_DIR}/ge/graph/build/run_context.cc" - "${GE_CODE_DIR}/ge/graph/common/local_context.cc" + "${GE_CODE_DIR}/ge/graph/manager/graph_context.cc" ) set(GRAPH_PASS_COMMON_SRC_FILES @@ -493,7 +274,6 @@ set(GRAPH_PASS_COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/variable_ref_delete_op_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/atomic_addr_clean_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/constant_folding_pass.cc" - "${GE_CODE_DIR}/parser/parser/tensorflow/iterator_fusion_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/iterator_op_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/net_output_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/print_op_pass.cc" @@ -532,10 +312,8 @@ set(GRAPH_PASS_COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/same_transdata_breadth_fusion_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/compile_nodes_pass.cc" - "${GE_CODE_DIR}/ge/graph/common/transop_util.cc" "${GE_CODE_DIR}/ge/graph/passes/flow_ctrl_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/parallel_group_pass.cc" - #"${GE_CODE_DIR}/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/folding_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/variable_op_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/transpose_transdata_pass.cc" @@ -544,10 +322,106 @@ set(GRAPH_PASS_COMMON_SRC_FILES 
"${GE_CODE_DIR}/ge/graph/passes/infer_base_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/infershape_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/infer_value_range_pass.cc" - "${GE_CODE_DIR}/ge/ge_local_engine/engine/host_cpu_engine.cc" - "${GE_CODE_DIR}/ge/analyzer/analyzer.cc" + "${GE_CODE_DIR}/ge/graph/passes/resource_pair_add_control_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/resource_pair_remove_control_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/pass_utils.cc" + "${GE_CODE_DIR}/ge/graph/passes/base_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/bitcast_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/constant_folding_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/aicpu_constant_folding_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/reshape_remove_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/reshape_recovery_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/transop_breadth_fusion_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/transop_depth_fusion_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/same_transdata_breadth_fusion_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/transop_without_reshape_fusion_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/compile_nodes_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/variable_prepare_op_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/variable_ref_delete_op_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/variable_ref_useless_control_out_delete_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/subgraph_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/data_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/net_output_pass.cc" - "${GE_CODE_DIR}/ge/graph/common/local_context.cc" + "${GE_CODE_DIR}/ge/graph/passes/replace_transshape_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/constant_fuse_same_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/fuse_data_nodes_with_common_input_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/print_op_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/no_use_reshape_remove_pass.cc" + 
"${GE_CODE_DIR}/ge/graph/passes/iterator_op_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/input_output_connection_identify_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/atomic_addr_clean_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/mark_same_addr_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/mark_graph_unknown_status_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/mark_agnostic_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/dimension_compute_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/dimension_adjust_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/get_original_format_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/shape_operate_op_remove_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/assert_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/dropout_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/infer_base_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/infershape_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/infer_value_range_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/unused_const_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/permute_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/ctrl_edge_transfer_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/end_of_sequence_add_control_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/stop_gradient_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/prevent_gradient_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/identity_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/ref_identity_delete_op_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/placeholder_with_default_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/snapshot_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/guarantee_const_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/var_is_initialized_op_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/parallel_concat_start_op_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/folding_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/cast_translate_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/prune_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/merge_to_stream_merge_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/merge_input_memcpy_pass.cc" + 
"${GE_CODE_DIR}/ge/graph/passes/switch_to_stream_switch_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/mark_force_unknown_for_cond_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/attach_stream_label_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/multi_batch_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/multi_batch_clone_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/subexpression_migration_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/subgraph_const_migration_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/unused_args_clean_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/next_iteration_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/control_trigger_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/cond_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/cond_remove_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/for_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/enter_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/assign_remove_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/inplace_support_check_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/addn_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/common_subexpression_elimination_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/transop_symmetry_elimination_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/save_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/switch_dead_branch_elimination.cc" + "${GE_CODE_DIR}/ge/graph/passes/switch_logic_remove_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/switch_data_edges_bypass.cc" + "${GE_CODE_DIR}/ge/graph/passes/merge_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/variable_op_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/cast_remove_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/transpose_transdata_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/hccl_continuous_memcpy_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/flow_ctrl_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/global_step_insert_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/link_gen_mask_nodes_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/replace_with_empty_const_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/hccl_group_pass.cc" + 
"${GE_CODE_DIR}/ge/graph/passes/hccl_tailing_optimization_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/memcpy_addr_async_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/set_input_output_offset_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/parallel_group_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/buffer_pool_memory_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/mark_node_unknown_shape_pass.cc" ) set(KERNEL_SRC_FILES @@ -588,6 +462,7 @@ set(KERNEL_SRC_FILES ) set(SINGLE_OP_SRC_FILES + "${GE_CODE_DIR}/ge/executor/ge_executor.cc" "${GE_CODE_DIR}/ge/single_op/task/build_task_utils.cc" "${GE_CODE_DIR}/ge/single_op/task/op_task.cc" "${GE_CODE_DIR}/ge/single_op/task/tbe_task_builder.cc" @@ -621,7 +496,6 @@ set(SINGLE_OP_SRC_FILES "${GE_CODE_DIR}/ge/hybrid/node_executor/aicore/aicore_op_task.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/aicore/aicore_task_builder.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc" - "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc" @@ -637,10 +511,6 @@ set(SINGLE_OP_SRC_FILES "${GE_CODE_DIR}/ge/hybrid/hybrid_davinci_model.cc" ) -set(GE_OPT_INFO_SRC_FILES - "${GE_CODE_DIR}/ge/ge_opt_info/ge_opt_info.cc" -) - # test files set(COMMON_TEST_FILES "graph/passes/graph_builder_utils.cc" @@ -653,7 +523,6 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES #"graph/load/new_model_manager_data_inputer_unittest.cc" #"graph/load/new_model_manager_davinci_model_unittest.cc" "graph/load/model_manager_unittest.cc" - #"graph/load/new_model_manager_task_build_unittest.cc" "graph/load/new_model_manager_model_manager_aicpu_unittest.cc" "graph/load/end_graph_task_unittest.cc" 
"graph/load/new_model_manager_event_manager_unittest.cc" @@ -810,11 +679,12 @@ set(MULTI_PARTS_TEST_FILES "graph/build/task_generator_unittest.cc" "graph/build/buffer_pool_mem_assigner_unittest.cc" "graph/execute/graph_execute_unittest.cc" + "graph/execute/model_executor_unittest.cc" "graph/preprocess/graph_preprocess_unittest.cc" "graph/manager/hcom_util_unittest.cc" "graph/manager/graph_caching_allocator_unittest.cc" "graph/manager/host_mem_allocator_unittest.cc" - "graph/manager/memory_api_unittest.cc" + "graph/manager/memory_api_unittest.cc" "graph/manager/session_scope_mem_allocator_unittest.cc" "graph/manager/run_graph_unittest.cc" "graph/partition/dynamic_shape_partition_unittest.cc" @@ -877,7 +747,6 @@ set(OTHERS_TEST_FILES list(APPEND COMMON_SHARED_LIBRARIES c_sec slog_stub - cce_ge_stub runtime_stub profiler_stub mmpa_stub @@ -890,7 +759,13 @@ list(APPEND COMMON_SHARED_LIBRARIES # build graph add_library(ge_ut_graph STATIC - ${GRAPH_SRC_FILES} ${PARSER_SRC_FILES} ${PROTO_SRCS} ${PROTO_HDRS} + ${GRAPH_SRC_FILES_DEPTH0} + ${GRAPH_SRC_FILES_DEPTH1} + ${GRAPH_SRC_FILES_DEPTH2} + ${GRAPH_OPS_SRC_FILES} + ${GRAPH_REGISTER_SRC_FILES} + ${TRANSFORMER_SRC_FILES} + ${PARSER_SRC_FILES} ${PROTO_SRCS} ${PROTO_HDRS} ) target_compile_definitions(ge_ut_graph PRIVATE @@ -970,57 +845,19 @@ target_link_libraries(ge_prepare_common PRIVATE json ) -# build graph optimize common -add_library(ge_optimize_common STATIC ${GRAPH_OPTIMIZE_COMMON_SRC_FILES} ${PROTO_HDRS}) - -target_compile_definitions(ge_optimize_common PRIVATE - google=ascend_private -) - -target_compile_options(ge_optimize_common PRIVATE - -g --coverage -fprofile-arcs -ftest-coverage - -Werror=format -) - -target_link_libraries(ge_optimize_common PRIVATE - $ - ascend_protobuf - c_sec - json -) - -# build graph partition common -add_library(ge_partition_common STATIC ${GRAPH_PARTITION_COMMON_SRC_FILES} ${PROTO_HDRS}) - -target_compile_definitions(ge_partition_common PRIVATE - google=ascend_private -) - 
-target_compile_options(ge_partition_common PRIVATE - -g --coverage -fprofile-arcs -ftest-coverage - -Werror=format -) - -target_link_libraries(ge_partition_common PRIVATE - $ - ascend_protobuf - c_sec - json -) - # build build graph load common -add_library(ge_load_common STATIC ${GRAPH_LOAD_COMMON_SRC_FILES} ${PROTO_HDRS}) +add_library(ge_davinci_model STATIC ${GRAPH_DAVINCI_MODEL_SRC_FILES} ${PROTO_HDRS}) -target_compile_definitions(ge_load_common PRIVATE +target_compile_definitions(ge_davinci_model PRIVATE google=ascend_private ) -target_compile_options(ge_load_common PRIVATE +target_compile_options(ge_davinci_model PRIVATE -g --coverage -fprofile-arcs -ftest-coverage -Werror=format ) -target_link_libraries(ge_load_common PRIVATE +target_link_libraries(ge_davinci_model PRIVATE $ c_sec ascend_protobuf @@ -1123,14 +960,14 @@ target_compile_definitions(ut_libge_multiparts_utest PRIVATE target_link_libraries(ut_libge_multiparts_utest $ - ge_build_common ge_load_common ge_execute_common ge_optimize_common ge_partition_common ge_prepare_common - ge_single_op ge_ut_common_format ge_ut_common + -Wl,--whole-archive + ge_davinci_model ge_build_common ge_prepare_common ge_execute_common ge_pass_common ge_ut_common_format ge_ut_common + -Wl,--no-whole-archive gtest gtest_main gmock gmock_main ${COMMON_SHARED_LIBRARIES} -lrt -ldl -lgcov ) # libge_others_utest add_executable(ut_libge_others_utest - ${GE_OPT_INFO_SRC_FILES} ${COMMON_TEST_FILES} ${PASS_TEST_FILES} ${EXECUTE_TEST_FILES} @@ -1145,7 +982,9 @@ target_compile_options(ut_libge_others_utest PRIVATE target_link_libraries(ut_libge_others_utest $ - ge_load_common ge_execute_common ge_ut_common ge_ut_common_format + -Wl,--whole-archive + ge_davinci_model ge_build_common ge_prepare_common ge_pass_common ge_execute_common ge_ut_common ge_ut_common_format + -Wl,--no-whole-archive gtest gtest_main gmock gmock_main ${COMMON_SHARED_LIBRARIES} -lrt -ldl -lgcov ) @@ -1163,7 +1002,9 @@ target_compile_options(ut_libge_kernel_utest 
PRIVATE target_link_libraries(ut_libge_kernel_utest $ - ge_load_common ge_ut_common ge_ut_common_format + -Wl,--whole-archive + ge_davinci_model ge_build_common ge_prepare_common ge_pass_common ge_execute_common ge_ut_common ge_ut_common_format + -Wl,--no-whole-archive gtest gtest_main gmock gmock_main ${COMMON_SHARED_LIBRARIES} -lrt -ldl -lgcov ) @@ -1173,7 +1014,6 @@ add_executable(ut_libge_distinct_load_utest ${GENERATOR_TEST_FILES} ${EXECUTOR_TEST_FILES} ${DISTINCT_GRAPH_LOAD_TEST_FILES} - ${DISTINCT_GRAPH_LOAD_SRC_FILES} ${SINGLE_OP_TEST_FILES} ${PROFILING_MNG_TEST_FILES} ${HYBRID_TEST_FILES} @@ -1192,9 +1032,7 @@ target_compile_definitions(ut_libge_distinct_load_utest PRIVATE target_link_libraries(ut_libge_distinct_load_utest $ -Wl,--whole-archive - ge_single_op + ge_single_op ge_davinci_model ge_build_common ge_prepare_common ge_pass_common ge_ut_common ge_ut_common_format -Wl,--no-whole-archive - ge_execute_common ge_load_common - ge_prepare_common ge_optimize_common ge_build_common ge_partition_common ge_ut_common ge_ut_common_format gtest gtest_main gmock gmock_main ${COMMON_SHARED_LIBRARIES} -lrt -ldl -lpthread -lgcov ) diff --git a/tests/ut/ge/common/datatype_transfer_unittest.cc b/tests/ut/ge/common/datatype_transfer_unittest.cc index c311a7cf..ea131b2c 100644 --- a/tests/ut/ge/common/datatype_transfer_unittest.cc +++ b/tests/ut/ge/common/datatype_transfer_unittest.cc @@ -47,7 +47,7 @@ TEST_F(UtestDataTypeTransfer, fp16_fp32) { EXPECT_EQ(transfer.TransDataType(args, result), SUCCESS); EXPECT_EQ(result.length, sizeof(ret)); bool is_equal = true; - for (int i = 0; i < sizeof(ret) / sizeof(ret[0]); ++i) { + for (size_t i = 0; i < sizeof(ret) / sizeof(ret[0]); ++i) { if (abs((reinterpret_cast(result.data.get()))[i] - ret[i]) > 1.0e-6) { is_equal = false; break; @@ -60,7 +60,7 @@ TEST_F(UtestDataTypeTransfer, fp16_fp32) { CastArgs args2{reinterpret_cast(ret), sizeof(ret) / sizeof(ret[0]), DT_FLOAT, DT_FLOAT16}; EXPECT_EQ(transfer2.TransDataType(args2, 
result2), SUCCESS); EXPECT_EQ(result2.length, sizeof(data)); - for (int i = 0; i < sizeof(ret) / sizeof(ret[0]); ++i) { + for (size_t i = 0; i < sizeof(ret) / sizeof(ret[0]); ++i) { EXPECT_FLOAT_EQ((reinterpret_cast(result2.data.get()))[i].val, data[i].val); } EXPECT_EQ(TransDataType(args2, result2), SUCCESS); @@ -81,7 +81,7 @@ TEST_F(UtestDataTypeTransfer, int32_fp16) { CastArgs args{reinterpret_cast(data), sizeof(ret) / sizeof(ret[0]), DT_INT32, DT_FLOAT16}; EXPECT_EQ(transfer.TransDataType(args, result), SUCCESS); EXPECT_EQ(result.length, sizeof(ret)); - for (int i = 0; i < sizeof(ret) / sizeof(ret[0]); ++i) { + for (size_t i = 0; i < sizeof(ret) / sizeof(ret[0]); ++i) { EXPECT_FLOAT_EQ((reinterpret_cast(result.data.get()))[i].val, ret[i].val); } @@ -91,7 +91,7 @@ TEST_F(UtestDataTypeTransfer, int32_fp16) { EXPECT_EQ(transfer2.TransDataType(args2, result2), SUCCESS); EXPECT_EQ(result2.length, sizeof(data)); bool is_equal = true; - for (int i = 0; i < sizeof(data) / sizeof(data[0]); ++i) { + for (size_t i = 0; i < sizeof(data) / sizeof(data[0]); ++i) { if (abs((reinterpret_cast(result2.data.get()))[i] - data[i]) / abs(data[i]) > 0.05) { is_equal = false; break; @@ -154,7 +154,7 @@ TEST_F(UtestDataTypeTransfer, fp32_fp16) { EXPECT_EQ(transfer.TransDataType(args, result), SUCCESS); EXPECT_EQ(result.length, sizeof(ret)); bool is_equal = true; - for (int i = 0; i < sizeof(ret) / sizeof(ret[0]); ++i) { + for (size_t i = 0; i < sizeof(ret) / sizeof(ret[0]); ++i) { if (abs((reinterpret_cast(result.data.get()))[i] - ret[i]) > 1.0e-6) { is_equal = false; break; @@ -167,7 +167,7 @@ TEST_F(UtestDataTypeTransfer, fp32_fp16) { CastArgs args2{reinterpret_cast(ret), sizeof(data) / sizeof(data[0]), DT_FLOAT, DT_FLOAT16}; EXPECT_EQ(transfer2.TransDataType(args2, result2), SUCCESS); EXPECT_EQ(result2.length, sizeof(data)); - for (int i = 0; i < sizeof(data) / sizeof(data[0]); ++i) { + for (size_t i = 0; i < sizeof(data) / sizeof(data[0]); ++i) { 
EXPECT_FLOAT_EQ((reinterpret_cast(result2.data.get()))[i].val, data[i].val); } } @@ -238,7 +238,7 @@ TEST_F(UtestDataTypeTransfer, uint8_fp32) { DataTypeTransfer transfer; EXPECT_EQ(transfer.TransDataType(args, result), SUCCESS); EXPECT_EQ(result.length, sizeof(ret)); - for (int i = 0; i < sizeof(ret) / sizeof(ret[0]); ++i) { + for (size_t i = 0; i < sizeof(ret) / sizeof(ret[0]); ++i) { EXPECT_EQ((reinterpret_cast(result.data.get()))[i], ret[i]); } } @@ -259,7 +259,7 @@ TEST_F(UtestDataTypeTransfer, uint8_int32) { DataTypeTransfer transfer; EXPECT_EQ(transfer.TransDataType(args, result), SUCCESS); EXPECT_EQ(result.length, sizeof(ret)); - for (int i = 0; i < sizeof(ret) / sizeof(ret[0]); ++i) { + for (size_t i = 0; i < sizeof(ret) / sizeof(ret[0]); ++i) { EXPECT_EQ((reinterpret_cast(result.data.get()))[i], ret[i]); } } @@ -282,7 +282,7 @@ TEST_F(UtestDataTypeTransfer, fp32_int32) { DataTypeTransfer transfer; EXPECT_EQ(transfer.TransDataType(args, result), SUCCESS); EXPECT_EQ(result.length, sizeof(ret)); - for (int i = 0; i < sizeof(ret) / sizeof(ret[0]); ++i) { + for (size_t i = 0; i < sizeof(ret) / sizeof(ret[0]); ++i) { EXPECT_FLOAT_EQ((reinterpret_cast(result.data.get()))[i], ret[i]); } } @@ -304,7 +304,7 @@ TEST_F(UtestDataTypeTransfer, int32_fp32) { DataTypeTransfer transfer; EXPECT_EQ(transfer.TransDataType(args, result), SUCCESS); EXPECT_EQ(result.length, sizeof(ret)); - for (int i = 0; i < sizeof(ret) / sizeof(ret[0]); ++i) { + for (size_t i = 0; i < sizeof(ret) / sizeof(ret[0]); ++i) { EXPECT_FLOAT_EQ((reinterpret_cast(result.data.get()))[i], ret[i]); } } @@ -329,7 +329,7 @@ TEST_F(UtestDataTypeTransfer, int32_uint8) { DataTypeTransfer transfer; EXPECT_EQ(transfer.TransDataType(args, result), SUCCESS); EXPECT_EQ(result.length, sizeof(ret)); - for (int i = 0; i < sizeof(ret) / sizeof(ret[0]); ++i) { + for (size_t i = 0; i < sizeof(ret) / sizeof(ret[0]); ++i) { EXPECT_FLOAT_EQ((reinterpret_cast(result.data.get()))[i], ret[i]); } } diff --git 
a/tests/ut/ge/generator/ge_generator_unittest.cc b/tests/ut/ge/generator/ge_generator_unittest.cc index 1bb4430f..b3abb2f9 100644 --- a/tests/ut/ge/generator/ge_generator_unittest.cc +++ b/tests/ut/ge/generator/ge_generator_unittest.cc @@ -83,12 +83,16 @@ TEST_F(UtestGeGenerator, test_build_single_op_offline) { graphStatus TestFunc(Operator &op) { return 0; } graphStatus TestFunc1(Operator &op) { return 1; } TEST_F(UtestGeGenerator, test_infer_format_for_single_op) { + ComputeGraphPtr compute_graph = MakeShared("graph_name"); + auto graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); OperatorFactoryImpl::RegisterInferFormatFunc("Add", TestFunc); shared_ptr op_desc = make_shared("add", "add"); + compute_graph->AddNode(op_desc); GeGenerator generator; - EXPECT_EQ(generator.InferFormatForSingleOp(op_desc), SUCCESS); + EXPECT_EQ(generator.InferFormatForSingleOp(op_desc, graph), SUCCESS); shared_ptr op_desc1 = make_shared("Add", "Add"); - EXPECT_EQ(generator.InferFormatForSingleOp(op_desc1), SUCCESS); + compute_graph->AddNode(op_desc1); + EXPECT_EQ(generator.InferFormatForSingleOp(op_desc1, graph), SUCCESS); OperatorFactoryImpl::RegisterInferFormatFunc("MatMulV2", TestFunc1); shared_ptr op_desc2 = make_shared("MatMulV2", "MatMulV2"); GeTensorDesc tensor_desc; @@ -99,7 +103,8 @@ TEST_F(UtestGeGenerator, test_infer_format_for_single_op) { EXPECT_EQ(op_desc2->AddInputDesc(tensor_desc), GRAPH_SUCCESS); EXPECT_EQ(op_desc2->AddOutputDesc(tensor_desc), GRAPH_SUCCESS); EXPECT_EQ(op_desc2->AddOutputDesc(tensor_desc), GRAPH_SUCCESS); - EXPECT_EQ(generator.InferFormatForSingleOp(op_desc2), FAILED); + compute_graph->AddNode(op_desc2); + EXPECT_EQ(generator.InferFormatForSingleOp(op_desc2, graph), FAILED); } TEST_F(UtestGeGenerator, test_build_single_op_online) { diff --git a/tests/ut/ge/graph/execute/model_executor_unittest.cc b/tests/ut/ge/graph/execute/model_executor_unittest.cc new file mode 100644 index 00000000..d4e0e3a4 --- /dev/null +++ 
b/tests/ut/ge/graph/execute/model_executor_unittest.cc @@ -0,0 +1,327 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#define protected public +#define private public +#include "graph/execute/model_executor.h" +#include "graph/manager/graph_manager.h" +#include "graph/load/model_manager/model_manager.h" +#include "graph/load/model_manager/davinci_model.h" + +using namespace std; + +namespace ge { +class UtestModelExecutorTest : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} +}; + +static NodePtr CreateNode(ComputeGraph &graph, const string &name, const string &type, int in_num, int out_num) { + OpDescPtr op_desc = std::make_shared(name, type); + op_desc->SetStreamId(0); + static int32_t index = 0; + op_desc->SetId(index++); + + GeTensorDesc tensor(GeShape(), FORMAT_ND, DT_INT64); + TensorUtils::SetSize(tensor, 64); + vector input_offset; + for (int i = 0; i < in_num; i++) { + op_desc->AddInputDesc(tensor); + input_offset.emplace_back(index * 64 + i * 64); + } + op_desc->SetInputOffset(input_offset); + + vector output_offset; + for (int i = 0; i < out_num; i++) { + op_desc->AddOutputDesc(tensor); + output_offset.emplace_back(index * 64 + in_num * 64 + i * 64); + } + op_desc->SetOutputOffset(output_offset); + + op_desc->SetWorkspace({}); + op_desc->SetWorkspaceBytes({}); + op_desc->SetOpKernelLibName("DNN_VM_RTS_OP_STORE"); + + return 
graph.AddNode(op_desc); +} + +TEST_F(UtestModelExecutorTest, test_load_graph_sync) { + ModelExecutor model_executor; + EXPECT_EQ(model_executor.Initialize({}, 0), SUCCESS); + + auto compute_graph = MakeShared("test_graph"); + GeRootModelPtr ge_root_model = MakeShared(compute_graph); + + GeModelPtr ge_model = MakeShared(); + ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(compute_graph)); + ge_root_model->SetSubgraphInstanceNameToModel(compute_graph->GetName(), ge_model); + + GraphId graph_id = 1; + GraphNodePtr graph_node = MakeShared(graph_id); + graph_node->SetGeRootModel(ge_root_model); + graph_node->SetLoadFlag(true); + graph_node->SetAsync(false); + + EXPECT_EQ(model_executor.LoadGraph(ge_root_model, graph_node), SUCCESS); + EXPECT_EQ(model_executor.UnloadGraph(ge_root_model, graph_id), SUCCESS); + + EXPECT_EQ(model_executor.Finalize(), SUCCESS); +} + +TEST_F(UtestModelExecutorTest, test_load_graph_async) { + ModelExecutor model_executor; + EXPECT_EQ(model_executor.Initialize({}, 0), SUCCESS); + + Graph graph("test_graph"); + auto compute_graph = MakeShared("test_graph"); + GeRootModelPtr ge_root_model = MakeShared(compute_graph); + + GeModelPtr ge_model = MakeShared(); + ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(compute_graph)); + ge_root_model->SetSubgraphInstanceNameToModel(compute_graph->GetName(), ge_model); + + GraphId graph_id = 1; + GraphNodePtr graph_node = MakeShared(graph_id); + graph_node->SetGeRootModel(ge_root_model); + graph_node->SetLoadFlag(true); + graph_node->SetAsync(true); + + EXPECT_EQ(model_executor.LoadGraph(ge_root_model, graph_node), SUCCESS); + + EXPECT_EQ(model_executor.UnloadGraph(ge_root_model, graph_id), SUCCESS); + + EXPECT_EQ(model_executor.Finalize(), SUCCESS); +} + +TEST_F(UtestModelExecutorTest, test_load_graph_failed) { + ModelExecutor model_executor; + EXPECT_EQ(model_executor.Initialize({}, 0), SUCCESS); + + Graph graph("test_graph"); + auto compute_graph = MakeShared("test_graph"); + 
GeRootModelPtr ge_root_model = MakeShared(compute_graph); + + GraphId graph_id = 1; + GraphNodePtr graph_node = MakeShared(graph_id); + graph_node->SetGeRootModel(ge_root_model); + graph_node->SetLoadFlag(true); + graph_node->SetAsync(true); + + // GeModel is null, DavinciModel::Assign will return FAILED + setenv(kEnvGeuseStaticMemory, "1", true); + EXPECT_EQ(model_executor.LoadGraph(ge_root_model, graph_node), FAILED); + EXPECT_EQ(model_executor.UnloadGraph(ge_root_model, graph_id), SUCCESS); + + EXPECT_EQ(model_executor.Finalize(), SUCCESS); + unsetenv(kEnvGeuseStaticMemory); +} + +TEST_F(UtestModelExecutorTest, test_check_and_release_memory) { + { + auto listener = MakeShared(); + shared_ptr davinci_model1 = MakeShared(1, listener); + davinci_model1->SetId(1); + ModelManager::GetInstance()->InsertModel(1, davinci_model1); + shared_ptr davinci_model2 = MakeShared(2, listener); + davinci_model1->SetId(2); + ModelManager::GetInstance()->InsertModel(2, davinci_model2); + } + + ModelExecutor model_executor; + EXPECT_EQ(model_executor.Initialize({}, 0), SUCCESS); + + GeModelPtr ge_model = make_shared(); + int64_t memory_size = 25 * 1024UL * 1024UL * 1024UL; + int64_t weight_size = 25 * 1024UL * 1024UL * 1024UL; + uint64_t session_id = 0; + EXPECT_TRUE(AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, memory_size)); + EXPECT_TRUE(AttrUtils::SetInt(ge_model, ATTR_MODEL_WEIGHT_SIZE, weight_size)); + EXPECT_TRUE(AttrUtils::SetInt(ge_model, MODEL_ATTR_SESSION_ID, session_id)); + + GraphId graph_id = 1; + GraphNodePtr graph_node = MakeShared(graph_id); + model_executor.AddGraphNode(graph_id, graph_node); + + ComputeGraphPtr compute_graph = MakeShared("test_graph"); + GeRootModelPtr ge_root_model = MakeShared(compute_graph); + ge_root_model->SetModelId(1); + ge_root_model->SetModelId(2); + graph_node->SetGeRootModel(ge_root_model); + graph_node->SetLoadFlag(true); + + EXPECT_EQ(model_executor.CheckAndReleaseMemory(ge_model, graph_node), SUCCESS); + 
EXPECT_EQ(model_executor.Finalize(), SUCCESS); +} + +TEST_F(UtestModelExecutorTest, parse_inputs_dims_data) { + ModelExecutor model_executor; + EXPECT_EQ(model_executor.Initialize({}, 0), SUCCESS); + + OmeContext context; + SetLocalOmeContext(context); + ComputeGraphPtr compute_graph = MakeShared("test_graph"); + const auto data1 = CreateNode(*compute_graph, DATA, "data1", 1, 1); + const auto next1 = CreateNode(*compute_graph, GETNEXT, "data1", 1, 1); + + Tensor tensor; + std::vector input_tensors; + input_tensors.emplace_back(tensor); + EXPECT_EQ(model_executor.ParseInputsDims(input_tensors), SUCCESS); // dynamic_node_type is empty, just return + + context.dynamic_node_type = DATA; + EXPECT_EQ(model_executor.ParseInputsDims(input_tensors), SUCCESS); // ParseInputsDimsForData + + context.getnext_nosink_nodes.emplace_back(next1); + EXPECT_EQ(model_executor.ParseInputsDims(input_tensors), SUCCESS); // ParseInputsDimsForGetNexNosinkAndData + + EXPECT_EQ(model_executor.Finalize(), SUCCESS); +} + +TEST_F(UtestModelExecutorTest, parse_inputs_dims_getnext) { + ModelExecutor model_executor; + EXPECT_EQ(model_executor.Initialize({}, 0), SUCCESS); + + OmeContext context; + SetLocalOmeContext(context); + ComputeGraphPtr compute_graph = MakeShared("test_graph"); + const auto data1 = CreateNode(*compute_graph, DATA, "data1", 1, 1); + const auto next1 = CreateNode(*compute_graph, GETNEXT, "data1", 1, 1); + + Tensor tensor; + std::vector input_tensors; + input_tensors.emplace_back(tensor); + + context.dynamic_node_type = GETNEXT; + EXPECT_EQ(model_executor.ParseInputsDims(input_tensors), SUCCESS); // just getnext_sink + + context.getnext_nosink_nodes.emplace_back(next1); + EXPECT_EQ(model_executor.ParseInputsDims(input_tensors), SUCCESS); // ParseInputsDimsForData + + context.data_nodes.emplace_back(data1); + EXPECT_EQ(model_executor.ParseInputsDims(input_tensors), PARAM_INVALID); // ParseInputsDimsForGetNexNosinkAndData + AttrUtils::SetInt(next1->GetOpDesc(), ATTR_NAME_INDEX, 
0); + EXPECT_EQ(model_executor.ParseInputsDims(input_tensors), SUCCESS); // ParseInputsDimsForGetNexNosinkAndData + + EXPECT_EQ(model_executor.Finalize(), SUCCESS); +} + +TEST_F(UtestModelExecutorTest, test_run_thread) { + ModelExecutor model_executor; + EXPECT_EQ(model_executor.Initialize({}, 0), SUCCESS); + + GraphId graph_id = 1; + uint64_t session_id = 0; + error_message::Context error_context; + GEThreadLocalContext context; + const auto callback = [](Status status, std::vector &outputs) { }; + + auto compute_graph = MakeShared("test_graph"); + GeRootModelPtr ge_root_model = MakeShared(compute_graph); + + GeModelPtr ge_model = MakeShared(); + ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(compute_graph)); + ge_root_model->SetSubgraphInstanceNameToModel(compute_graph->GetName(), ge_model); + + GraphNodePtr graph_node = MakeShared(graph_id); + graph_node->SetGeRootModel(ge_root_model); + graph_node->SetLoadFlag(false); + graph_node->SetAsync(true); + graph_node->IncreaseLoadCount(); + graph_node->Lock(); + + Tensor tensor; + std::vector input_tensors; + input_tensors.emplace_back(tensor); + + RunArgs run_args{graph_node, graph_id, session_id, error_context, input_tensors, ge_root_model, context, callback}; + EXPECT_EQ(model_executor.PushGraph(run_args), SUCCESS); + + while (model_executor.run_args_q_.Size() > 0) { + usleep(10); // 0.01ms, Wait for RunThread. 
+ } + EXPECT_EQ(model_executor.Finalize(), SUCCESS); +} + +static void test_run_graph(ModelExecutor &model_executor) { + auto compute_graph = MakeShared("test_graph"); + GeRootModelPtr ge_root_model = MakeShared(compute_graph); + + GeModelPtr ge_model = MakeShared(); + ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(compute_graph)); + ge_root_model->SetSubgraphInstanceNameToModel(compute_graph->GetName(), ge_model); + + GraphId graph_id = 1; + GraphNodePtr graph_node = MakeShared(graph_id); + graph_node->SetGeRootModel(ge_root_model); + graph_node->SetLoadFlag(false); + graph_node->SetAsync(false); // RunGraph is synchronous. + EXPECT_EQ(model_executor.LoadGraph(ge_root_model, graph_node), SUCCESS); + + std::vector inputs; + std::vector outputs; + EXPECT_EQ(model_executor.RunGraph(graph_node, graph_id, inputs, outputs), SUCCESS); +} + +TEST_F(UtestModelExecutorTest, test_run_graph_train) { + GetThreadLocalContext().SetGlobalOption({{OPTION_GRAPH_RUN_MODE, "1"}}); + ModelExecutor model_executor; + EXPECT_EQ(model_executor.Initialize({}, 0), SUCCESS); + test_run_graph(model_executor); + EXPECT_EQ(model_executor.Finalize(), SUCCESS); +} + +TEST_F(UtestModelExecutorTest, test_run_graph_infer) { + GetThreadLocalContext().SetGlobalOption({}); + GetThreadLocalContext().SetSessionOption({}); + GetThreadLocalContext().SetGraphOption({}); + ModelExecutor model_executor; + EXPECT_EQ(model_executor.Initialize({}, 0), SUCCESS); + test_run_graph(model_executor); + EXPECT_EQ(model_executor.Finalize(), SUCCESS); +} + +TEST_F(UtestModelExecutorTest, test_run_graph_with_stream) { + ModelExecutor model_executor; + EXPECT_EQ(model_executor.Initialize({}, 0), SUCCESS); + + GraphId graph_id = 1; + auto compute_graph = MakeShared("test_graph"); + GeRootModelPtr ge_root_model = MakeShared(compute_graph); + + GeModelPtr ge_model = MakeShared(); + ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(compute_graph)); +
ge_root_model->SetSubgraphInstanceNameToModel(compute_graph->GetName(), ge_model); + + GraphNodePtr graph_node = MakeShared(graph_id); + graph_node->SetGeRootModel(ge_root_model); + graph_node->SetLoadFlag(false); + graph_node->SetAsync(true); + + GeTensor tensor; + std::vector inputs{tensor}; + std::vector outputs; + + rtStream_t stream = nullptr; + rtStreamCreate(&stream, 0); + EXPECT_EQ(model_executor.RunGraphWithStream(graph_node, graph_id, stream, inputs, outputs), 145003); + + EXPECT_EQ(model_executor.Finalize(), SUCCESS); + rtStreamDestroy(stream); +} +} // namespace ge diff --git a/tests/ut/ge/graph/load/model_manager_unittest.cc b/tests/ut/ge/graph/load/model_manager_unittest.cc index d9e4eabd..a0644510 100644 --- a/tests/ut/ge/graph/load/model_manager_unittest.cc +++ b/tests/ut/ge/graph/load/model_manager_unittest.cc @@ -78,7 +78,7 @@ class UtestModelManagerModelManager : public testing::Test { const int model_len = 10; data.model_len = sizeof(ModelFileHeader) + model_len; data.model_data = new uint8_t[data.model_len]; - memset((uint8_t *)data.model_data + sizeof(ModelFileHeader), 10, model_len); + memset((uint8_t *)data.model_data + sizeof(ModelFileHeader), 0, model_len); ModelFileHeader *header = (ModelFileHeader *)data.model_data; header->magic = MODEL_FILE_MAGIC_NUM; @@ -93,7 +93,7 @@ class UtestModelManagerModelManager : public testing::Test { data.key = ENC_KEY; data.model_data = new uint8_t[data.model_len]; uint8_t data_ori[model_len]; - memset(data_ori, 10, model_len); + memset(data_ori, 0, model_len); ModelFileHeader *header = (ModelFileHeader *)data.model_data; header->magic = MODEL_FILE_MAGIC_NUM; header->version = MODEL_VERSION; @@ -224,6 +224,7 @@ TEST_F(UtestModelManagerModelManager, case_load_model_encypt_type_unsupported) { ModelFileHeader *header = (ModelFileHeader *)data.model_data; header->is_encrypt = 255; uint32_t model_id = 1; + // Error for: LoadModelPartitionTable: Invalid partition_table->num:0 
EXPECT_EQ(mm.LoadModelOffline(model_id, data, nullptr, nullptr), ACL_ERROR_GE_PARAM_INVALID); delete[](uint8_t *) data.model_data; } diff --git a/tests/ut/ge/graph/manager/graph_manager_unittest.cc b/tests/ut/ge/graph/manager/graph_manager_unittest.cc index 9bae10eb..9663e90f 100644 --- a/tests/ut/ge/graph/manager/graph_manager_unittest.cc +++ b/tests/ut/ge/graph/manager/graph_manager_unittest.cc @@ -15,20 +15,9 @@ */ #include + #include #include -#define protected public -#define private public -#include "graph/manager/graph_manager.h" -#include "graph/load/model_manager/model_manager.h" -#include "graph/load/model_manager/davinci_model.h" -#define const -#include "common/helper/model_cache_helper.h" -#undef const -#include "init/gelib.h" -#undef private -#undef public - #include #include #include @@ -38,6 +27,14 @@ #include #include +#define protected public +#define private public +#include "graph/manager/graph_manager.h" +#define const +#include "common/helper/model_cache_helper.h" +#undef const +#include "init/gelib.h" + #include "common/math/math_util.h" #include "common/thread_pool.h" #include "common/dump/dump_manager.h" @@ -121,7 +118,6 @@ using namespace std; using namespace testing; -using namespace ge; using namespace domi; namespace { @@ -129,6 +125,8 @@ const uint32_t kNotAdded = 0; const uint32_t kStartAdd = 1; const uint32_t kDoneAdded = 2; } + +namespace ge { class UtestGraphManagerTest : public testing::Test { protected: void SetUp() {} @@ -136,6 +134,31 @@ class UtestGraphManagerTest : public testing::Test { void TearDown() {} }; +class StubExecutor : public Executor { + public: + Status LoadGraph(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) { + return SUCCESS; + } + + Status UnloadGraph(const GeRootModelPtr &ge_root_model, uint32_t graph_id) { + return SUCCESS; + } + + Status PushGraph(const RunArgs &args) { + return SUCCESS; + } + + Status RunGraph(const GraphNodePtr &graph_node, GraphId graph_id, + const std::vector 
&inputs, std::vector &outputs) { + return SUCCESS; + } + + Status RunGraphWithStream(const GraphNodePtr &graph_node, GraphId graph_id, rtStream_t stream, + const std::vector &inputs, std::vector &outputs){ + return SUCCESS; + } +}; + void CreateGraph(Graph &graph) { TensorDesc desc(ge::Shape({1, 3, 224, 224})); uint32_t size = desc.GetShape().GetShapeSize(); @@ -288,26 +311,20 @@ TEST_F(UtestGraphManagerTest, test_remove_graph_1) { TEST_F(UtestGraphManagerTest, test_remove_graph_2) { GraphId graph_id = 1; GraphManager graph_manager; + StubExecutor stub_executor; + graph_manager.executor_ = &stub_executor; + GraphNodePtr graph_node = MakeShared(graph_id); Graph graph("test_graph"); CreateGraph(graph); auto compute_graph = GraphUtils::GetComputeGraph(graph); GeRootModelPtr ge_root_model = MakeShared(compute_graph); - auto model_manager = ModelManager::GetInstance(); - auto listener = MakeShared(); - shared_ptr davinci_model1 = MakeShared(1, listener); - davinci_model1->SetId(1); - shared_ptr davinci_model2 = MakeShared(2, listener); - davinci_model1->SetId(2); - model_manager->InsertModel(1, davinci_model1); - model_manager->InsertModel(2, davinci_model2); ge_root_model->SetModelId(1); ge_root_model->SetModelId(2); graph_node->SetGeRootModel(ge_root_model); graph_node->SetLoadFlag(true); graph_manager.AddGraphNode(graph_id, graph_node); - Status status = graph_manager.RemoveGraph(graph_id); - EXPECT_EQ(status, ge::SUCCESS); + EXPECT_EQ(graph_manager.RemoveGraph(graph_id), SUCCESS); } TEST_F(UtestGraphManagerTest, test_pre_run_thread) { @@ -327,7 +344,7 @@ TEST_F(UtestGraphManagerTest, test_pre_run_thread) { GraphNodePtr graph_node = MakeShared(graph_id); graph_manager.AddGraphNode(graph_id, graph_node); - graph_manager.PreRunThread(&graph_manager); + graph_manager.PreRunThread(); // end with failed } @@ -355,48 +372,10 @@ TEST_F(UtestGraphManagerTest, test_pre_run_thread_2) { graph_manager.AddGraphNode(graph_id, graph_node_2); ret = 
graph_manager.prerun_args_q_.Push({graph_id, input_tensor, session_id, error_context, context, callback}); EXPECT_EQ(ret, true); - graph_manager.PreRunThread(&graph_manager); + graph_manager.PreRunThread(); // end with failed } -TEST_F(UtestGraphManagerTest, test_check_and_release_memory) { - - GraphManager graph_manager; - GeModelPtr ge_model = make_shared(); - int64_t memory_size = 25 * 1024UL * 1024UL * 1024UL; - int64_t weight_size = 25 * 1024UL * 1024UL * 1024UL; - uint64_t session_id = 0; - ge::AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, memory_size); - ge::AttrUtils::SetInt(ge_model, ATTR_MODEL_WEIGHT_SIZE, weight_size); - ge::AttrUtils::SetInt(ge_model, MODEL_ATTR_SESSION_ID, session_id); - - - GraphId graph_id = 1; - GraphNodePtr graph_node = MakeShared(graph_id); - graph_manager.AddGraphNode(graph_id, graph_node); - graph_manager.IncreaseGraphCount(graph_id); - graph_manager.IncreaseGraphCount(graph_id); - - auto model_manager = ModelManager::GetInstance(); - auto listener = MakeShared(); - shared_ptr davinci_model1 = MakeShared(1, listener); - davinci_model1->SetId(1); - shared_ptr davinci_model2 = MakeShared(2, listener); - davinci_model1->SetId(2); - model_manager->InsertModel(1, davinci_model1); - model_manager->InsertModel(2, davinci_model2); - ComputeGraphPtr compute_graph = MakeShared("test_graph"); - bool is_dynamic_shape = false; - (void)AttrUtils::GetBool(compute_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape); - GeRootModelPtr ge_root_model = MakeShared(compute_graph); - ge_root_model->SetModelId(1); - ge_root_model->SetModelId(2); - graph_node->SetGeRootModel(ge_root_model); - graph_node->SetLoadFlag(true); - Status status = graph_manager.CheckAndReleaseMemory(ge_model, graph_node); - EXPECT_EQ(status, ge::SUCCESS); -} - TEST_F(UtestGraphManagerTest, test_check_incre_build_and_pre_run_1) { // no need to build GraphId graph_id = 1; @@ -406,7 +385,7 @@ TEST_F(UtestGraphManagerTest, test_check_incre_build_and_pre_run_1) { 
GraphManager::PreRunArgs arg; GraphNodePtr graph_node = MakeShared(graph_id); graph_node->SetBuildFlag(true); - Status status = graph_manager.CheckIncreBuildAndPreRun(&graph_manager, arg, graph_node, ge_root_model); + Status status = graph_manager.CheckIncreBuildAndPreRun(arg, graph_node, ge_root_model); EXPECT_EQ(status, ge::SUCCESS); } @@ -422,7 +401,7 @@ TEST_F(UtestGraphManagerTest, test_check_incre_build_and_pre_run_2) { graph_node->SetBuildFlag(true); graph_node->Lock(); graph_manager.var_acc_ctrl_.graph_ids_need_rebuild_.insert(graph_id); - Status status = graph_manager.CheckIncreBuildAndPreRun(&graph_manager, arg, graph_node, ge_root_model); + Status status = graph_manager.CheckIncreBuildAndPreRun(arg, graph_node, ge_root_model); EXPECT_EQ(status, ge::PARAM_INVALID); } @@ -437,7 +416,7 @@ TEST_F(UtestGraphManagerTest, test_check_incre_build_and_pre_run_3) { GraphNodePtr graph_node = MakeShared(graph_id); graph_node->SetBuildFlag(false); graph_node->Lock(); - Status status = graph_manager.CheckIncreBuildAndPreRun(&graph_manager, arg, graph_node, ge_root_model); + Status status = graph_manager.CheckIncreBuildAndPreRun(arg, graph_node, ge_root_model); EXPECT_NE(status, ge::SUCCESS); } @@ -471,14 +450,6 @@ TEST_F(UtestGraphManagerTest, test_add_graph_with_copy_fail) { EXPECT_NE(status, ge::SUCCESS); } -TEST_F(UtestGraphManagerTest, ParseInputsDimsForData_success) { - GraphManager graph_manager; - std::vector input_tensors; - ge::Tensor tensor; - input_tensors.emplace_back(tensor); - graph_manager.ParseInputsDimsForData(input_tensors); -} - TEST_F(UtestGraphManagerTest, test_prerunthread_failed_1) { GraphId graph_id = 1; GraphManager graph_manager; @@ -509,7 +480,7 @@ TEST_F(UtestGraphManagerTest, test_prerunthread_failed_1) { graph_node->SetRunFlag(false); // function return. 
graph_manager.prerun_args_q_.Push(args); - auto t1 = std::thread(GraphManager::PreRunThread, &graph_manager); + auto t1 = std::thread(&GraphManager::PreRunThread, &graph_manager); if (t1.joinable()) { t1.join(); } @@ -549,7 +520,7 @@ TEST_F(UtestGraphManagerTest, test_prerunthread_failed_2) { int ret = setenv("ENABLE_NETWORK_ANALYSIS_DEBUG", "1", 1); EXPECT_EQ(ret, 0); graph_manager.prerun_args_q_.Push(args); - auto t1 = std::thread(GraphManager::PreRunThread, &graph_manager); + auto t1 = std::thread(&GraphManager::PreRunThread, &graph_manager); if (t1.joinable()) { t1.join(); } @@ -593,3 +564,4 @@ TEST_F(UtestGraphManagerTest, ChangeAndDeleteConst_success) { auto all_nodes = graph->GetDirectNode(); EXPECT_EQ(all_nodes.size(), 3); } +} // namespace ge diff --git a/tests/ut/ge/graph/passes/folding_kernel/gather_v2_kernel_unittest.cc b/tests/ut/ge/graph/passes/folding_kernel/gather_v2_kernel_unittest.cc index 0083146b..ad165d25 100644 --- a/tests/ut/ge/graph/passes/folding_kernel/gather_v2_kernel_unittest.cc +++ b/tests/ut/ge/graph/passes/folding_kernel/gather_v2_kernel_unittest.cc @@ -92,7 +92,7 @@ TEST_F(UtestGraphPassesFoldingKernelGatherV2Kernel, INT32Axis0VersionA) { GeTensorPtr tensor_out = outputs[0]; int32_t *data_buf = (int32_t *)tensor_out->GetData().data(); vector expect_out = {2, 2}; - for (int i = 0; i < expect_out.size(); i++) { + for (size_t i = 0; i < expect_out.size(); i++) { EXPECT_EQ(*(data_buf + i), expect_out[i]); } } @@ -139,7 +139,7 @@ TEST_F(UtestGraphPassesFoldingKernelGatherV2Kernel, INT32Axis0VersionB) { GeTensorPtr tensor_out = outputs[0]; int32_t *data_buf = (int32_t *)tensor_out->GetData().data(); vector expect_out = {3, 3}; - for (int i = 0; i < expect_out.size(); i++) { + for (size_t i = 0; i < expect_out.size(); i++) { EXPECT_EQ(*(data_buf + i), expect_out[i]); } } @@ -186,7 +186,7 @@ TEST_F(UtestGraphPassesFoldingKernelGatherV2Kernel, INT64Axis0) { GeTensorPtr tensor_out = outputs[0]; int64_t *data_buf = (int64_t 
*)tensor_out->GetData().data(); vector expect_out = {3, 3}; - for (int i = 0; i < expect_out.size(); i++) { + for (size_t i = 0; i < expect_out.size(); i++) { EXPECT_EQ(*(data_buf + i), expect_out[i]); } } @@ -233,7 +233,7 @@ TEST_F(UtestGraphPassesFoldingKernelGatherV2Kernel, INT32Axis0) { GeTensorPtr tensor_out = outputs[0]; int32_t *data_buf = (int32_t *)tensor_out->GetData().data(); vector expect_out = {11, 12, 13, 14, 15, 16, 17, 18, 19, 11, 12, 13, 14, 15, 16, 17, 18, 19}; - for (int i = 0; i < expect_out.size(); i++) { + for (size_t i = 0; i < expect_out.size(); i++) { EXPECT_EQ(*(data_buf + i), expect_out[i]); } } @@ -279,7 +279,7 @@ TEST_F(UtestGraphPassesFoldingKernelGatherV2Kernel, INT32Axis0And1) { GeTensorPtr tensor_out = outputs[0]; int32_t *data_buf = (int32_t *)tensor_out->GetData().data(); vector expect_out = {11, 12, 13, 14, 15, 16, 17, 18, 19, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - for (int i = 0; i < expect_out.size(); i++) { + for (size_t i = 0; i < expect_out.size(); i++) { EXPECT_EQ(*(data_buf + i), expect_out[i]); } } @@ -327,7 +327,7 @@ TEST_F(UtestGraphPassesFoldingKernelGatherV2Kernel, INT32Axis1) { GeTensorPtr tensor_out = outputs[0]; int32_t *data_buf = (int32_t *)tensor_out->GetData().data(); vector expect_out = {4, 5, 6, 4, 5, 6, 14, 15, 16, 14, 15, 16}; - for (int i = 0; i < expect_out.size(); i++) { + for (size_t i = 0; i < expect_out.size(); i++) { EXPECT_EQ(*(data_buf + i), expect_out[i]); } } @@ -374,7 +374,7 @@ TEST_F(UtestGraphPassesFoldingKernelGatherV2Kernel, INT32Axis2) { GeTensorPtr tensor_out = outputs[0]; int32_t *data_buf = (int32_t *)tensor_out->GetData().data(); vector expect_out = {1, 1, 4, 4, 7, 7, 11, 11, 14, 14, 17, 17}; - for (int i = 0; i < expect_out.size(); i++) { + for (size_t i = 0; i < expect_out.size(); i++) { EXPECT_EQ(*(data_buf + i), expect_out[i]); } } @@ -422,7 +422,7 @@ TEST_F(UtestGraphPassesFoldingKernelGatherV2Kernel, INT32Axis3) { GeTensorPtr tensor_out = outputs[0]; int32_t *data_buf = (int32_t 
*)tensor_out->GetData().data(); vector expect_out = {1, 2, 4, 5, 7, 8, 11, 12, 14, 15, 17, 18, 1, 2, 4, 5, 7, 8, 11, 12, 14, 15, 17, 18}; - for (int i = 0; i < expect_out.size(); i++) { + for (size_t i = 0; i < expect_out.size(); i++) { EXPECT_EQ(*(data_buf + i), expect_out[i]); } } @@ -470,7 +470,7 @@ TEST_F(UtestGraphPassesFoldingKernelGatherV2Kernel, INT8Axis0) { GeTensorPtr tensor_out = outputs[0]; int8_t *data_buf = (int8_t *)tensor_out->GetData().data(); vector expect_out = {2, 2}; - for (int i = 0; i < expect_out.size(); i++) { + for (size_t i = 0; i < expect_out.size(); i++) { EXPECT_EQ(*(data_buf + i), expect_out[i]); } } @@ -517,7 +517,7 @@ TEST_F(UtestGraphPassesFoldingKernelGatherV2Kernel, INT16Axis0) { GeTensorPtr tensor_out = outputs[0]; int16_t *data_buf = (int16_t *)tensor_out->GetData().data(); vector expect_out = {2, 2}; - for (int i = 0; i < expect_out.size(); i++) { + for (size_t i = 0; i < expect_out.size(); i++) { EXPECT_EQ(*(data_buf + i), expect_out[i]); } } @@ -564,7 +564,7 @@ TEST_F(UtestGraphPassesFoldingKernelGatherV2Kernel, UINT8Axis0) { GeTensorPtr tensor_out = outputs[0]; uint8_t *data_buf = (uint8_t *)tensor_out->GetData().data(); vector expect_out = {2, 2}; - for (int i = 0; i < expect_out.size(); i++) { + for (size_t i = 0; i < expect_out.size(); i++) { EXPECT_EQ(*(data_buf + i), expect_out[i]); } } @@ -611,7 +611,7 @@ TEST_F(UtestGraphPassesFoldingKernelGatherV2Kernel, UINT16Axis0) { GeTensorPtr tensor_out = outputs[0]; uint16_t *data_buf = (uint16_t *)tensor_out->GetData().data(); vector expect_out = {2, 2}; - for (int i = 0; i < expect_out.size(); i++) { + for (size_t i = 0; i < expect_out.size(); i++) { EXPECT_EQ(*(data_buf + i), expect_out[i]); } } @@ -658,7 +658,7 @@ TEST_F(UtestGraphPassesFoldingKernelGatherV2Kernel, UINT32Axis0) { GeTensorPtr tensor_out = outputs[0]; uint32_t *data_buf = (uint32_t *)tensor_out->GetData().data(); vector expect_out = {2, 2}; - for (int i = 0; i < expect_out.size(); i++) { + for (size_t i = 0; 
i < expect_out.size(); i++) { EXPECT_EQ(*(data_buf + i), expect_out[i]); } } @@ -705,7 +705,7 @@ TEST_F(UtestGraphPassesFoldingKernelGatherV2Kernel, UINT64Axis0) { GeTensorPtr tensor_out = outputs[0]; uint64_t *data_buf = (uint64_t *)tensor_out->GetData().data(); vector expect_out = {2, 2}; - for (int i = 0; i < expect_out.size(); i++) { + for (size_t i = 0; i < expect_out.size(); i++) { EXPECT_EQ(*(data_buf + i), expect_out[i]); } } @@ -753,7 +753,7 @@ TEST_F(UtestGraphPassesFoldingKernelGatherV2Kernel, DoubleAxis0) { GeTensorPtr tensor_out = outputs[0]; double *data_buf = (double *)tensor_out->GetData().data(); vector expect_out = {2, 2}; - for (int i = 0; i < expect_out.size(); i++) { + for (size_t i = 0; i < expect_out.size(); i++) { double diff = *(data_buf + i) - expect_out[i]; bool is_same = fabs(diff) < 0.0001 ? true : false; EXPECT_EQ(is_same, true); @@ -802,7 +802,7 @@ TEST_F(UtestGraphPassesFoldingKernelGatherV2Kernel, Float16Axis0) { GeTensorPtr tensor_out = outputs[0]; fp16_t *data_buf = (fp16_t *)tensor_out->GetData().data(); vector expect_out = {2, 2}; - for (int i = 0; i < expect_out.size(); i++) { + for (size_t i = 0; i < expect_out.size(); i++) { double diff = (double)*(data_buf + i) - (double)expect_out[i]; bool is_same = fabs(diff) < 0.0001 ? 
true : false; EXPECT_EQ(is_same, true); diff --git a/tests/ut/ge/graph/passes/mark_node_unknown_shape_pass_unittest.cc b/tests/ut/ge/graph/passes/mark_node_unknown_shape_pass_unittest.cc index 5157e510..c7d36582 100644 --- a/tests/ut/ge/graph/passes/mark_node_unknown_shape_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/mark_node_unknown_shape_pass_unittest.cc @@ -33,7 +33,7 @@ protected: void SetUp() {} void TearDown() {} public: - NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { + NodePtr MakeNode(const ComputeGraphPtr &graph, int in_num, int out_num, string name, string type) { GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); auto op_desc = std::make_shared(name, type); for (auto i = 0; i < in_num; ++i) { diff --git a/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc b/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc index 1b75a613..c752cea4 100644 --- a/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc @@ -45,7 +45,7 @@ protected: } public: - NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { + NodePtr MakeNode(const ComputeGraphPtr &graph, int in_num, int out_num, string name, string type) { GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); auto op_desc = std::make_shared(name, type); for (auto i = 0; i < in_num; ++i) { diff --git a/tests/ut/ge/graph/passes/subgraph_const_migration_pass_unittest.cc b/tests/ut/ge/graph/passes/subgraph_const_migration_pass_unittest.cc index 00157395..c633c0e1 100644 --- a/tests/ut/ge/graph/passes/subgraph_const_migration_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/subgraph_const_migration_pass_unittest.cc @@ -32,7 +32,7 @@ class UtestSubgraphConstMigrationPass : public testing::Test { void TearDown() {} public: - NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, 
string name, string type) { + NodePtr MakeNode(const ComputeGraphPtr &graph, int in_num, int out_num, string name, string type) { GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); auto op_desc = std::make_shared(name, type); for (auto i = 0; i < in_num; ++i) { diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc index d1c51c67..1d1c4fa9 100644 --- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc +++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc @@ -153,7 +153,6 @@ TEST_F(UtestGeHybrid, task_update_tiling_info) { ge::AttrUtils::SetStr(op_desc, "compile_info_json", "json"); ge::AttrUtils::SetBool(op_desc, "support_dynamicshape", true); ge::AttrUtils::SetInt(op_desc, "op_para_size", 1); - ge::AttrUtils::SetStr(op_desc, TVM_ATTR_NAME_MAGIC, "RT_DEV_BINARY_MAGIC_ELF"); auto node = graph->AddNode(op_desc); std::unique_ptr node_item; diff --git a/tests/ut/ge/hybrid/known_node_executor_unittest.cc b/tests/ut/ge/hybrid/known_node_executor_unittest.cc index 98e985f7..b6d06f5d 100644 --- a/tests/ut/ge/hybrid/known_node_executor_unittest.cc +++ b/tests/ut/ge/hybrid/known_node_executor_unittest.cc @@ -27,6 +27,7 @@ #undef protected #include "graph/manager/graph_mem_allocator.h" #include "../graph/passes/graph_builder_utils.h" +#include "../inc/graph/utils/graph_utils.h" using namespace std; using namespace testing; @@ -48,6 +49,34 @@ class KnownNodeTaskMock : public KnownNodeTask { }; } +static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") { + auto op_desc = std::make_shared(name, type); + op_desc->SetStreamId(0); + op_desc->SetId(0); + + op_desc->SetWorkspace({}); + ; + op_desc->SetWorkspaceBytes({}); + op_desc->SetInputOffset({}); + op_desc->SetOutputOffset({}); + + ge::AttrUtils::SetStr(op_desc, ge::TVM_ATTR_NAME_MAGIC, "RT_DEV_BINARY_MAGIC_ELF_AIVEC"); + bool support_dynamic = true; + ge::AttrUtils::GetBool(op_desc, "support_dynamicshape", support_dynamic); + return op_desc; +} + +static ComputeGraphPtr 
BuildDataDirectConnectGraph() { + const char *kRefIndex = "_parent_node_index"; + ge::ut::GraphBuilder builder("subgraph"); + auto data = builder.AddNode("Data", "Data", 1, 1); + auto netoutput = builder.AddNode("NetOutput", "NetOutput", 1, 1); + (void)AttrUtils::SetInt(netoutput->GetOpDesc()->MutableInputDesc(0), kRefIndex, 0); + + builder.AddDataEdge(data, 0, netoutput, 0); + return builder.GetGraph(); +} + TEST_F(UnknownNodeExecutorTest, test_init_davinci_model) { auto davinci_model = std::make_shared(0, nullptr); davinci_model->SetDeviceId(0); @@ -88,4 +117,29 @@ TEST_F(UnknownNodeExecutorTest, TestParseAttrForAllocatingOutputs) { ASSERT_EQ(node_item.ref_outputs[1], const_node); ASSERT_EQ(node_item.reuse_inputs.size(), 1); ASSERT_EQ(node_item.reuse_inputs[0], 0); -} \ No newline at end of file +} + +TEST_F(UnknownNodeExecutorTest, TestSetGlobalStep) { + OpDescPtr op_desc = CreateOpDesc("PartitionedCall", "PartitionedCall"); + auto root_graph = make_shared("root_graph"); + auto node = root_graph->AddNode(op_desc); + node->SetOwnerComputeGraph(root_graph); + auto sub_graph = BuildDataDirectConnectGraph(); + sub_graph->SetParentGraph(root_graph); + sub_graph->SetParentNode(node); + node->GetOpDesc()->AddSubgraphName("subgraph"); + node->GetOpDesc()->SetSubgraphInstanceName(0, "subgraph"); + root_graph->AddSubgraph("subgraph", sub_graph); + + GeRootModelPtr ge_root_model = make_shared(root_graph); + HybridModel hybrid_model(ge_root_model); + auto *step_id = new int64_t[1]; + step_id[0] = 520; + std::unique_ptr tensor_buf; + tensor_buf = tensor_buf->Create((void *)step_id, sizeof(int64_t)); + hybrid_model.global_step_ = std::move(tensor_buf); + KnownNodeExecutor known_node_executor; + std::shared_ptr davinci_model = MakeShared(0, nullptr); + known_node_executor.SetDaviciModel(hybrid_model, node, davinci_model); + EXPECT_EQ(*(static_cast(davinci_model->global_step_addr_)), 520); +} diff --git a/tests/ut/ge/session/ge_api_unittest.cc 
b/tests/ut/ge/session/ge_api_unittest.cc index 9a7058f3..93e6a52c 100644 --- a/tests/ut/ge/session/ge_api_unittest.cc +++ b/tests/ut/ge/session/ge_api_unittest.cc @@ -26,8 +26,6 @@ #include "proto/ge_ir.pb.h" #include "inc/external/ge/ge_api.h" #include "session/session_manager.h" -#undef protected -#undef private using namespace std; @@ -71,4 +69,114 @@ TEST_F(UtestGeApi, ge_initialize_modify_mixlist) { auto ret = GEInitialize(options); ASSERT_NE(ret, SUCCESS); } + +TEST_F(UtestGeApi, ge_not_initialized) { + EXPECT_EQ(GEFinalize(), SUCCESS); + + std::map options; + std::map ascend_options; + Session session(options); + + GraphId graph_id = 1; + const auto compute_graph = MakeShared("test_graph"); + Graph graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); + + EXPECT_EQ(session.AddGraph(graph_id, graph), FAILED); + EXPECT_EQ(session.AddGraph(graph_id, graph, ascend_options), FAILED); + + EXPECT_EQ(session.AddGraphWithCopy(graph_id, graph), FAILED); + EXPECT_EQ(session.AddGraphWithCopy(graph_id, graph, ascend_options), FAILED); + + vector inputs; + vector tensors; + EXPECT_EQ(session.BuildGraph(graph_id, inputs), FAILED); + EXPECT_EQ(session.BuildGraph(graph_id, tensors), FAILED); + + vector outputs; + EXPECT_EQ(session.RunGraph(graph_id, inputs, outputs), FAILED); + EXPECT_EQ(session.RunGraphWithStreamAsync(graph_id, nullptr, inputs, outputs), FAILED); + EXPECT_EQ(session.RunGraphAsync(graph_id, inputs, nullptr), FAILED); + + vector var_inputs; + EXPECT_EQ(session.GetVariables(var_inputs, outputs), FAILED); + + vector var_names; + EXPECT_EQ(session.GetVariables(var_names, outputs), FAILED); + + std::string key; + pCallBackFunc ge_callback; + EXPECT_EQ(session.RegisterCallBackFunc(key, ge_callback), FAILED); + + session::pCallBackFunc session_callback; + EXPECT_EQ(session.RegisterCallBackFunc(key.c_str(), session_callback), FAILED); + + EXPECT_FALSE(session.IsGraphNeedRebuild(graph_id)); + + EXPECT_EQ(session.RemoveGraph(graph_id), FAILED); + 
EXPECT_EQ(GEFinalize(), SUCCESS); +} + +TEST_F(UtestGeApi, ge_session_ascend_string) { + std::map options; + EXPECT_EQ(GEInitialize(options), SUCCESS); + + Session session(options); + + GraphId graph_id = 1; + const auto compute_graph = MakeShared("test_graph"); + EXPECT_EQ(session.AddGraph(graph_id, GraphUtils::CreateGraphFromComputeGraph(compute_graph)), SUCCESS); + + EXPECT_TRUE(session.IsGraphNeedRebuild(graph_id)); + + EXPECT_EQ(session.RemoveGraph(graph_id), SUCCESS); + + EXPECT_EQ(GEFinalize(), SUCCESS); +} + +TEST_F(UtestGeApi, ge_session_test) { + std::map options; + EXPECT_EQ(GEInitialize(options), SUCCESS); + + std::map ascend_options; + Session session(options); + + GraphId graph_id = 1; + const auto compute_graph = MakeShared("test_graph"); + Graph graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); + + EXPECT_EQ(session.AddGraph(graph_id, graph), SUCCESS); + EXPECT_EQ(session.AddGraph(graph_id, graph, ascend_options), SUCCESS); + + EXPECT_EQ(session.AddGraphWithCopy(graph_id, graph), FAILED); + EXPECT_EQ(session.AddGraphWithCopy(graph_id, graph, ascend_options), FAILED); + + vector inputs; + vector tensors; + EXPECT_EQ(session.BuildGraph(graph_id, inputs), FAILED); + EXPECT_EQ(session.BuildGraph(graph_id, tensors), FAILED); + + vector outputs; + EXPECT_EQ(session.RunGraph(graph_id, inputs, outputs), FAILED); + EXPECT_EQ(session.RunGraphWithStreamAsync(graph_id, nullptr, inputs, outputs), FAILED); + EXPECT_EQ(session.RunGraphAsync(graph_id, inputs, nullptr), SUCCESS); // Push to queue. 
+ + vector var_inputs; + EXPECT_EQ(session.GetVariables(var_inputs, outputs), FAILED); + + vector var_names; + EXPECT_EQ(session.GetVariables(var_names, outputs), FAILED); + + std::string key; + pCallBackFunc ge_callback; + EXPECT_EQ(session.RegisterCallBackFunc(key, ge_callback), SUCCESS); + + session::pCallBackFunc session_callback; + EXPECT_EQ(session.RegisterCallBackFunc(key.c_str(), session_callback), SUCCESS); + + EXPECT_TRUE(session.IsGraphNeedRebuild(graph_id)); + + EXPECT_EQ(session.RemoveGraph(graph_id), SUCCESS); + EXPECT_EQ(GEFinalize(), SUCCESS); +} + } // namespace ge diff --git a/tests/ut/ge/session/inner_session_unittest.cc b/tests/ut/ge/session/inner_session_unittest.cc index 0d20f06a..80cc2834 100644 --- a/tests/ut/ge/session/inner_session_unittest.cc +++ b/tests/ut/ge/session/inner_session_unittest.cc @@ -19,21 +19,18 @@ #define private public #define protected public #include "session/inner_session.h" -#undef private -#undef protected - using namespace std; namespace ge { -class Utest_Inner_session : public testing::Test { +class UtestInnerSession : public testing::Test { protected: void SetUp() override {} void TearDown() override {} }; -TEST_F(Utest_Inner_session, build_graph_success) { +TEST_F(UtestInnerSession, build_graph_success) { std::map options; uint64_t session_id = 1; InnerSession inner_seesion(session_id, options); @@ -44,17 +41,15 @@ TEST_F(Utest_Inner_session, build_graph_success) { EXPECT_NE(ret, ge::SUCCESS); } -TEST_F(Utest_Inner_session, initialize) { - std::map options = { - {ge::MODIFY_MIXLIST, "/modify.json"} - }; +TEST_F(UtestInnerSession, initialize) { + std::map options = {}; uint64_t session_id = 1; InnerSession inner_session(session_id, options); - auto ret = inner_session.Initialize(); - EXPECT_NE(ret, ge::SUCCESS); + EXPECT_EQ(inner_session.Initialize(), SUCCESS); + EXPECT_EQ(inner_session.Finalize(), SUCCESS); } -TEST_F(Utest_Inner_session, check_op_precision_mode) { +TEST_F(UtestInnerSession, 
check_op_precision_mode) { std::map options = { {ge::OP_PRECISION_MODE, "./op_precision_mode.ini"} }; diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc index 23269814..7b7a05d8 100644 --- a/tests/ut/ge/single_op/single_op_model_unittest.cc +++ b/tests/ut/ge/single_op/single_op_model_unittest.cc @@ -40,6 +40,9 @@ using namespace ge; namespace { constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; +const char *const kEngineNameAiCore = "AIcoreEngine"; +const char *const kEngineNameAiCpu = "aicpu_ascend_kernel"; +const char *const kEngineNameAiCpuTf = "aicpu_tf_kernel"; } // namespace class UtestSingleOpModel : public testing::Test { @@ -222,6 +225,7 @@ TEST_F(UtestSingleOpModel, test_build_dynamic_op) { auto graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); model.model_helper_.model_->SetGraph(graph); + model.op_list_[0] = transdata; auto op_desc = transdata->GetOpDesc(); const vector depend_names = { "Data" }; @@ -330,7 +334,10 @@ TEST_F(UtestSingleOpModel, build_dynamic_task) { domi::TaskDef *task_def3 = model_task_def->add_task(); task_def3->set_type(RT_MODEL_TASK_ALL_KERNEL); - string model_data_str = "123456789"; + domi::TaskDef *task_def4 = model_task_def->add_task(); + task_def4->set_type(RT_MODEL_TASK_KERNEL); + + string model_data_str = "dynamic_model"; SingleOpModel model("model", model_data_str.c_str(), model_data_str.size()); std::mutex stream_mu; rtStream_t stream = nullptr; @@ -347,9 +354,15 @@ TEST_F(UtestSingleOpModel, build_dynamic_task) { StreamResource *res = new (std::nothrow) StreamResource(1); ASSERT_EQ(model.ParseTasks(), SUCCESS); + model.node_tasks_[node] = { *task_def3, *task_def4 }; + op_desc->SetOpKernelLibName(kEngineNameAiCore); + model.BuildTaskListForDynamicOp(res, single_op); + + model.node_tasks_[node] = { *task_def }; + op_desc->SetOpKernelLibName(kEngineNameAiCpuTf); ASSERT_EQ(model.BuildTaskListForDynamicOp(res, single_op), SUCCESS); 
- model.tbe_tasks_.clear(); - ASSERT_EQ(model.BuildTaskListForDynamicOp(res, single_op), SUCCESS); - model.aicpu_tasks_[0] = *task_def2; + + model.node_tasks_[node] = { *task_def2 }; + op_desc->SetOpKernelLibName(kEngineNameAiCpu); model.BuildTaskListForDynamicOp(res, single_op); } diff --git a/tests/ut/ge/single_op/single_op_task_unittest.cc b/tests/ut/ge/single_op/single_op_task_unittest.cc index 2424d209..3e3160c2 100644 --- a/tests/ut/ge/single_op/single_op_task_unittest.cc +++ b/tests/ut/ge/single_op/single_op_task_unittest.cc @@ -154,3 +154,38 @@ TEST_F(UtestSingleOpTask, test_update_ioaddr) { task.tiling_buffer_ = nullptr; } +TEST_F(UtestSingleOpTask, test_atomic_exec) { + auto graph = make_shared("graph"); + auto op_desc = make_shared("Add", "Add"); + GeTensorDesc desc; + op_desc->AddInputDesc(desc); + op_desc->AddOutputDesc(desc); + auto node = graph->AddNode(op_desc); + AtomicAddrCleanOpTask task; + task.op_desc_ = op_desc; + task.node_ = node; + + vector inputs; + vector outputs; + std::vector atomic_output_indices; + ge::AttrUtils::SetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_indices); + ASSERT_EQ(task.InitAtomicAddrCleanIndices(), INTERNAL_ERROR); + atomic_output_indices = { 0 }; + ge::AttrUtils::SetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_indices); + ASSERT_EQ(task.InitAtomicAddrCleanIndices(), INTERNAL_ERROR); + task.arg_size_ = sizeof(void *) * 2; + task.args_.reset(new (std::nothrow) uint8_t[task.arg_size_]); + ASSERT_EQ(task.InitAtomicAddrCleanIndices(), SUCCESS); + ASSERT_EQ(task.UpdateIoAddr(inputs, outputs), ACL_ERROR_GE_PARAM_INVALID); + + ge::DataBuffer data_buffer; + outputs = { data_buffer }; + ASSERT_EQ(task.UpdateIoAddr(inputs, outputs), SUCCESS); + + task.tiling_buffer_ = (void *)0x0001; + ASSERT_EQ(task.UpdateTilingArgs(nullptr), SUCCESS); + task.tiling_buffer_ = nullptr; + + optiling::utils::OpRunInfo run_info(0, true, 0); + task.CalcTilingInfo(run_info); +}