From b307b80f2b5c779dc4d5ccc576fdaca97212f846 Mon Sep 17 00:00:00 2001 From: wuweikang Date: Thu, 29 Apr 2021 16:24:12 +0800 Subject: [PATCH] adapt to new session id in multithread infer --- ge/executor/CMakeLists.txt | 3 + ge/graph/load/model_manager/model_manager.cc | 82 ++++++++++++++++--- ge/graph/load/model_manager/model_manager.h | 11 +++ ge/init/gelib.cc | 2 - .../ge/graph/load/model_manager_unittest.cc | 42 ++++++++++ 5 files changed, 126 insertions(+), 14 deletions(-) diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index 9bf212a7..856e7cf1 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -19,6 +19,7 @@ set(SRC_LIST "../common/dump/exception_dumper.cc" "../common/dump/dump_manager.cc" "../common/dump/dump_op.cc" + "../common/dump/dump_server.cc" "../common/dump/opdebug_register.cc" "../common/profiling/ge_profiling.cc" "../graph/load/graph_loader.cc" @@ -201,6 +202,7 @@ target_include_directories(ge_executor SYSTEM PRIVATE ${GE_CODE_DIR}/../inc/cce #### blue zone #### ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain ) target_link_libraries(ge_executor PRIVATE @@ -247,6 +249,7 @@ target_include_directories(ge_executor_shared PRIVATE ${GE_CODE_DIR}/../inc/cce #### blue zone #### ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain ) target_link_options(ge_executor_shared PRIVATE diff --git a/ge/graph/load/model_manager/model_manager.cc b/ge/graph/load/model_manager/model_manager.cc index b731aefe..5bcd308a 100755 --- a/ge/graph/load/model_manager/model_manager.cc +++ b/ge/graph/load/model_manager/model_manager.cc @@ -27,6 +27,7 @@ #include "graph/load/model_manager/davinci_model.h" #include "model/ge_root_model.h" #include "common/formats/utils/formats_trans_utils.h" +#include "toolchain/adx_datadump_server.h" namespace ge { thread_local uint32_t device_count = 0; @@ -48,6 +49,7 @@ const int kTimeSpecNano = 1000000000; const 
int kTimeSpecMiro = 1000000; const int kOpNameMaxSize = 100; const uint64_t kInferSessionId = 0; +const int32_t kDumpStatus = 0; #pragma pack(push, 1) struct CustAicpuSoBuf { uint64_t kernelSoBuf; @@ -321,6 +323,58 @@ bool ModelManager::IsNeedHybridLoad(ge::GeRootModel &ge_root_model) { (void)AttrUtils::GetBool(root_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dsp_partitioned_graph); return is_shape_unknown || is_dsp_partitioned_graph || GetContext().GetHostExecFlag(); } + +bool ModelManager::IsDumpSeverInited(uint64_t session_id) { + auto it = session_id_to_dump_server_init_flag_.find(session_id); + return it != session_id_to_dump_server_init_flag_.end() && it->second; +} + +Status ModelManager::AddDumpProperties(uint64_t session_id, const DumpProperties &dump_properties) { + if (!IsDumpSeverInited(session_id)) { + if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) { + GE_IF_BOOL_EXEC(AdxDataDumpServerInit() != kDumpStatus, + GELOGE(PARAM_INVALID, "[Init][AdxDataDumpServer] failed, session_id:%lu.", session_id); + return PARAM_INVALID) + GELOGI("Init adx data dump server success"); + session_id_to_dump_server_init_flag_[session_id] = true; + } + } + DumpManager::GetInstance().AddDumpProperties(session_id, dump_properties); + return SUCCESS; +} + +Status ModelManager::InitDumPropertiesWithNewSessionId(uint64_t session_id) { + DumpProperties dump_properties; + dump_properties.InitByOptions(); + GE_CHK_STATUS_RET(AddDumpProperties(session_id, dump_properties), "[Add][DumpProperties] failed."); + return SUCCESS; +} + +Status ModelManager::UpdateSessionId(uint32_t model_id, GeModelPtr ge_model, + std::shared_ptr &davinci_model, uint64_t &session_id) { + uint64_t new_session_id; + Status ret = GenSessionId(new_session_id); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed."); + ret = davinci_model->UpdateSessionId(new_session_id); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update 
session_id for infer failed."); + ge_model->InsertSessionMap(model_id, new_session_id); + GELOGD("Update new session id: %lu.", new_session_id); + session_id = new_session_id; + return SUCCESS; +} + +bool ModelManager::HasVarNode(ComputeGraphPtr &compute_graph) const { + for (ge::NodePtr &node : compute_graph->GetAllNodes()) { + if (node == nullptr) { + continue; + } + if (node->GetType() == VARIABLE) { + return true; + } + } + return false; +} + /// /// @ingroup domi_ome /// @brief load model online @@ -347,10 +401,6 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptrSetId(model_id); davinci_model->SetDeviceId(GetContext().DeviceId()); - const DumpProperties &dump_properties = DumpManager::GetInstance().GetDumpProperties(GetContext().SessionId()); - davinci_model->SetDumpProperties(dump_properties); - dump_properties_ = dump_properties; - auto root_graph = ge_root_model->GetRootGraph(); GE_CHECK_NOTNULL(root_graph); string root_model_name = root_graph->GetName(); @@ -364,15 +414,23 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptrGetTrainFlag()) { - uint64_t new_session_id; - ret = GenSessionId(new_session_id); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed."); - ret = davinci_model->UpdateSessionId(new_session_id); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed."); - ge_model->InsertSessionMap(model_id, new_session_id); - GELOGD("Update new session id: %lu.", new_session_id); + uint64_t session_id = GetContext().SessionId(); + // Inference graphs with variable nodes are not supported in multi-threaded scenarios + if (!ge_root_model->GetTrainFlag() && !HasVarNode(root_graph)) { + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(UpdateSessionId(model_id, ge_model, davinci_model, session_id) != SUCCESS, + return ret, + "UpdateSessionId failed."); + GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId())); + 
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(InitDumPropertiesWithNewSessionId(session_id) != SUCCESS, + GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); + return ret, + "Init DumProperties with new session_id failed."); } + + const DumpProperties &dump_properties = DumpManager::GetInstance().GetDumpProperties(session_id); + davinci_model->SetDumpProperties(dump_properties); + dump_properties_ = dump_properties; + GE_TIMESTAMP_START(Init); GE_IF_BOOL_EXEC(SUCCESS != (ret = davinci_model->Init()), GELOGW("DavinciInit failed."); break;); GE_TIMESTAMP_END(Init, "GraphLoader::ModelInit"); diff --git a/ge/graph/load/model_manager/model_manager.h b/ge/graph/load/model_manager/model_manager.h index e15cf533..c0f14934 100755 --- a/ge/graph/load/model_manager/model_manager.h +++ b/ge/graph/load/model_manager/model_manager.h @@ -345,6 +345,16 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { void GenModelId(uint32_t *id); + Status InitDumPropertiesWithNewSessionId(uint64_t session_id); + + bool IsDumpSeverInited(uint64_t session_id); + + Status AddDumpProperties(uint64_t session_id, const DumpProperties &dump_properties); + + Status UpdateSessionId(uint32_t model_id, GeModelPtr ge_model, + std::shared_ptr &davinci_model, uint64_t &session_id); + + bool HasVarNode(ComputeGraphPtr &compute_graph) const; std::map> model_map_; std::map> hybrid_model_map_; @@ -361,6 +371,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { static DumpProperties dump_properties_; bool dump_exception_flag_ = false; + std::map session_id_to_dump_server_init_flag_; }; } // namespace ge diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index 39a18fd1..96ed1b9c 100644 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -60,8 +60,6 @@ static std::shared_ptr instancePtr_ = nullptr; // Initial each module of GE, if one failed, release all Status GELib::Initialize(const map &options) { - - GELOGI("initial start"); GEEVENT("[GEPERFTRACE] GE Init Start"); // Multiple 
initializations are not allowed diff --git a/tests/ut/ge/graph/load/model_manager_unittest.cc b/tests/ut/ge/graph/load/model_manager_unittest.cc index 0cbe61b5..de891072 100644 --- a/tests/ut/ge/graph/load/model_manager_unittest.cc +++ b/tests/ut/ge/graph/load/model_manager_unittest.cc @@ -25,6 +25,7 @@ #include "common/op/ge_op_utils.h" #include "graph/load/graph_loader.h" #include "graph/load/model_manager/davinci_model.h" +#include "graph/ops_stub.h" using namespace std; using namespace testing; @@ -56,6 +57,23 @@ class UtestModelManagerModelManager : public testing::Test { void TearDown() {} + void CreateGraph(Graph &graph) { + TensorDesc desc(ge::Shape({1, 3, 224, 224})); + uint32_t size = desc.GetShape().GetShapeSize(); + desc.SetSize(size); + auto data = op::Data("Data").set_attr_index(0); + data.update_input_desc_data(desc); + data.update_output_desc_out(desc); + + auto flatten = op::Flatten("Flatten").set_input_x(data, data.name_out_out()); + + std::vector inputs{data}; + std::vector outputs{flatten}; + std::vector targets{flatten}; + // Graph graph("test_graph"); + graph.SetInputs(inputs).SetOutputs(outputs).SetTargets(targets); + } + void GenUnencryptModelData(ModelData &data) { const int model_len = 10; data.model_len = sizeof(ModelFileHeader) + model_len; @@ -420,4 +438,28 @@ TEST_F(UtestModelManagerModelManager, test_data_input_tensor) { auto ret = mm.DataInputTensor(model_id,inputs); EXPECT_EQ(PARAM_INVALID, ret); // HybridDavinciModel::impl_ is null. 
} + +TEST_F(UtestModelManagerModelManager, test_init_dump_properties_with_new_session_id) { + ModelManager model_manager; + uint64_t session_id = 1; + model_manager.InitDumPropertiesWithNewSessionId(session_id); +} + +TEST_F(UtestModelManagerModelManager, test_update_session_id) { + ModelManager model_manager; + uint32_t model_id = 0; + uint64_t session_id = 0; + GeModelPtr ge_model = MakeShared(); + std::shared_ptr davinci_model = MakeShared(0, nullptr); + model_manager.UpdateSessionId(model_id, ge_model, davinci_model, session_id); +} + +TEST_F(UtestModelManagerModelManager, test_has_var_node) { + ModelManager model_manager; + uint64_t session_id = 1; + Graph graph("test"); + CreateGraph(graph); + auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); + model_manager.HasVarNode(compute_graph); +} } // namespace ge