diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc
index 6585677e..540dfa66 100755
--- a/ge/hybrid/executor/hybrid_model_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_executor.cc
@@ -34,9 +34,6 @@ HybridModelExecutor::~HybridModelExecutor() {
   if (context_.rt_gen_context != nullptr) {
     (void) rtCtxDestroy(context_.rt_gen_context);
   }
-  if (context_.global_step != nullptr) {
-    (void) rtFree(context_.global_step);
-  }
 }
 
 Status HybridModelExecutor::Init() {
@@ -51,8 +48,10 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
   auto root_graph_item = model_->GetRootGraphItem();
   GE_CHECK_NOTNULL(root_graph_item);
 
-  GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration,
-                              sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream));
+  if (context_.global_step != nullptr) {
+    GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration,
+                                sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream));
+  }
   SubgraphExecutor executor(model_->GetRootGraphItem(), &context_);
   auto ret = ExecuteGraphInternal(executor, args);
   Cleanup();
@@ -116,8 +115,8 @@ Status HybridModelExecutor::InitExecutionContext() {
   GE_CHK_RT_RET(rtCtxGetCurrent(&context_.rt_context));
   GE_CHK_RT_RET(rtCtxCreate(&context_.rt_gen_context, RT_CTX_GEN_MODE, 0));
   GE_CHK_RT_RET(rtCtxSetCurrent(context_.rt_context));
-  GE_CHK_RT_RET(rtMalloc(&context_.global_step, sizeof(uint64_t), RT_MEMORY_HBM));
 
+  context_.global_step = model_->GetGlobalStep();
   context_.stream = stream_;
   context_.model = model_;
   context_.is_eos_ = false;
diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc
index a0217d52..c7b2eadb 100644
--- a/ge/hybrid/model/hybrid_model.cc
+++ b/ge/hybrid/model/hybrid_model.cc
@@ -357,5 +357,12 @@ TensorValue *HybridModel::GetTensor(const NodePtr &node) const {
 
   return GetVariable(node->GetName());
 }
+
+void *HybridModel::GetGlobalStep() const {
+  if (global_step_ == nullptr) {
+    return nullptr;
+  }
+  return global_step_->GetData();
+}
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h
index 62095d42..627ca732 100644
--- a/ge/hybrid/model/hybrid_model.h
+++ b/ge/hybrid/model/hybrid_model.h
@@ -45,6 +45,8 @@ class HybridModel {
     return root_runtime_param_.session_id;
   }
 
+  void *GetGlobalStep() const;
+
   GeModelPtr GetGeModel(const NodePtr &node) const;
 
   NodeItem *MutableNodeItem(const NodePtr &node);
@@ -158,6 +160,7 @@ class HybridModel {
   std::map> weight_buffer_map_;
   RuntimeParam root_runtime_param_;
   string om_name_;
+  std::unique_ptr<TensorBuffer> global_step_;
 };
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index 1be76331..6e43007f 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -1568,6 +1568,10 @@ Status HybridModelBuilder::InitModelMem() {
   }
 
   runtime_param_.var_base = hybrid_model_.var_mem_base_;
 
+  auto allocator = NpuMemoryAllocator::GetAllocator();
+  GE_CHECK_NOTNULL(allocator);
+  hybrid_model_.global_step_ = TensorBuffer::Create(allocator, sizeof(int64_t));
+  GE_CHECK_NOTNULL(hybrid_model_.global_step_);
   return SUCCESS;
 }
diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc
index 1c46db20..9214f685 100755
--- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc
+++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc
@@ -18,6 +18,7 @@
 #include "cce/aicpu_engine_struct.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/fmk_error_codes.h"
+#include "common/dump/dump_manager.h"
 #include "common/ge/ge_util.h"
 #include "graph/attr_value.h"
 #include "graph/debug/ge_attr_define.h"
@@ -110,15 +111,6 @@ Status KnownNodeTask::Init(TaskContext &context) {
     GELOGI("KnownNodeTask::Init mem base is %p, size %lu.",
            davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size);
   }
-  if (!load_flag_) {
-    auto dump_properties = context.GetDumpProperties();
-    if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) {
-      davinci_model_->SetDumpProperties(dump_properties);
-      void *global_step = context.GetExecutionContext()->global_step;
-      davinci_model_->SetKnownShapeGlobalStep(global_step);
-    }
-    load_flag_ = true;
-  }
   GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(),
                                                                     davinci_model_->Id(), davinci_model_->SubModelId()),
                     "KnownNodeTask::Init destroy aicpu kernel failed.");
@@ -126,13 +118,21 @@ Status KnownNodeTask::Init(TaskContext &context) {
   return SUCCESS;
 }
 
-Status KnownNodeTask::InitDavinciModel() {
+Status KnownNodeTask::InitDavinciModel(const HybridModel &model) {
   GELOGD("[Init][Model] start");
   davinci_model_->InitRuntimeParams();
   GE_CHK_STATUS_RET(davinci_model_->InitVariableMem(), "init variable mem failed");
   int32_t device_id = 0;
   GE_CHK_RT_RET(rtGetDevice(&device_id));
   davinci_model_->SetDeviceId(static_cast<uint32_t>(device_id));
+
+  auto dump_properties = DumpManager::GetInstance().GetDumpProperties(model.GetSessionId());
+  if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) {
+    davinci_model_->SetDumpProperties(dump_properties);
+    void *global_step = model.GetGlobalStep();
+    davinci_model_->SetKnownShapeGlobalStep(global_step);
+  }
+
   GE_CHK_STATUS_RET(DoInitDavinciModel(), "[Init][Model] Failed to init davinci model.");
   GELOGD("[Init][Model] success");
   return SUCCESS;
@@ -180,7 +180,7 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node
   auto known_node_task = MakeShared<KnownNodeTask>(davinci_model);
   GE_CHECK_NOTNULL(known_node_task);
 
-  GE_CHK_STATUS_RET_NOLOG(known_node_task->InitDavinciModel());
+  GE_CHK_STATUS_RET_NOLOG(known_node_task->InitDavinciModel(model));
   GELOGI("[%s] KnownNodeExecutor::LoadTask success.", node->GetName().c_str());
   task = std::move(known_node_task);
   return SUCCESS;
diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h
index 5eed528a..75d83743 100644
--- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h
+++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h
@@ -36,7 +36,7 @@ class KnownNodeTask : public NodeTask {
   Status UpdateArgs(TaskContext &context) override;
   Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override;
   Status Init(TaskContext &context) override;
-  Status InitDavinciModel();
+  Status InitDavinciModel(const HybridModel &model);
 
 protected:
   virtual Status DoInitDavinciModel();
diff --git a/tests/ut/ge/hybrid/known_node_executor_unittest.cc b/tests/ut/ge/hybrid/known_node_executor_unittest.cc
index 67a8e323..e41dcecf 100644
--- a/tests/ut/ge/hybrid/known_node_executor_unittest.cc
+++ b/tests/ut/ge/hybrid/known_node_executor_unittest.cc
@@ -22,6 +22,7 @@
 #define protected public
 #define private public
 #include "hybrid/node_executor/compiledsubgraph/known_node_executor.h"
+#include "common/dump/dump_manager.h"
 #undef private
 #undef protected
 #include "graph/manager/graph_mem_allocator.h"
@@ -56,7 +57,11 @@ TEST_F(UnknownNodeExecutorTest, test_init_davinci_model) {
   AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 1024);
   davinci_model->Assign(ge_model);
 
+  HybridModel model(nullptr);
   KnownNodeTaskMock mock(davinci_model);
+  DumpProperties dump_properties;
+  dump_properties.enable_dump_ = "1";
+  DumpManager::GetInstance().AddDumpProperties(model.GetSessionId(), dump_properties);
   EXPECT_CALL(mock, DoInitDavinciModel).WillOnce(::testing::Return(SUCCESS));
-  ASSERT_EQ(mock.InitDavinciModel(), SUCCESS);
-}
\ No newline at end of file
+  ASSERT_EQ(mock.InitDavinciModel(model), SUCCESS);
+}