| @@ -34,9 +34,6 @@ HybridModelExecutor::~HybridModelExecutor() { | |||||
| if (context_.rt_gen_context != nullptr) { | if (context_.rt_gen_context != nullptr) { | ||||
| (void) rtCtxDestroy(context_.rt_gen_context); | (void) rtCtxDestroy(context_.rt_gen_context); | ||||
| } | } | ||||
| if (context_.global_step != nullptr) { | |||||
| (void) rtFree(context_.global_step); | |||||
| } | |||||
| } | } | ||||
| Status HybridModelExecutor::Init() { | Status HybridModelExecutor::Init() { | ||||
| @@ -51,8 +48,10 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { | |||||
| auto root_graph_item = model_->GetRootGraphItem(); | auto root_graph_item = model_->GetRootGraphItem(); | ||||
| GE_CHECK_NOTNULL(root_graph_item); | GE_CHECK_NOTNULL(root_graph_item); | ||||
| GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration, | |||||
| sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream)); | |||||
| if (context_.global_step != nullptr) { | |||||
| GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration, | |||||
| sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream)); | |||||
| } | |||||
| SubgraphExecutor executor(model_->GetRootGraphItem(), &context_); | SubgraphExecutor executor(model_->GetRootGraphItem(), &context_); | ||||
| auto ret = ExecuteGraphInternal(executor, args); | auto ret = ExecuteGraphInternal(executor, args); | ||||
| Cleanup(); | Cleanup(); | ||||
| @@ -116,8 +115,8 @@ Status HybridModelExecutor::InitExecutionContext() { | |||||
| GE_CHK_RT_RET(rtCtxGetCurrent(&context_.rt_context)); | GE_CHK_RT_RET(rtCtxGetCurrent(&context_.rt_context)); | ||||
| GE_CHK_RT_RET(rtCtxCreate(&context_.rt_gen_context, RT_CTX_GEN_MODE, 0)); | GE_CHK_RT_RET(rtCtxCreate(&context_.rt_gen_context, RT_CTX_GEN_MODE, 0)); | ||||
| GE_CHK_RT_RET(rtCtxSetCurrent(context_.rt_context)); | GE_CHK_RT_RET(rtCtxSetCurrent(context_.rt_context)); | ||||
| GE_CHK_RT_RET(rtMalloc(&context_.global_step, sizeof(uint64_t), RT_MEMORY_HBM)); | |||||
| context_.global_step = model_->GetGlobalStep(); | |||||
| context_.stream = stream_; | context_.stream = stream_; | ||||
| context_.model = model_; | context_.model = model_; | ||||
| context_.is_eos_ = false; | context_.is_eos_ = false; | ||||
| @@ -357,5 +357,12 @@ TensorValue *HybridModel::GetTensor(const NodePtr &node) const { | |||||
| return GetVariable(node->GetName()); | return GetVariable(node->GetName()); | ||||
| } | } | ||||
| // Returns the data pointer held by global_step_, or nullptr when that buffer has not been created. | |||||
| void *HybridModel::GetGlobalStep() const { | |||||
| return (global_step_ != nullptr) ? global_step_->GetData() : nullptr; | |||||
| } | |||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -45,6 +45,8 @@ class HybridModel { | |||||
| return root_runtime_param_.session_id; | return root_runtime_param_.session_id; | ||||
| } | } | ||||
| void *GetGlobalStep() const; | |||||
| GeModelPtr GetGeModel(const NodePtr &node) const; | GeModelPtr GetGeModel(const NodePtr &node) const; | ||||
| NodeItem *MutableNodeItem(const NodePtr &node); | NodeItem *MutableNodeItem(const NodePtr &node); | ||||
| @@ -158,6 +160,7 @@ class HybridModel { | |||||
| std::map<string, std::unique_ptr<TensorBuffer>> weight_buffer_map_; | std::map<string, std::unique_ptr<TensorBuffer>> weight_buffer_map_; | ||||
| RuntimeParam root_runtime_param_; | RuntimeParam root_runtime_param_; | ||||
| string om_name_; | string om_name_; | ||||
| std::unique_ptr<TensorBuffer> global_step_; | |||||
| }; | }; | ||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -1568,6 +1568,10 @@ Status HybridModelBuilder::InitModelMem() { | |||||
| } | } | ||||
| runtime_param_.var_base = hybrid_model_.var_mem_base_; | runtime_param_.var_base = hybrid_model_.var_mem_base_; | ||||
| auto allocator = NpuMemoryAllocator::GetAllocator(); | |||||
| GE_CHECK_NOTNULL(allocator); | |||||
| hybrid_model_.global_step_ = TensorBuffer::Create(allocator, sizeof(int64_t)); | |||||
| GE_CHECK_NOTNULL(hybrid_model_.global_step_); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -18,6 +18,7 @@ | |||||
| #include "cce/aicpu_engine_struct.h" | #include "cce/aicpu_engine_struct.h" | ||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "framework/common/fmk_error_codes.h" | #include "framework/common/fmk_error_codes.h" | ||||
| #include "common/dump/dump_manager.h" | |||||
| #include "common/ge/ge_util.h" | #include "common/ge/ge_util.h" | ||||
| #include "graph/attr_value.h" | #include "graph/attr_value.h" | ||||
| #include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
| @@ -110,15 +111,6 @@ Status KnownNodeTask::Init(TaskContext &context) { | |||||
| GELOGI("KnownNodeTask::Init mem base is %p, size %lu.", | GELOGI("KnownNodeTask::Init mem base is %p, size %lu.", | ||||
| davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size); | davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size); | ||||
| } | } | ||||
| if (!load_flag_) { | |||||
| auto dump_properties = context.GetDumpProperties(); | |||||
| if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) { | |||||
| davinci_model_->SetDumpProperties(dump_properties); | |||||
| void *global_step = context.GetExecutionContext()->global_step; | |||||
| davinci_model_->SetKnownShapeGlobalStep(global_step); | |||||
| } | |||||
| load_flag_ = true; | |||||
| } | |||||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(), | GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(), | ||||
| davinci_model_->Id(), davinci_model_->SubModelId()), | davinci_model_->Id(), davinci_model_->SubModelId()), | ||||
| "KnownNodeTask::Init destroy aicpu kernel failed."); | "KnownNodeTask::Init destroy aicpu kernel failed."); | ||||
| @@ -126,13 +118,21 @@ Status KnownNodeTask::Init(TaskContext &context) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status KnownNodeTask::InitDavinciModel() { | |||||
| Status KnownNodeTask::InitDavinciModel(const HybridModel &model) { | |||||
| GELOGD("[Init][Model] start"); | GELOGD("[Init][Model] start"); | ||||
| davinci_model_->InitRuntimeParams(); | davinci_model_->InitRuntimeParams(); | ||||
| GE_CHK_STATUS_RET(davinci_model_->InitVariableMem(), "init variable mem failed"); | GE_CHK_STATUS_RET(davinci_model_->InitVariableMem(), "init variable mem failed"); | ||||
| int32_t device_id = 0; | int32_t device_id = 0; | ||||
| GE_CHK_RT_RET(rtGetDevice(&device_id)); | GE_CHK_RT_RET(rtGetDevice(&device_id)); | ||||
| davinci_model_->SetDeviceId(static_cast<uint32_t>(device_id)); | davinci_model_->SetDeviceId(static_cast<uint32_t>(device_id)); | ||||
| auto dump_properties = DumpManager::GetInstance().GetDumpProperties(model.GetSessionId()); | |||||
| if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) { | |||||
| davinci_model_->SetDumpProperties(dump_properties); | |||||
| void *global_step = model.GetGlobalStep(); | |||||
| davinci_model_->SetKnownShapeGlobalStep(global_step); | |||||
| } | |||||
| GE_CHK_STATUS_RET(DoInitDavinciModel(), "[Init][Model] Failed to init davinci model."); | GE_CHK_STATUS_RET(DoInitDavinciModel(), "[Init][Model] Failed to init davinci model."); | ||||
| GELOGD("[Init][Model] success"); | GELOGD("[Init][Model] success"); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -180,7 +180,7 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node | |||||
| auto known_node_task = MakeShared<KnownNodeTask>(davinci_model); | auto known_node_task = MakeShared<KnownNodeTask>(davinci_model); | ||||
| GE_CHECK_NOTNULL(known_node_task); | GE_CHECK_NOTNULL(known_node_task); | ||||
| GE_CHK_STATUS_RET_NOLOG(known_node_task->InitDavinciModel()); | |||||
| GE_CHK_STATUS_RET_NOLOG(known_node_task->InitDavinciModel(model)); | |||||
| GELOGI("[%s] KnownNodeExecutor::LoadTask success.", node->GetName().c_str()); | GELOGI("[%s] KnownNodeExecutor::LoadTask success.", node->GetName().c_str()); | ||||
| task = std::move(known_node_task); | task = std::move(known_node_task); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -36,7 +36,7 @@ class KnownNodeTask : public NodeTask { | |||||
| Status UpdateArgs(TaskContext &context) override; | Status UpdateArgs(TaskContext &context) override; | ||||
| Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override; | Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override; | ||||
| Status Init(TaskContext &context) override; | Status Init(TaskContext &context) override; | ||||
| Status InitDavinciModel(); | |||||
| Status InitDavinciModel(const HybridModel &model); | |||||
| protected: | protected: | ||||
| virtual Status DoInitDavinciModel(); | virtual Status DoInitDavinciModel(); | ||||
| @@ -22,6 +22,7 @@ | |||||
| #define protected public | #define protected public | ||||
| #define private public | #define private public | ||||
| #include "hybrid/node_executor/compiledsubgraph/known_node_executor.h" | #include "hybrid/node_executor/compiledsubgraph/known_node_executor.h" | ||||
| #include "common/dump/dump_manager.h" | |||||
| #undef private | #undef private | ||||
| #undef protected | #undef protected | ||||
| #include "graph/manager/graph_mem_allocator.h" | #include "graph/manager/graph_mem_allocator.h" | ||||
| @@ -56,7 +57,11 @@ TEST_F(UnknownNodeExecutorTest, test_init_davinci_model) { | |||||
| AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 1024); | AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 1024); | ||||
| davinci_model->Assign(ge_model); | davinci_model->Assign(ge_model); | ||||
| HybridModel model(nullptr); | |||||
| KnownNodeTaskMock mock(davinci_model); | KnownNodeTaskMock mock(davinci_model); | ||||
| DumpProperties dump_properties; | |||||
| dump_properties.enable_dump_ = "1"; | |||||
| DumpManager::GetInstance().AddDumpProperties(model.GetSessionId(), dump_properties); | |||||
| EXPECT_CALL(mock, DoInitDavinciModel).WillOnce(::testing::Return(SUCCESS)); | EXPECT_CALL(mock, DoInitDavinciModel).WillOnce(::testing::Return(SUCCESS)); | ||||
| ASSERT_EQ(mock.InitDavinciModel(), SUCCESS); | |||||
| } | |||||
| ASSERT_EQ(mock.InitDavinciModel(model), SUCCESS); | |||||
| } | |||||