From a781b6c3548ec748d869cbb8d51e11d515c322a1 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Tue, 29 Jun 2021 16:56:06 +0800 Subject: [PATCH] Fix bug of atomic profiling. --- .../node_executor/aicore/aicore_node_executor.cc | 2 +- ge/hybrid/node_executor/aicore/aicore_op_task.cc | 10 ++++++++++ ge/hybrid/node_executor/aicore/aicore_op_task.h | 4 ++++ ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc | 2 +- ge/hybrid/node_executor/task_context.cc | 6 +++--- ge/hybrid/node_executor/task_context.h | 4 ++-- .../executor/worker/execution_engine_unittest.cc | 2 +- tests/ut/ge/hybrid/ge_hybrid_unittest.cc | 11 ++++++++++- 8 files changed, 32 insertions(+), 9 deletions(-) diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc index c2ce24a4..7a22a062 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc @@ -208,7 +208,7 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function context.SetTaskId(task_id); context.SetStreamId(stream_id); GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); - (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); + (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim(), (*it)->GetOpType()); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); } diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index a32f2999..b34cc0c6 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -33,6 +33,7 @@ namespace { constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; constexpr char const *kAttrOpParamSize = "op_para_size"; constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; +const string kAtomicOpType = "DynamicAtomicAddrClean"; std::atomic log_id(0); } // namespace @@ -51,6 +52,7 @@ bool TbeHandleRegistry::AddHandle(std::unique_ptr &&holder) { } Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { + op_type_ = op_desc.GetType(); log_name_ = op_desc.GetName() + "_tvmbin"; log_id_ = log_id++; auto op_desc_ptr = MakeShared(op_desc); @@ -538,6 +540,10 @@ const std::string &AiCoreOpTask::GetName() const { return stub_name_; } +const std::string &AiCoreOpTask::GetOpType() const { + return op_type_; +} + std::string AiCoreOpTask::GetKeyForOpParamSize() const { return kAttrOpParamSize; } @@ -631,6 +637,10 @@ std::string AtomicAddrCleanOpTask::GetKeyForKernelName(const OpDesc &op_desc) co return op_desc.GetName() + "_atomic_kernelname"; } +const std::string &AtomicAddrCleanOpTask::GetOpType() const { + return kAtomicOpType; +} + Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str()); GE_CHK_STATUS_RET(optiling::OpAtomicCalculateV2(*node, tiling_info), diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h index b03bd9e4..8d7be0db 100755 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -78,6 +78,8 @@ class AiCoreOpTask { void SetSingleOp(bool is_single_op) {is_single_op_ = is_single_op;}; + virtual const std::string& GetOpType() const; + protected: Status UpdateTilingInfo(TaskContext &context); virtual std::string GetKeyForOpParamSize() const; @@ -117,12 +119,14 @@ class AiCoreOpTask { uint64_t log_id_ = 0; std::string log_name_; uint32_t offset_ = 0; + std::string op_type_; }; class AtomicAddrCleanOpTask : public AiCoreOpTask { public: Status Init(const OpDesc &op_desc, const domi::TaskDef &task_def) override; Status UpdateArgs(TaskContext &task_context) override; + const std::string& GetOpType() const override; protected: std::string GetKeyForOpParamSize() const override; diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index c83a76d1..820c9b56 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -207,7 +207,7 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::functionSynchronize(GetStream()); } -Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, - const std::string &task_type, uint32_t block_dim) { +Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type, + uint32_t block_dim, const std::string &op_type) { if (ProfilingManager::Instance().ProfilingModelLoadOn()) { const NodeItem &node_item = GetNodeItem(); auto op_desc = node_item.GetOpDesc(); @@ -589,7 +589,7 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream TaskDescInfo tmp_task_desc_info; tmp_task_desc_info.model_name = dynamic_model_name; tmp_task_desc_info.op_name = op_desc->GetName(); - tmp_task_desc_info.op_type = op_desc->GetType(); + tmp_task_desc_info.op_type = op_type; tmp_task_desc_info.block_dim = block_dim; tmp_task_desc_info.task_type = task_type; tmp_task_desc_info.task_id = task_id; diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index c96e194e..5304606b 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -118,8 +118,8 @@ class TaskContext { void *handle_ = nullptr; const std::vector& GetProfilingTaskDescInfo() const { return task_desc_info; } - Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, - const std::string &task_type, uint32_t block_dim); + Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type, + uint32_t block_dim, const std::string &op_type); void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } private: diff --git a/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc b/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc index cc20d614..07701f4d 100644 --- a/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc +++ b/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc @@ -119,7 +119,7 @@ TEST_F(UtestExecutionEngine, ExecuteAsync_without_callback_and_kernel_task) { uint32_t stream_id = 1; std::string task_type = "rts"; uint32_t block_dim = 0; - node_state->GetTaskContext()->SaveProfilingTaskDescInfo(task_id, stream_id, task_type, block_dim); + node_state->GetTaskContext()->SaveProfilingTaskDescInfo(task_id, stream_id, task_type, block_dim, op_desc->GetType()); ASSERT_TRUE(node_state->GetTaskContext() != nullptr); diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc index 4f14f628..688d3a34 100644 --- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc +++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc @@ -100,7 +100,7 @@ TEST_F(UtestGeHybrid, aicore_op_task_init_success) { op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel); std::string kernel_name("kernel/Add"); AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); - ASSERT_EQ(aicore_task->InitWithTaskDef(*op_desc.get(), task_def), SUCCESS); + ASSERT_EQ(aicore_task->Init(*op_desc.get(), task_def), SUCCESS); rtStream_t stream = nullptr; rtStreamCreate(&stream, 0); ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); @@ -676,6 +676,15 @@ TEST_F(UtestGeHybrid, test_key_for_kernel_bin) { EXPECT_EQ(atomic_task->GetKeyForKernelName(op_desc), "Sum_atomic_kernelname"); } +TEST_F(UtestGeHybrid, test_op_type) { + auto aicore_task = std::unique_ptr(new(std::nothrow)hybrid::AiCoreOpTask()); + aicore_task->op_type_ = "Add"; + EXPECT_EQ(aicore_task->GetOpType(), "Add"); + + auto atomic_task = std::unique_ptr(new(std::nothrow)hybrid::AtomicAddrCleanOpTask()); + EXPECT_EQ(atomic_task->GetOpType(), "DynamicAtomicAddrClean"); +} + TEST_F(UtestGeHybrid, TestParseDependentInputNodesForHccl) { NodeExecutorManager::GetInstance().engine_mapping_.emplace("ops_kernel_info_hccl", NodeExecutorManager::ExecutorType::HCCL);