Merge pull request !1899 from 赵之轩/my_dev4 (tags/v1.5.1)
| @@ -214,7 +214,7 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||||
| context.SetTaskId(task_id); | context.SetTaskId(task_id); | ||||
| context.SetStreamId(stream_id); | context.SetStreamId(stream_id); | ||||
| GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | ||||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); | |||||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim(), (*it)->GetOpType()); | |||||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
| } | } | ||||
| @@ -33,6 +33,7 @@ namespace { | |||||
| constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | ||||
| constexpr char const *kAttrOpParamSize = "op_para_size"; | constexpr char const *kAttrOpParamSize = "op_para_size"; | ||||
| constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; | constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; | ||||
| const string kAtomicOpType = "DynamicAtomicAddrClean"; | |||||
| std::atomic<std::uint64_t> log_id(0); | std::atomic<std::uint64_t> log_id(0); | ||||
| } // namespace | } // namespace | ||||
| @@ -51,6 +52,7 @@ bool TbeHandleRegistry::AddHandle(std::unique_ptr<TbeHandleHolder> &&holder) { | |||||
| } | } | ||||
| Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | ||||
| op_type_ = op_desc.GetType(); | |||||
| log_name_ = op_desc.GetName() + "_tvmbin"; | log_name_ = op_desc.GetName() + "_tvmbin"; | ||||
| log_id_ = log_id++; | log_id_ = log_id++; | ||||
| auto op_desc_ptr = MakeShared<OpDesc>(op_desc); | auto op_desc_ptr = MakeShared<OpDesc>(op_desc); | ||||
| @@ -538,6 +540,10 @@ const std::string &AiCoreOpTask::GetName() const { | |||||
| return stub_name_; | return stub_name_; | ||||
| } | } | ||||
| const std::string &AiCoreOpTask::GetOpType() const { | |||||
| return op_type_; | |||||
| } | |||||
| std::string AiCoreOpTask::GetKeyForOpParamSize() const { | std::string AiCoreOpTask::GetKeyForOpParamSize() const { | ||||
| return kAttrOpParamSize; | return kAttrOpParamSize; | ||||
| } | } | ||||
| @@ -631,6 +637,10 @@ std::string AtomicAddrCleanOpTask::GetKeyForKernelName(const OpDesc &op_desc) co | |||||
| return op_desc.GetName() + "_atomic_kernelname"; | return op_desc.GetName() + "_atomic_kernelname"; | ||||
| } | } | ||||
| const std::string &AtomicAddrCleanOpTask::GetOpType() const { | |||||
| return kAtomicOpType; | |||||
| } | |||||
| Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { | Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { | ||||
| GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str()); | GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str()); | ||||
| GE_CHK_STATUS_RET(optiling::OpAtomicCalculateV2(*node, tiling_info), | GE_CHK_STATUS_RET(optiling::OpAtomicCalculateV2(*node, tiling_info), | ||||
| @@ -80,6 +80,8 @@ class AiCoreOpTask { | |||||
| void SetSingleOp(bool is_single_op) {is_single_op_ = is_single_op;}; | void SetSingleOp(bool is_single_op) {is_single_op_ = is_single_op;}; | ||||
| virtual const std::string& GetOpType() const; | |||||
| protected: | protected: | ||||
| Status UpdateTilingInfo(TaskContext &context); | Status UpdateTilingInfo(TaskContext &context); | ||||
| virtual std::string GetKeyForOpParamSize() const; | virtual std::string GetKeyForOpParamSize() const; | ||||
| @@ -119,12 +121,14 @@ class AiCoreOpTask { | |||||
| uint64_t log_id_ = 0; | uint64_t log_id_ = 0; | ||||
| std::string log_name_; | std::string log_name_; | ||||
| uint32_t offset_ = 0; | uint32_t offset_ = 0; | ||||
| std::string op_type_; | |||||
| }; | }; | ||||
| class AtomicAddrCleanOpTask : public AiCoreOpTask { | class AtomicAddrCleanOpTask : public AiCoreOpTask { | ||||
| public: | public: | ||||
| Status Init(const OpDesc &op_desc, const domi::TaskDef &task_def) override; | Status Init(const OpDesc &op_desc, const domi::TaskDef &task_def) override; | ||||
| Status UpdateArgs(TaskContext &task_context) override; | Status UpdateArgs(TaskContext &task_context) override; | ||||
| const std::string& GetOpType() const override; | |||||
| protected: | protected: | ||||
| std::string GetKeyForOpParamSize() const override; | std::string GetKeyForOpParamSize() const override; | ||||
| @@ -207,7 +207,7 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void( | |||||
| context.SetTaskId(task_id); | context.SetTaskId(task_id); | ||||
| context.SetStreamId(stream_id); | context.SetStreamId(stream_id); | ||||
| GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | ||||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0); | |||||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0, node_type_); | |||||
| auto callback = [=, &context]() { | auto callback = [=, &context]() { | ||||
| GELOGD("Node[%s] callback start.", node_name_.c_str()); | GELOGD("Node[%s] callback start.", node_name_.c_str()); | ||||
| RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | ||||
| @@ -571,8 +571,8 @@ Status TaskContext::Synchronize() { | |||||
| return execution_context_->Synchronize(GetStream()); | return execution_context_->Synchronize(GetStream()); | ||||
| } | } | ||||
| Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||||
| const std::string &task_type, uint32_t block_dim) { | |||||
| Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type, | |||||
| uint32_t block_dim, const std::string &op_type) { | |||||
| if (ProfilingManager::Instance().ProfilingModelLoadOn()) { | if (ProfilingManager::Instance().ProfilingModelLoadOn()) { | ||||
| const NodeItem &node_item = GetNodeItem(); | const NodeItem &node_item = GetNodeItem(); | ||||
| auto op_desc = node_item.GetOpDesc(); | auto op_desc = node_item.GetOpDesc(); | ||||
| @@ -586,7 +586,7 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream | |||||
| TaskDescInfo tmp_task_desc_info; | TaskDescInfo tmp_task_desc_info; | ||||
| tmp_task_desc_info.model_name = dynamic_model_name; | tmp_task_desc_info.model_name = dynamic_model_name; | ||||
| tmp_task_desc_info.op_name = op_desc->GetName(); | tmp_task_desc_info.op_name = op_desc->GetName(); | ||||
| tmp_task_desc_info.op_type = op_desc->GetType(); | |||||
| tmp_task_desc_info.op_type = op_type; | |||||
| tmp_task_desc_info.block_dim = block_dim; | tmp_task_desc_info.block_dim = block_dim; | ||||
| tmp_task_desc_info.task_type = task_type; | tmp_task_desc_info.task_type = task_type; | ||||
| tmp_task_desc_info.task_id = task_id; | tmp_task_desc_info.task_id = task_id; | ||||
| @@ -118,8 +118,8 @@ class TaskContext { | |||||
| void *handle_ = nullptr; | void *handle_ = nullptr; | ||||
| const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | ||||
| Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||||
| const std::string &task_type, uint32_t block_dim); | |||||
| Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type, | |||||
| uint32_t block_dim, const std::string &op_type); | |||||
| void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | ||||
| private: | private: | ||||
| @@ -119,7 +119,7 @@ TEST_F(UtestExecutionEngine, ExecuteAsync_without_callback_and_kernel_task) { | |||||
| uint32_t stream_id = 1; | uint32_t stream_id = 1; | ||||
| std::string task_type = "rts"; | std::string task_type = "rts"; | ||||
| uint32_t block_dim = 0; | uint32_t block_dim = 0; | ||||
| node_state->GetTaskContext()->SaveProfilingTaskDescInfo(task_id, stream_id, task_type, block_dim); | |||||
| node_state->GetTaskContext()->SaveProfilingTaskDescInfo(task_id, stream_id, task_type, block_dim, op_desc->GetType()); | |||||
| ASSERT_TRUE(node_state->GetTaskContext() != nullptr); | ASSERT_TRUE(node_state->GetTaskContext() != nullptr); | ||||
| @@ -102,7 +102,7 @@ TEST_F(UtestGeHybrid, aicore_op_task_init_success) { | |||||
| op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel); | op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel); | ||||
| std::string kernel_name("kernel/Add"); | std::string kernel_name("kernel/Add"); | ||||
| AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); | AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); | ||||
| ASSERT_EQ(aicore_task->InitWithTaskDef(*op_desc.get(), task_def), SUCCESS); | |||||
| ASSERT_EQ(aicore_task->Init(*op_desc.get(), task_def), SUCCESS); | |||||
| rtStream_t stream = nullptr; | rtStream_t stream = nullptr; | ||||
| rtStreamCreate(&stream, 0); | rtStreamCreate(&stream, 0); | ||||
| ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); | ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); | ||||
| @@ -678,6 +678,15 @@ TEST_F(UtestGeHybrid, test_key_for_kernel_bin) { | |||||
| EXPECT_EQ(atomic_task->GetKeyForKernelName(op_desc), "Sum_atomic_kernelname"); | EXPECT_EQ(atomic_task->GetKeyForKernelName(op_desc), "Sum_atomic_kernelname"); | ||||
| } | } | ||||
| TEST_F(UtestGeHybrid, test_op_type) { | |||||
| auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask()); | |||||
| aicore_task->op_type_ = "Add"; | |||||
| EXPECT_EQ(aicore_task->GetOpType(), "Add"); | |||||
| auto atomic_task = std::unique_ptr<hybrid::AtomicAddrCleanOpTask>(new(std::nothrow)hybrid::AtomicAddrCleanOpTask()); | |||||
| EXPECT_EQ(atomic_task->GetOpType(), "DynamicAtomicAddrClean"); | |||||
| } | |||||
| TEST_F(UtestGeHybrid, TestParseDependentInputNodesForHccl) { | TEST_F(UtestGeHybrid, TestParseDependentInputNodesForHccl) { | ||||
| NodeExecutorManager::GetInstance().engine_mapping_.emplace("ops_kernel_info_hccl", | NodeExecutorManager::GetInstance().engine_mapping_.emplace("ops_kernel_info_hccl", | ||||
| NodeExecutorManager::ExecutorType::HCCL); | NodeExecutorManager::ExecutorType::HCCL); | ||||