| @@ -18,6 +18,7 @@ | |||
| #include "framework/common/taskdown_common.h" | |||
| #include "hybrid/executor/hybrid_execution_context.h" | |||
| #include "external/runtime/rt_error_codes.h" | |||
| #include "single_op/task/build_task_utils.h" | |||
| namespace ge { | |||
| namespace hybrid { | |||
| @@ -196,6 +197,11 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | |||
| GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | |||
| GE_CHK_STATUS_RET_NOLOG(CheckOverflow(context)); | |||
| GE_CHECK_NOTNULL(context.GetExecutionContext()->model); | |||
| GELOGD("[DEBUG_TASK_INFO : Executor Task] %s/%s %s", | |||
| context.GetExecutionContext()->model->GetModelName().c_str(), | |||
| (*it)->GetName().empty() ? (*it)->GetLogName().c_str() : (*it)->GetName().c_str(), | |||
| BuildTaskUtils::GetTaskInfo(context).c_str()); | |||
| // save profiling data | |||
| uint32_t task_id = 0; | |||
| uint32_t stream_id = 0; | |||
| @@ -72,6 +72,8 @@ class AiCoreOpTask { | |||
| const std::string& GetName() const; | |||
| // Read-only access to log_name_; returns a reference, so no string copy is made. | |||
| const std::string &GetLogName() const { | |||
|   return log_name_; | |||
| } | |||
| // Reports the current value of the clear_atomic_ flag. | |||
| bool GetClearAtomic() const { | |||
|   return clear_atomic_; | |||
| } | |||
| // Reports the current value of block_dim_. | |||
| uint32_t GetBlockDim() const { | |||
|   return block_dim_; | |||
| } | |||
| @@ -297,6 +297,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c | |||
| for (auto &task : tasks_) { | |||
| ret = task->LaunchKernel(stream_); | |||
| GELOGD("[DEBUG_TASK_INFO : Static Task] %s %s", | |||
| task->GetTaskName().c_str(), | |||
| BuildTaskUtils::GetTaskInfo(task->GetOpdesc(), inputs, outputs).c_str()); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| @@ -447,6 +450,8 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||
| } else { | |||
| GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | |||
| } | |||
| GELOGD("[DEBUG_TASK_INFO : Dynamic Task] %s", | |||
| BuildTaskUtils::GetTaskInfo(op_task_->GetOpdesc(), input_buffers, output_buffers).c_str()); | |||
| GE_CHK_STATUS_RET_NOLOG(op_task_->OpenDump(stream_)); | |||
| GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | |||
| return SUCCESS; | |||
| @@ -70,7 +70,9 @@ std::vector<void *> BuildTaskUtils::GetKernelArgs(const OpDescPtr &op_desc, | |||
| return JoinAddresses(addresses); | |||
| } | |||
| std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||
| std::string BuildTaskUtils::InnerGetTaskInfo(const OpDescPtr &op_desc, | |||
| const std::vector<const void *> &input_addrs, | |||
| const std::vector<const void *> &output_addrs) { | |||
| std::stringstream ss; | |||
| if (op_desc != nullptr) { | |||
| auto op_type = op_desc->GetType(); | |||
| @@ -87,7 +89,10 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||
| } | |||
| ss << TypeUtils::DataTypeToSerialString(input->GetDataType()) << " "; | |||
| ss << TypeUtils::FormatToSerialString(input->GetFormat()); | |||
| ss << VectorToString(input->GetShape().GetDims()); | |||
| ss << VectorToString(input->GetShape().GetDims()) << " "; | |||
| if (idx < input_addrs.size()) { | |||
| ss << input_addrs[idx]; | |||
| } | |||
| if (idx < op_desc->GetInputsSize() - 1) { | |||
| ss << ","; | |||
| } | |||
| @@ -101,7 +106,10 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||
| const GeShape &out_shape = output->GetShape(); | |||
| const auto &dims = out_shape.GetDims(); | |||
| ss << TypeUtils::FormatToSerialString(out_format); | |||
| ss << VectorToString(dims); | |||
| ss << VectorToString(dims) << " "; | |||
| if (idx < output_addrs.size()) { | |||
| ss << output_addrs[idx]; | |||
| } | |||
| if (idx < op_desc->GetOutputsSize() - 1) { | |||
| ss << ","; | |||
| } | |||
| @@ -110,4 +118,44 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||
| } | |||
| return ss.str(); | |||
| } | |||
| // Overload for callers without buffer addresses: forwards to InnerGetTaskInfo with two empty | |||
| // address lists. | |||
| std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||
|   const std::vector<const void *> no_input_addrs; | |||
|   const std::vector<const void *> no_output_addrs; | |||
|   return InnerGetTaskInfo(op_desc, no_input_addrs, no_output_addrs); | |||
| } | |||
| // Overload that also reports the data pointer of each DataBuffer. | |||
| // A buffer list is only used when its length equals the corresponding op_desc count | |||
| // (GetAllInputsSize() for inputs, GetOutputsSize() for outputs); otherwise that address list | |||
| // is passed on empty. A null op_desc is handled by GE_CHECK_NOTNULL_EXEC with `return ""`. | |||
| std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc, | |||
|                                         const std::vector<DataBuffer> &inputs, | |||
|                                         const std::vector<DataBuffer> &outputs) { | |||
|   GE_CHECK_NOTNULL_EXEC(op_desc, return ""); | |||
|   std::vector<const void *> input_addrs; | |||
|   if (inputs.size() == op_desc->GetAllInputsSize()) { | |||
|     for (const DataBuffer &buffer : inputs) { | |||
|       input_addrs.push_back(buffer.data); | |||
|     } | |||
|   } | |||
|   std::vector<const void *> output_addrs; | |||
|   if (outputs.size() == op_desc->GetOutputsSize()) { | |||
|     for (const DataBuffer &buffer : outputs) { | |||
|       output_addrs.push_back(buffer.data); | |||
|     } | |||
|   } | |||
|   return InnerGetTaskInfo(op_desc, input_addrs, output_addrs); | |||
| } | |||
| // Overload for hybrid-executor tasks: gathers the data pointer of every input/output tensor held | |||
| // by task_context and forwards them, with the node's op desc, to InnerGetTaskInfo. | |||
| // A null op desc is handled by GE_CHECK_NOTNULL_EXEC with `return ""`. | |||
| // An address list is only filled when the op desc count equals the context count | |||
| // (GetAllInputsSize() vs NumInputs(), GetOutputsSize() vs NumOutputs()); otherwise it stays empty. | |||
| std::string BuildTaskUtils::GetTaskInfo(const hybrid::TaskContext &task_context) { | |||
|   auto &node_item = task_context.GetNodeItem(); | |||
|   auto op_desc = node_item.GetOpDesc(); | |||
|   GE_CHECK_NOTNULL_EXEC(op_desc, return ""); | |||
|   std::vector<const void *> input_addrs; | |||
|   std::vector<const void *> output_addrs; | |||
|   const auto input_num = op_desc->GetAllInputsSize(); | |||
|   if (input_num == static_cast<uint32_t>(task_context.NumInputs())) { | |||
|     for (size_t i = 0; i < input_num; ++i) { | |||
|       // This helper only feeds debug logging, so a missing tensor must not crash it: record a | |||
|       // null address instead, which also keeps later entries at their own index. | |||
|       const auto input = task_context.GetInput(i); | |||
|       const void *addr = nullptr; | |||
|       if (input != nullptr) { | |||
|         addr = input->GetData(); | |||
|       } | |||
|       input_addrs.push_back(addr); | |||
|     } | |||
|   } | |||
|   const auto output_num = op_desc->GetOutputsSize(); | |||
|   if (output_num == static_cast<uint32_t>(task_context.NumOutputs())) { | |||
|     for (size_t i = 0; i < output_num; ++i) { | |||
|       // Same null handling as for inputs. | |||
|       const auto output = task_context.GetOutput(i); | |||
|       const void *addr = nullptr; | |||
|       if (output != nullptr) { | |||
|         addr = output->GetData(); | |||
|       } | |||
|       output_addrs.push_back(addr); | |||
|     } | |||
|   } | |||
|   return InnerGetTaskInfo(op_desc, input_addrs, output_addrs); | |||
| } | |||
| } // namespace ge | |||
| @@ -23,6 +23,7 @@ | |||
| #include "graph/op_desc.h" | |||
| #include "single_op/single_op.h" | |||
| #include "single_op/single_op_model.h" | |||
| #include "hybrid/node_executor/task_context.h" | |||
| namespace ge { | |||
| class BuildTaskUtils { | |||
| @@ -35,7 +36,14 @@ class BuildTaskUtils { | |||
| bool keep_workspace = true); | |||
| static std::vector<void *> JoinAddresses(const std::vector<std::vector<void *>> &addresses); | |||
| static std::vector<void *> GetKernelArgs(const OpDescPtr &op_desc, const SingleOpModelParam ¶m); | |||
| static std::string InnerGetTaskInfo(const OpDescPtr &op_desc, | |||
| const std::vector<const void *> &input_addrs, | |||
| const std::vector<const void *> &output_addrs); | |||
| static std::string GetTaskInfo(const OpDescPtr &op_desc); | |||
| static std::string GetTaskInfo(const OpDescPtr &op_desc, | |||
| const std::vector<DataBuffer> &inputs, | |||
| const std::vector<DataBuffer> &outputs); | |||
| static std::string GetTaskInfo(const hybrid::TaskContext& task_context); | |||
| template<typename T> | |||
| static std::string VectorToString(const std::vector<T> &values) { | |||
| std::stringstream ss; | |||
| @@ -89,6 +89,7 @@ Status OpTask::OpenDump(rtStream_t stream) { | |||
| // Stores the stub function pointer and records `name` as both the stub name and the task name. | |||
| void TbeOpTask::SetStubFunc(const std::string &name, const void *stub_func) { | |||
|   stub_func_ = stub_func; | |||
|   stub_name_ = name; | |||
|   task_name_ = name; | |||
| } | |||
| void TbeOpTask::SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | |||
| @@ -44,6 +44,7 @@ class OpTask { | |||
| virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | |||
| void SetModelArgs(std::string model_name, uint32_t model_id); | |||
| Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); | |||
| // Read-only access to task_name_; returns a reference, so no string copy is made. | |||
| const std::string &GetTaskName() const { | |||
|   return task_name_; | |||
| } | |||
| // Replaces the op desc held in op_desc_ with the given one. | |||
| void SetOpDesc(const OpDescPtr &op_desc) { | |||
|   this->op_desc_ = op_desc; | |||
| } | |||
| @@ -66,6 +67,7 @@ class OpTask { | |||
| std::string model_name_; | |||
| uint32_t model_id_ = 0; | |||
| uint32_t block_dim_ = 1; | |||
| std::string task_name_; | |||
| }; | |||
| class TbeOpTask : public OpTask { | |||
| @@ -34,12 +34,14 @@ | |||
| #include "hybrid/executor/hybrid_execution_context.h" | |||
| #include "hybrid/executor/hybrid_model_executor.h" | |||
| #include "hybrid/node_executor/aicore/aicore_task_builder.h" | |||
| #include "hybrid/node_executor/aicore/aicore_node_executor.h" | |||
| #include "graph/load/model_manager/tbe_handle_store.h" | |||
| #include "graph/manager/graph_mem_allocator.h" | |||
| #include "hybrid/common/npu_memory_allocator.h" | |||
| #include "graph/types.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| #include "graph/testcase/ge_graph/graph_builder_utils.h" | |||
| #include "single_op/task/build_task_utils.h" | |||
| #include "graph/op_desc_impl.h" | |||
| #undef private | |||
| #undef protected | |||
| @@ -747,4 +749,33 @@ TEST_F(UtestGeHybrid, TestParseDependencies) { | |||
| AttrUtils::SetTensor(tensor_desc, "_value", tensor); | |||
| std::set<NodePtr> dependent_for_shape_inference; | |||
| ASSERT_EQ(builder.ParseDependencies(*node_item, deps, dependent_for_shape_inference), SUCCESS); | |||
| } | |||
| // Builds a two-input / one-output "Add" node, checks that BuildTaskUtils::GetTaskInfo produces a | |||
| // non-empty string for its task context, and runs AiCoreNodeTask::ExecuteAsync with an empty | |||
| // task list. | |||
| TEST_F(UtestGeHybrid, TestTaskExecuteAsync) { | |||
|   auto graph = make_shared<ComputeGraph>("graph"); | |||
|   OpDescPtr op_desc = CreateOpDesc("Add", "Add"); | |||
|   ASSERT_NE(op_desc, nullptr); | |||
|   GeShape shape({2, 16}); | |||
|   GeTensorDesc tensor_desc(shape); | |||
|   op_desc->AddInputDesc(tensor_desc); | |||
|   op_desc->AddInputDesc(tensor_desc); | |||
|   op_desc->AddOutputDesc(tensor_desc); | |||
|   auto node = graph->AddNode(op_desc); | |||
|   ASSERT_NE(node, nullptr); | |||
|   std::unique_ptr<NodeItem> node_item; | |||
|   NodeItem::Create(node, node_item); | |||
|   // Stop here if creation failed, rather than dereferencing an empty unique_ptr below. | |||
|   ASSERT_NE(node_item, nullptr); | |||
|   node_item->input_start = 0; | |||
|   node_item->output_start = 0; | |||
|   GraphExecutionContext execution_context; | |||
|   GraphItem graph_item; | |||
|   SubgraphContext subgraph_context(&graph_item, &execution_context); | |||
|   ASSERT_EQ(subgraph_context.Init(), SUCCESS); | |||
|   subgraph_context.all_inputs_.resize(2); | |||
|   subgraph_context.all_outputs_.resize(1); | |||
|   auto node_state = subgraph_context.GetOrCreateNodeState(node_item.get()); | |||
|   ASSERT_NE(node_state, nullptr); | |||
|   const auto &task_context_ptr = node_state->GetTaskContext(); | |||
|   ASSERT_NE(task_context_ptr, nullptr); | |||
|   auto task_context = *task_context_ptr; | |||
|   ASSERT_NE(BuildTaskUtils::GetTaskInfo(task_context), ""); | |||
|   // ExecuteAsync is exercised with no AiCoreOpTask entries. | |||
|   std::vector<std::unique_ptr<AiCoreOpTask>> tasks; | |||
|   AiCoreNodeTask node_task(std::move(tasks)); | |||
|   ASSERT_EQ(node_task.ExecuteAsync(task_context, nullptr), SUCCESS); | |||
| } | |||
| @@ -23,6 +23,7 @@ | |||
| #define private public | |||
| #include "single_op/single_op.h" | |||
| #include "single_op/single_op_manager.h" | |||
| #include "single_op/task/build_task_utils.h" | |||
| #undef private | |||
| #undef protected | |||
| @@ -126,9 +127,19 @@ TEST_F(UtestSingleOp, test_singleop_execute_async1) { | |||
| SingleOpModelParam model_params; | |||
| single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params)); | |||
| single_op.args_.resize(1); | |||
| auto *tbe_task = new (std::nothrow) TbeOpTask(); | |||
| ge::OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL); | |||
| EXPECT_EQ(op_desc->AddInputDesc("x", GeTensorDesc(GeShape({2}), FORMAT_NCHW)), GRAPH_SUCCESS); | |||
| EXPECT_EQ(op_desc->AddOutputDesc("x", GeTensorDesc(GeShape({2}), FORMAT_NCHW)), GRAPH_SUCCESS); | |||
| EXPECT_NE(BuildTaskUtils::GetTaskInfo(op_desc), ""); | |||
| ge::ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default"); | |||
| ge::NodePtr node = graph->AddNode(op_desc); | |||
| tbe_task->node_ = node; | |||
| tbe_task->op_desc_ = op_desc; | |||
| single_op.tasks_.push_back(tbe_task); | |||
| EXPECT_EQ(single_op.hybrid_model_executor_, nullptr); | |||
| EXPECT_EQ(single_op.running_param_->mem_base, nullptr); | |||
| EXPECT_EQ(single_op.tasks_.size(), 0); | |||
| EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), SUCCESS); | |||
| } | |||