@@ -18,6 +18,7 @@ | |||
#include "framework/common/taskdown_common.h" | |||
#include "hybrid/executor/hybrid_execution_context.h" | |||
#include "external/runtime/rt_error_codes.h" | |||
#include "single_op/task/build_task_utils.h" | |||
namespace ge { | |||
namespace hybrid { | |||
@@ -196,6 +197,11 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | |||
GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | |||
GE_CHK_STATUS_RET_NOLOG(CheckOverflow(context)); | |||
GE_CHECK_NOTNULL(context.GetExecutionContext()->model); | |||
GELOGD("[DEBUG_TASK_INFO : Executor Task] %s/%s %s", | |||
context.GetExecutionContext()->model->GetModelName().c_str(), | |||
(*it)->GetName().empty() ? (*it)->GetLogName().c_str() : (*it)->GetName().c_str(), | |||
BuildTaskUtils::GetTaskInfo(context).c_str()); | |||
// save profiling data | |||
uint32_t task_id = 0; | |||
uint32_t stream_id = 0; | |||
@@ -72,6 +72,8 @@ class AiCoreOpTask { | |||
const std::string& GetName() const; | |||
const std::string& GetLogName() const {return log_name_;} | |||
bool GetClearAtomic() const {return clear_atomic_;} | |||
uint32_t GetBlockDim() const {return block_dim_;} | |||
@@ -297,6 +297,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c | |||
for (auto &task : tasks_) { | |||
ret = task->LaunchKernel(stream_); | |||
GELOGD("[DEBUG_TASK_INFO : Static Task] %s %s", | |||
task->GetTaskName().c_str(), | |||
BuildTaskUtils::GetTaskInfo(task->GetOpdesc(), inputs, outputs).c_str()); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
@@ -447,6 +450,8 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||
} else { | |||
GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | |||
} | |||
GELOGD("[DEBUG_TASK_INFO : Dynamic Task] %s", | |||
BuildTaskUtils::GetTaskInfo(op_task_->GetOpdesc(), input_buffers, output_buffers).c_str()); | |||
GE_CHK_STATUS_RET_NOLOG(op_task_->OpenDump(stream_)); | |||
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | |||
return SUCCESS; | |||
@@ -70,7 +70,9 @@ std::vector<void *> BuildTaskUtils::GetKernelArgs(const OpDescPtr &op_desc, | |||
return JoinAddresses(addresses); | |||
} | |||
std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||
std::string BuildTaskUtils::InnerGetTaskInfo(const OpDescPtr &op_desc, | |||
const std::vector<const void *> &input_addrs, | |||
const std::vector<const void *> &output_addrs) { | |||
std::stringstream ss; | |||
if (op_desc != nullptr) { | |||
auto op_type = op_desc->GetType(); | |||
@@ -87,7 +89,10 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||
} | |||
ss << TypeUtils::DataTypeToSerialString(input->GetDataType()) << " "; | |||
ss << TypeUtils::FormatToSerialString(input->GetFormat()); | |||
ss << VectorToString(input->GetShape().GetDims()); | |||
ss << VectorToString(input->GetShape().GetDims()) << " "; | |||
if (idx < input_addrs.size()) { | |||
ss << input_addrs[idx]; | |||
} | |||
if (idx < op_desc->GetInputsSize() - 1) { | |||
ss << ","; | |||
} | |||
@@ -101,7 +106,10 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||
const GeShape &out_shape = output->GetShape(); | |||
const auto &dims = out_shape.GetDims(); | |||
ss << TypeUtils::FormatToSerialString(out_format); | |||
ss << VectorToString(dims); | |||
ss << VectorToString(dims) << " "; | |||
if (idx < output_addrs.size()) { | |||
ss << output_addrs[idx]; | |||
} | |||
if (idx < op_desc->GetOutputsSize() - 1) { | |||
ss << ","; | |||
} | |||
@@ -110,4 +118,44 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||
} | |||
return ss.str(); | |||
} | |||
std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||
vector<const void *> input_addrs; | |||
vector<const void *> output_addrs; | |||
return InnerGetTaskInfo(op_desc, input_addrs, output_addrs); | |||
} | |||
std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc, | |||
const std::vector<DataBuffer> &inputs, | |||
const std::vector<DataBuffer> &outputs) { | |||
vector<const void *> input_addrs; | |||
vector<const void *> output_addrs; | |||
GE_CHECK_NOTNULL_EXEC(op_desc, return ""); | |||
if (op_desc->GetAllInputsSize() == inputs.size()) { | |||
std::for_each(inputs.begin(), inputs.end(), [&](const DataBuffer &db) { input_addrs.push_back(db.data); }); | |||
} | |||
if (op_desc->GetOutputsSize() == outputs.size()) { | |||
std::for_each(outputs.begin(), outputs.end(), [&](const DataBuffer &db) { output_addrs.push_back(db.data); }); | |||
} | |||
return InnerGetTaskInfo(op_desc, input_addrs, output_addrs); | |||
} | |||
// Formats a task description for a hybrid-executor task, including the
// tensor data addresses held by the task context. Returns "" when the op
// desc or any referenced tensor is unavailable.
std::string BuildTaskUtils::GetTaskInfo(const hybrid::TaskContext &task_context) {
  auto &node_item = task_context.GetNodeItem();
  auto op_desc = node_item.GetOpDesc();
  GE_CHECK_NOTNULL_EXEC(op_desc, return "");
  vector<const void *> input_addrs;
  vector<const void *> output_addrs;
  // Collect addresses only when tensor counts match the op desc exactly;
  // otherwise the address columns stay empty rather than misaligned.
  if (op_desc->GetAllInputsSize() == static_cast<uint32_t>(task_context.NumInputs())) {
    for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
      // GetInput may return nullptr; guard before dereferencing.
      auto *input = task_context.GetInput(i);
      GE_CHECK_NOTNULL_EXEC(input, return "");
      input_addrs.push_back(input->GetData());
    }
  }
  if (op_desc->GetOutputsSize() == static_cast<uint32_t>(task_context.NumOutputs())) {
    for (size_t i = 0; i < op_desc->GetOutputsSize(); ++i) {
      // GetOutput may return nullptr; guard before dereferencing.
      auto *output = task_context.GetOutput(i);
      GE_CHECK_NOTNULL_EXEC(output, return "");
      output_addrs.push_back(output->GetData());
    }
  }
  return InnerGetTaskInfo(op_desc, input_addrs, output_addrs);
}
} // namespace ge |
@@ -23,6 +23,7 @@ | |||
#include "graph/op_desc.h" | |||
#include "single_op/single_op.h" | |||
#include "single_op/single_op_model.h" | |||
#include "hybrid/node_executor/task_context.h" | |||
namespace ge { | |||
class BuildTaskUtils { | |||
@@ -35,7 +36,14 @@ class BuildTaskUtils { | |||
bool keep_workspace = true); | |||
static std::vector<void *> JoinAddresses(const std::vector<std::vector<void *>> &addresses); | |||
static std::vector<void *> GetKernelArgs(const OpDescPtr &op_desc, const SingleOpModelParam ¶m); | |||
static std::string InnerGetTaskInfo(const OpDescPtr &op_desc, | |||
const std::vector<const void *> &input_addrs, | |||
const std::vector<const void *> &output_addrs); | |||
static std::string GetTaskInfo(const OpDescPtr &op_desc); | |||
static std::string GetTaskInfo(const OpDescPtr &op_desc, | |||
const std::vector<DataBuffer> &inputs, | |||
const std::vector<DataBuffer> &outputs); | |||
static std::string GetTaskInfo(const hybrid::TaskContext& task_context); | |||
template<typename T> | |||
static std::string VectorToString(const std::vector<T> &values) { | |||
std::stringstream ss; | |||
@@ -89,6 +89,7 @@ Status OpTask::OpenDump(rtStream_t stream) { | |||
void TbeOpTask::SetStubFunc(const std::string &name, const void *stub_func) { | |||
this->stub_name_ = name; | |||
this->stub_func_ = stub_func; | |||
this->task_name_ = name; | |||
} | |||
void TbeOpTask::SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | |||
@@ -44,6 +44,7 @@ class OpTask { | |||
virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | |||
void SetModelArgs(std::string model_name, uint32_t model_id); | |||
Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); | |||
const std::string &GetTaskName() const {return task_name_;} | |||
void SetOpDesc(const OpDescPtr &op_desc) { | |||
op_desc_ = op_desc; | |||
} | |||
@@ -66,6 +67,7 @@ class OpTask { | |||
std::string model_name_; | |||
uint32_t model_id_ = 0; | |||
uint32_t block_dim_ = 1; | |||
std::string task_name_; | |||
}; | |||
class TbeOpTask : public OpTask { | |||
@@ -34,12 +34,14 @@ | |||
#include "hybrid/executor/hybrid_execution_context.h" | |||
#include "hybrid/executor/hybrid_model_executor.h" | |||
#include "hybrid/node_executor/aicore/aicore_task_builder.h" | |||
#include "hybrid/node_executor/aicore/aicore_node_executor.h" | |||
#include "graph/load/model_manager/tbe_handle_store.h" | |||
#include "graph/manager/graph_mem_allocator.h" | |||
#include "hybrid/common/npu_memory_allocator.h" | |||
#include "graph/types.h" | |||
#include "graph/utils/tensor_utils.h" | |||
#include "graph/testcase/ge_graph/graph_builder_utils.h" | |||
#include "single_op/task/build_task_utils.h" | |||
#include "graph/op_desc_impl.h" | |||
#undef private | |||
#undef protected | |||
@@ -747,4 +749,33 @@ TEST_F(UtestGeHybrid, TestParseDependencies) { | |||
AttrUtils::SetTensor(tensor_desc, "_value", tensor); | |||
std::set<NodePtr> dependent_for_shape_inference; | |||
ASSERT_EQ(builder.ParseDependencies(*node_item, deps, dependent_for_shape_inference), SUCCESS); | |||
} | |||
TEST_F(UtestGeHybrid, TestTaskExecuteAsync) {
  // Build a minimal Add node with two inputs and one output.
  auto graph = make_shared<ComputeGraph>("graph");
  OpDescPtr op_desc = CreateOpDesc("Add", "Add");
  GeShape shape({2, 16});
  GeTensorDesc tensor_desc(shape);
  op_desc->AddInputDesc(tensor_desc);
  op_desc->AddInputDesc(tensor_desc);
  op_desc->AddOutputDesc(tensor_desc);
  auto node = graph->AddNode(op_desc);
  std::unique_ptr<NodeItem> node_item;
  NodeItem::Create(node, node_item);
  node_item->input_start = 0;
  node_item->output_start = 0;

  // Wire up a subgraph context sized for the node's 2 inputs / 1 output.
  GraphExecutionContext execution_context;
  GraphItem graph_item;
  SubgraphContext subgraph_context(&graph_item, &execution_context);
  ASSERT_EQ(subgraph_context.Init(), SUCCESS);
  subgraph_context.all_inputs_.resize(2);
  subgraph_context.all_outputs_.resize(1);

  auto node_state = subgraph_context.GetOrCreateNodeState(node_item.get());
  auto task_context = *node_state->GetTaskContext();
  // Task-info formatting must produce a non-empty description.
  ASSERT_NE(BuildTaskUtils::GetTaskInfo(task_context), "");

  // An empty task list must still execute successfully (no kernels launched).
  // (The previous version also allocated an AiCoreOpTask that was never
  // added to `tasks` nor used — dead code, removed.)
  std::vector<std::unique_ptr<AiCoreOpTask>> tasks;
  AiCoreNodeTask node_task(std::move(tasks));
  ASSERT_EQ(node_task.ExecuteAsync(task_context, nullptr), SUCCESS);
}
@@ -23,6 +23,7 @@ | |||
#define private public | |||
#include "single_op/single_op.h" | |||
#include "single_op/single_op_manager.h" | |||
#include "single_op/task/build_task_utils.h" | |||
#undef private | |||
#undef protected | |||
@@ -126,9 +127,19 @@ TEST_F(UtestSingleOp, test_singleop_execute_async1) { | |||
SingleOpModelParam model_params; | |||
single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params)); | |||
single_op.args_.resize(1); | |||
auto *tbe_task = new (std::nothrow) TbeOpTask(); | |||
ge::OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL); | |||
EXPECT_EQ(op_desc->AddInputDesc("x", GeTensorDesc(GeShape({2}), FORMAT_NCHW)), GRAPH_SUCCESS); | |||
EXPECT_EQ(op_desc->AddOutputDesc("x", GeTensorDesc(GeShape({2}), FORMAT_NCHW)), GRAPH_SUCCESS); | |||
EXPECT_NE(BuildTaskUtils::GetTaskInfo(op_desc), ""); | |||
ge::ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default"); | |||
ge::NodePtr node = graph->AddNode(op_desc); | |||
tbe_task->node_ = node; | |||
tbe_task->op_desc_ = op_desc; | |||
single_op.tasks_.push_back(tbe_task); | |||
EXPECT_EQ(single_op.hybrid_model_executor_, nullptr); | |||
EXPECT_EQ(single_op.running_param_->mem_base, nullptr); | |||
EXPECT_EQ(single_op.tasks_.size(), 0); | |||
EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), SUCCESS); | |||
} | |||