@@ -18,6 +18,7 @@ | |||||
#include "framework/common/taskdown_common.h" | #include "framework/common/taskdown_common.h" | ||||
#include "hybrid/executor/hybrid_execution_context.h" | #include "hybrid/executor/hybrid_execution_context.h" | ||||
#include "external/runtime/rt_error_codes.h" | #include "external/runtime/rt_error_codes.h" | ||||
#include "single_op/task/build_task_utils.h" | |||||
namespace ge { | namespace ge { | ||||
namespace hybrid { | namespace hybrid { | ||||
@@ -196,6 +197,11 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | ||||
GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | ||||
GE_CHK_STATUS_RET_NOLOG(CheckOverflow(context)); | GE_CHK_STATUS_RET_NOLOG(CheckOverflow(context)); | ||||
GE_CHECK_NOTNULL(context.GetExecutionContext()->model); | |||||
GELOGD("[DEBUG_TASK_INFO : Executor Task] %s/%s %s", | |||||
context.GetExecutionContext()->model->GetModelName().c_str(), | |||||
(*it)->GetName().empty() ? (*it)->GetLogName().c_str() : (*it)->GetName().c_str(), | |||||
BuildTaskUtils::GetTaskInfo(context).c_str()); | |||||
// save profiling data | // save profiling data | ||||
uint32_t task_id = 0; | uint32_t task_id = 0; | ||||
uint32_t stream_id = 0; | uint32_t stream_id = 0; | ||||
@@ -72,6 +72,8 @@ class AiCoreOpTask { | |||||
const std::string& GetName() const; | const std::string& GetName() const; | ||||
const std::string& GetLogName() const {return log_name_;} | |||||
bool GetClearAtomic() const {return clear_atomic_;} | bool GetClearAtomic() const {return clear_atomic_;} | ||||
uint32_t GetBlockDim() const {return block_dim_;} | uint32_t GetBlockDim() const {return block_dim_;} | ||||
@@ -297,6 +297,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c | |||||
for (auto &task : tasks_) { | for (auto &task : tasks_) { | ||||
ret = task->LaunchKernel(stream_); | ret = task->LaunchKernel(stream_); | ||||
GELOGD("[DEBUG_TASK_INFO : Static Task] %s %s", | |||||
task->GetTaskName().c_str(), | |||||
BuildTaskUtils::GetTaskInfo(task->GetOpdesc(), inputs, outputs).c_str()); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return ret; | return ret; | ||||
} | } | ||||
@@ -447,6 +450,8 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||||
} else { | } else { | ||||
GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | ||||
} | } | ||||
GELOGD("[DEBUG_TASK_INFO : Dynamic Task] %s", | |||||
BuildTaskUtils::GetTaskInfo(op_task_->GetOpdesc(), input_buffers, output_buffers).c_str()); | |||||
GE_CHK_STATUS_RET_NOLOG(op_task_->OpenDump(stream_)); | GE_CHK_STATUS_RET_NOLOG(op_task_->OpenDump(stream_)); | ||||
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -70,7 +70,9 @@ std::vector<void *> BuildTaskUtils::GetKernelArgs(const OpDescPtr &op_desc, | |||||
return JoinAddresses(addresses); | return JoinAddresses(addresses); | ||||
} | } | ||||
std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||||
std::string BuildTaskUtils::InnerGetTaskInfo(const OpDescPtr &op_desc, | |||||
const std::vector<const void *> &input_addrs, | |||||
const std::vector<const void *> &output_addrs) { | |||||
std::stringstream ss; | std::stringstream ss; | ||||
if (op_desc != nullptr) { | if (op_desc != nullptr) { | ||||
auto op_type = op_desc->GetType(); | auto op_type = op_desc->GetType(); | ||||
@@ -87,7 +89,10 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||||
} | } | ||||
ss << TypeUtils::DataTypeToSerialString(input->GetDataType()) << " "; | ss << TypeUtils::DataTypeToSerialString(input->GetDataType()) << " "; | ||||
ss << TypeUtils::FormatToSerialString(input->GetFormat()); | ss << TypeUtils::FormatToSerialString(input->GetFormat()); | ||||
ss << VectorToString(input->GetShape().GetDims()); | |||||
ss << VectorToString(input->GetShape().GetDims()) << " "; | |||||
if (idx < input_addrs.size()) { | |||||
ss << input_addrs[idx]; | |||||
} | |||||
if (idx < op_desc->GetInputsSize() - 1) { | if (idx < op_desc->GetInputsSize() - 1) { | ||||
ss << ","; | ss << ","; | ||||
} | } | ||||
@@ -101,7 +106,10 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||||
const GeShape &out_shape = output->GetShape(); | const GeShape &out_shape = output->GetShape(); | ||||
const auto &dims = out_shape.GetDims(); | const auto &dims = out_shape.GetDims(); | ||||
ss << TypeUtils::FormatToSerialString(out_format); | ss << TypeUtils::FormatToSerialString(out_format); | ||||
ss << VectorToString(dims); | |||||
ss << VectorToString(dims) << " "; | |||||
if (idx < output_addrs.size()) { | |||||
ss << output_addrs[idx]; | |||||
} | |||||
if (idx < op_desc->GetOutputsSize() - 1) { | if (idx < op_desc->GetOutputsSize() - 1) { | ||||
ss << ","; | ss << ","; | ||||
} | } | ||||
@@ -110,4 +118,44 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||||
} | } | ||||
return ss.str(); | return ss.str(); | ||||
} | } | ||||
std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||||
vector<const void *> input_addrs; | |||||
vector<const void *> output_addrs; | |||||
return InnerGetTaskInfo(op_desc, input_addrs, output_addrs); | |||||
} | |||||
std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc, | |||||
const std::vector<DataBuffer> &inputs, | |||||
const std::vector<DataBuffer> &outputs) { | |||||
vector<const void *> input_addrs; | |||||
vector<const void *> output_addrs; | |||||
GE_CHECK_NOTNULL_EXEC(op_desc, return ""); | |||||
if (op_desc->GetAllInputsSize() == inputs.size()) { | |||||
std::for_each(inputs.begin(), inputs.end(), [&](const DataBuffer &db) { input_addrs.push_back(db.data); }); | |||||
} | |||||
if (op_desc->GetOutputsSize() == outputs.size()) { | |||||
std::for_each(outputs.begin(), outputs.end(), [&](const DataBuffer &db) { output_addrs.push_back(db.data); }); | |||||
} | |||||
return InnerGetTaskInfo(op_desc, input_addrs, output_addrs); | |||||
} | |||||
std::string BuildTaskUtils::GetTaskInfo(const hybrid::TaskContext &task_context) {
  auto &node_item = task_context.GetNodeItem();
  auto op_desc = node_item.GetOpDesc();
  GE_CHECK_NOTNULL_EXEC(op_desc, return "");
  std::vector<const void *> input_addrs;
  std::vector<const void *> output_addrs;
  // Only collect addresses when the tensor count in the context matches the
  // op definition, so addresses line up with the descriptors printed later.
  if (op_desc->GetAllInputsSize() == static_cast<uint32_t>(task_context.NumInputs())) {
    input_addrs.reserve(op_desc->GetAllInputsSize());
    for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
      const auto *input = task_context.GetInput(i);
      // GetInput may return nullptr for an unbound slot; a debug-logging
      // helper must not crash on it, so record a null address instead.
      input_addrs.push_back(input == nullptr ? nullptr : input->GetData());
    }
  }
  if (op_desc->GetOutputsSize() == static_cast<uint32_t>(task_context.NumOutputs())) {
    output_addrs.reserve(op_desc->GetOutputsSize());
    for (size_t i = 0; i < op_desc->GetOutputsSize(); ++i) {
      const auto *output = task_context.GetOutput(i);
      // Same defensive handling for outputs (see note above on GetInput).
      output_addrs.push_back(output == nullptr ? nullptr : output->GetData());
    }
  }
  return InnerGetTaskInfo(op_desc, input_addrs, output_addrs);
}
} // namespace ge | } // namespace ge |
@@ -23,6 +23,7 @@ | |||||
#include "graph/op_desc.h" | #include "graph/op_desc.h" | ||||
#include "single_op/single_op.h" | #include "single_op/single_op.h" | ||||
#include "single_op/single_op_model.h" | #include "single_op/single_op_model.h" | ||||
#include "hybrid/node_executor/task_context.h" | |||||
namespace ge { | namespace ge { | ||||
class BuildTaskUtils { | class BuildTaskUtils { | ||||
@@ -35,7 +36,14 @@ class BuildTaskUtils { | |||||
bool keep_workspace = true); | bool keep_workspace = true); | ||||
static std::vector<void *> JoinAddresses(const std::vector<std::vector<void *>> &addresses); | static std::vector<void *> JoinAddresses(const std::vector<std::vector<void *>> &addresses); | ||||
static std::vector<void *> GetKernelArgs(const OpDescPtr &op_desc, const SingleOpModelParam ¶m); | static std::vector<void *> GetKernelArgs(const OpDescPtr &op_desc, const SingleOpModelParam ¶m); | ||||
static std::string InnerGetTaskInfo(const OpDescPtr &op_desc, | |||||
const std::vector<const void *> &input_addrs, | |||||
const std::vector<const void *> &output_addrs); | |||||
static std::string GetTaskInfo(const OpDescPtr &op_desc); | static std::string GetTaskInfo(const OpDescPtr &op_desc); | ||||
static std::string GetTaskInfo(const OpDescPtr &op_desc, | |||||
const std::vector<DataBuffer> &inputs, | |||||
const std::vector<DataBuffer> &outputs); | |||||
static std::string GetTaskInfo(const hybrid::TaskContext& task_context); | |||||
template<typename T> | template<typename T> | ||||
static std::string VectorToString(const std::vector<T> &values) { | static std::string VectorToString(const std::vector<T> &values) { | ||||
std::stringstream ss; | std::stringstream ss; | ||||
@@ -89,6 +89,7 @@ Status OpTask::OpenDump(rtStream_t stream) { | |||||
void TbeOpTask::SetStubFunc(const std::string &name, const void *stub_func) { | void TbeOpTask::SetStubFunc(const std::string &name, const void *stub_func) { | ||||
this->stub_name_ = name; | this->stub_name_ = name; | ||||
this->stub_func_ = stub_func; | this->stub_func_ = stub_func; | ||||
this->task_name_ = name; | |||||
} | } | ||||
void TbeOpTask::SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | void TbeOpTask::SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | ||||
@@ -44,6 +44,7 @@ class OpTask { | |||||
virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | ||||
void SetModelArgs(std::string model_name, uint32_t model_id); | void SetModelArgs(std::string model_name, uint32_t model_id); | ||||
Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); | Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); | ||||
const std::string &GetTaskName() const {return task_name_;} | |||||
void SetOpDesc(const OpDescPtr &op_desc) { | void SetOpDesc(const OpDescPtr &op_desc) { | ||||
op_desc_ = op_desc; | op_desc_ = op_desc; | ||||
} | } | ||||
@@ -66,6 +67,7 @@ class OpTask { | |||||
std::string model_name_; | std::string model_name_; | ||||
uint32_t model_id_ = 0; | uint32_t model_id_ = 0; | ||||
uint32_t block_dim_ = 1; | uint32_t block_dim_ = 1; | ||||
std::string task_name_; | |||||
}; | }; | ||||
class TbeOpTask : public OpTask { | class TbeOpTask : public OpTask { | ||||
@@ -34,12 +34,14 @@ | |||||
#include "hybrid/executor/hybrid_execution_context.h" | #include "hybrid/executor/hybrid_execution_context.h" | ||||
#include "hybrid/executor/hybrid_model_executor.h" | #include "hybrid/executor/hybrid_model_executor.h" | ||||
#include "hybrid/node_executor/aicore/aicore_task_builder.h" | #include "hybrid/node_executor/aicore/aicore_task_builder.h" | ||||
#include "hybrid/node_executor/aicore/aicore_node_executor.h" | |||||
#include "graph/load/model_manager/tbe_handle_store.h" | #include "graph/load/model_manager/tbe_handle_store.h" | ||||
#include "graph/manager/graph_mem_allocator.h" | #include "graph/manager/graph_mem_allocator.h" | ||||
#include "hybrid/common/npu_memory_allocator.h" | #include "hybrid/common/npu_memory_allocator.h" | ||||
#include "graph/types.h" | #include "graph/types.h" | ||||
#include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" | ||||
#include "graph/testcase/ge_graph/graph_builder_utils.h" | #include "graph/testcase/ge_graph/graph_builder_utils.h" | ||||
#include "single_op/task/build_task_utils.h" | |||||
#include "graph/op_desc_impl.h" | #include "graph/op_desc_impl.h" | ||||
#undef private | #undef private | ||||
#undef protected | #undef protected | ||||
@@ -747,4 +749,33 @@ TEST_F(UtestGeHybrid, TestParseDependencies) { | |||||
AttrUtils::SetTensor(tensor_desc, "_value", tensor); | AttrUtils::SetTensor(tensor_desc, "_value", tensor); | ||||
std::set<NodePtr> dependent_for_shape_inference; | std::set<NodePtr> dependent_for_shape_inference; | ||||
ASSERT_EQ(builder.ParseDependencies(*node_item, deps, dependent_for_shape_inference), SUCCESS); | ASSERT_EQ(builder.ParseDependencies(*node_item, deps, dependent_for_shape_inference), SUCCESS); | ||||
} | |||||
TEST_F(UtestGeHybrid, TestTaskExecuteAsync) {
  // Build a minimal Add node (two inputs, one output) to back a TaskContext.
  auto graph = make_shared<ComputeGraph>("graph");
  OpDescPtr op_desc = CreateOpDesc("Add", "Add");
  GeShape shape({2, 16});
  GeTensorDesc tensor_desc(shape);
  op_desc->AddInputDesc(tensor_desc);
  op_desc->AddInputDesc(tensor_desc);
  op_desc->AddOutputDesc(tensor_desc);
  auto node = graph->AddNode(op_desc);
  std::unique_ptr<NodeItem> node_item;
  NodeItem::Create(node, node_item);
  node_item->input_start = 0;
  node_item->output_start = 0;

  // Minimal execution scaffolding so a TaskContext can be materialized.
  GraphExecutionContext execution_context;
  GraphItem graph_item;
  SubgraphContext subgraph_context(&graph_item, &execution_context);
  ASSERT_EQ(subgraph_context.Init(), SUCCESS);
  subgraph_context.all_inputs_.resize(2);
  subgraph_context.all_outputs_.resize(1);
  auto node_state = subgraph_context.GetOrCreateNodeState(node_item.get());
  auto task_context = *node_state->GetTaskContext();

  // Task info must render even when no device addresses are bound yet.
  ASSERT_NE(BuildTaskUtils::GetTaskInfo(task_context), "");

  // ExecuteAsync over an empty task list is expected to succeed as a no-op.
  // (The previous version also allocated an AiCoreOpTask that was never used.)
  std::vector<std::unique_ptr<AiCoreOpTask>> tasks;
  AiCoreNodeTask node_task(std::move(tasks));
  ASSERT_EQ(node_task.ExecuteAsync(task_context, nullptr), SUCCESS);
}
@@ -23,6 +23,7 @@ | |||||
#define private public | #define private public | ||||
#include "single_op/single_op.h" | #include "single_op/single_op.h" | ||||
#include "single_op/single_op_manager.h" | #include "single_op/single_op_manager.h" | ||||
#include "single_op/task/build_task_utils.h" | |||||
#undef private | #undef private | ||||
#undef protected | #undef protected | ||||
@@ -126,9 +127,19 @@ TEST_F(UtestSingleOp, test_singleop_execute_async1) { | |||||
SingleOpModelParam model_params; | SingleOpModelParam model_params; | ||||
single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params)); | single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params)); | ||||
single_op.args_.resize(1); | single_op.args_.resize(1); | ||||
auto *tbe_task = new (std::nothrow) TbeOpTask(); | |||||
ge::OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL); | |||||
EXPECT_EQ(op_desc->AddInputDesc("x", GeTensorDesc(GeShape({2}), FORMAT_NCHW)), GRAPH_SUCCESS); | |||||
EXPECT_EQ(op_desc->AddOutputDesc("x", GeTensorDesc(GeShape({2}), FORMAT_NCHW)), GRAPH_SUCCESS); | |||||
EXPECT_NE(BuildTaskUtils::GetTaskInfo(op_desc), ""); | |||||
ge::ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default"); | |||||
ge::NodePtr node = graph->AddNode(op_desc); | |||||
tbe_task->node_ = node; | |||||
tbe_task->op_desc_ = op_desc; | |||||
single_op.tasks_.push_back(tbe_task); | |||||
EXPECT_EQ(single_op.hybrid_model_executor_, nullptr); | EXPECT_EQ(single_op.hybrid_model_executor_, nullptr); | ||||
EXPECT_EQ(single_op.running_param_->mem_base, nullptr); | EXPECT_EQ(single_op.running_param_->mem_base, nullptr); | ||||
EXPECT_EQ(single_op.tasks_.size(), 0); | |||||
EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), SUCCESS); | EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), SUCCESS); | ||||
} | } | ||||