@@ -106,6 +106,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
// 1. Copy context from kernelExDef.private to workspace | // 1. Copy context from kernelExDef.private to workspace | ||||
uint32_t op_index = kernel_ex_def.op_index(); | uint32_t op_index = kernel_ex_def.op_index(); | ||||
OpDescPtr op_desc = davinci_model_->GetOpByIndex(op_index); | OpDescPtr op_desc = davinci_model_->GetOpByIndex(op_index); | ||||
op_desc_ = op_desc; | |||||
if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", op_index); | REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", op_index); | ||||
GELOGE(INTERNAL_ERROR, "[Get][Op] by index failed, index:%u is out of range!", op_index); | GELOGE(INTERNAL_ERROR, "[Get][Op] by index failed, index:%u is out of range!", op_index); | ||||
@@ -422,7 +423,7 @@ Status KernelExTaskInfo::Distribute() { | |||||
if (topic_type_flag_ > 0) { | if (topic_type_flag_ > 0) { | ||||
dump_flag_ = dump_flag_ | topic_type_flag_; | dump_flag_ = dump_flag_ | topic_type_flag_; | ||||
} | } | ||||
rtError_t rt_ret = rtKernelLaunchEx(kernel_buf_, kernel_buf_size_, dump_flag_, stream_); | |||||
rtError_t rt_ret = rtKernelLaunchFwk(op_desc_->GetName().c_str(), kernel_buf_, kernel_buf_size_, dump_flag_, stream_); | |||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchEx failed, ret:0x%X", rt_ret); | REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchEx failed, ret:0x%X", rt_ret); | ||||
GELOGE(RT_FAILED, "[Call][RtKernelLaunchEx] failed, ret:0x%X", rt_ret); | GELOGE(RT_FAILED, "[Call][RtKernelLaunchEx] failed, ret:0x%X", rt_ret); | ||||
@@ -70,6 +70,7 @@ class KernelExTaskInfo : public TaskInfo { | |||||
uint32_t dump_flag_; | uint32_t dump_flag_; | ||||
uint32_t kernel_buf_size_; | uint32_t kernel_buf_size_; | ||||
DavinciModel *davinci_model_; | DavinciModel *davinci_model_; | ||||
OpDescPtr op_desc_; | |||||
void *kernel_buf_; | void *kernel_buf_; | ||||
void *input_output_addr_; | void *input_output_addr_; | ||||
void *ext_info_addr_; | void *ext_info_addr_; | ||||
@@ -440,9 +440,10 @@ Status KernelTaskInfo::Distribute() { | |||||
} | } | ||||
GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_); | GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_); | ||||
// blockDim is reserved parameter, set to 1 | // blockDim is reserved parameter, set to 1 | ||||
rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name_.c_str()), | |||||
reinterpret_cast<const void *>(kernel_name_.c_str()), 1, args_, args_size_, | |||||
nullptr, stream_, dump_flag_); | |||||
std::string op_name = op_desc_->GetName(); | |||||
rtKernelLaunchNames_t launch_name = {so_name_.c_str(), kernel_name_.c_str(), op_name.c_str()}; | |||||
rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name, 1, args_, args_size_, | |||||
nullptr, stream_, dump_flag_); | |||||
call_save_dump_ = true; | call_save_dump_ = true; | ||||
} else { | } else { | ||||
/* default: not skt launch */ | /* default: not skt launch */ | ||||
@@ -477,7 +477,7 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context, | |||||
GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm)); | GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm)); | ||||
RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] Start"); | RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] Start"); | ||||
GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), | |||||
GE_CHK_RT_RET(rtKernelLaunchFwk(node_name_.c_str(), copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), | |||||
RT_KERNEL_DEFAULT, context.GetStream())); | RT_KERNEL_DEFAULT, context.GetStream())); | ||||
RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] End"); | RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] End"); | ||||
@@ -638,7 +638,8 @@ Status AicpuTfNodeTask::LaunchTask(TaskContext &context) { | |||||
GELOGD("Node[%s] launch task start, unknown_type=%d.", node_name_.c_str(), unknown_type_); | GELOGD("Node[%s] launch task start, unknown_type=%d.", node_name_.c_str(), unknown_type_); | ||||
uint32_t flag = RT_KERNEL_DEFAULT; | uint32_t flag = RT_KERNEL_DEFAULT; | ||||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] Start"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] Start"); | ||||
GE_CHK_RT_RET(rtKernelLaunchEx(kernel_buf_->GetData(), kernel_buf_->GetSize(), flag, context.GetStream())); | |||||
GE_CHK_RT_RET(rtKernelLaunchFwk(node_name_.c_str(), kernel_buf_->GetData(), | |||||
kernel_buf_->GetSize(), flag, context.GetStream())); | |||||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] End"); | ||||
GELOGD("Node[%s] launch end.", node_name_.c_str()); | GELOGD("Node[%s] launch end.", node_name_.c_str()); | ||||
if (need_sync_) { | if (need_sync_) { | ||||
@@ -819,11 +820,11 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) { | |||||
if (kernel_type == ccKernelType::CUST_AI_CPU) { | if (kernel_type == ccKernelType::CUST_AI_CPU) { | ||||
flag |= static_cast<uint32_t>(RT_KERNEL_CUSTOM_AICPU); | flag |= static_cast<uint32_t>(RT_KERNEL_CUSTOM_AICPU); | ||||
} | } | ||||
auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name.c_str()), | |||||
reinterpret_cast<const void *>(kernel_name.c_str()), | |||||
1, // default core dim is 1 | |||||
args_.get(), args_size_, | |||||
nullptr, context.GetStream(), flag); | |||||
rtKernelLaunchNames_t launch_name = {so_name.c_str(), kernel_name.c_str(), node_name_.c_str()}; | |||||
auto rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name, | |||||
1, // default core dim is 1 | |||||
args_.get(), args_size_, | |||||
nullptr, context.GetStream(), flag); | |||||
GE_CHK_RT_RET(rt_ret); | GE_CHK_RT_RET(rt_ret); | ||||
GELOGD("Node[%s] launch task end.", node_name_.c_str()); | GELOGD("Node[%s] launch task end.", node_name_.c_str()); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -0,0 +1,10 @@ | |||||
approvers:
- gegenhua
reviewers:
- wqtshg
- ji_chen
- xchu42
- sheng-nan
- wangxiaotian22
- zhangxiaokun9
- tangqunzhang
@@ -460,6 +460,21 @@ rtError_t rtDebugUnRegisterForStream(rtStream_t stream) { | |||||
rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream) { | rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream) { | ||||
return RT_ERROR_NONE; | return RT_ERROR_NONE; | ||||
} | } | ||||
// Unit-test stub: accepts the fwk-kernel launch arguments and reports
// success without launching anything on a device.
rtError_t rtKernelLaunchFwk(const char *opName, void *args, uint32_t argSize, uint32_t flags, rtStream_t rtStream) {
  return RT_ERROR_NONE;
}
// Unit-test stub: pretends the AICPU kernel (identified by so/kernel/op
// names in launchNames) was launched with the given flags; always succeeds.
rtError_t rtAicpuKernelLaunchWithFlag(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, const void *args,
                                      uint32_t argSize, rtSmDesc_t *smDesc, rtStream_t stream, uint32_t flags) {
  return RT_ERROR_NONE;
}
// Unit-test stub: flag-less variant of the AICPU kernel launch; always succeeds.
rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, const void *args,
                              uint32_t argSize, rtSmDesc_t *smDesc, rtStream_t stream) {
  return RT_ERROR_NONE;
}
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} | } | ||||
#endif | #endif |
@@ -735,6 +735,7 @@ set(HYBRID_TEST_FILES | |||||
"hybrid/node_executor/host_cpu/host_cpu_node_task_unittest.cc" | "hybrid/node_executor/host_cpu/host_cpu_node_task_unittest.cc" | ||||
"hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc" | "hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc" | ||||
"hybrid/node_executor/hccl/hccl_node_executor_unittest.cc" | "hybrid/node_executor/hccl/hccl_node_executor_unittest.cc" | ||||
"hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc" | |||||
"hybrid/executor/hybrid_model_async_executor_unittest.cc" | "hybrid/executor/hybrid_model_async_executor_unittest.cc" | ||||
"hybrid/executor/hybrid_model_pipeline_executor_unittest.cc" | "hybrid/executor/hybrid_model_pipeline_executor_unittest.cc" | ||||
"hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc" | "hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc" | ||||
@@ -0,0 +1,168 @@ | |||||
/** | |||||
* Copyright 2021 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#include <gmock/gmock.h> | |||||
#include <gtest/gtest.h> | |||||
#include <vector> | |||||
#define private public | |||||
#define protected public | |||||
#include "graph/runtime_inference_context.h" | |||||
#include "aicpu/common/aicpu_task_struct.h" | |||||
#include "hybrid/executor/subgraph_context.h" | |||||
#include "hybrid/node_executor/aicpu/aicpu_node_executor.h" | |||||
#undef protected | |||||
#undef private | |||||
using namespace std; | |||||
using namespace testing; | |||||
namespace {
// Packed argument blob handed to the stub AICPU launch: the common AICPU
// parameter head followed by 6 raw I/O addresses (matches the test node,
// which is created with 4 inputs and 2 outputs).
struct AicpuTaskStruct {
  aicpu::AicpuParamHead head;
  uint64_t io_addrp[6];
}__attribute__((packed));
}  // namespace
namespace ge { | |||||
using namespace hybrid; | |||||
class UtestAicpuNodeExecutor : public testing::Test { | |||||
protected: | |||||
void SetUp() {} | |||||
void TearDown() {} | |||||
}; | |||||
// Creates a node named `name` of `type` with `in_num` inputs and `out_num`
// outputs, each a 64-byte ND INT64 tensor, assigns sequential data offsets
// (outputs laid out directly after all inputs), and adds it to `graph`.
static NodePtr CreateNode(ComputeGraphPtr graph, const string &name, const string &type, int in_num, int out_num) {
  auto desc = std::make_shared<OpDesc>(name, type);
  desc->SetStreamId(0);

  // Ids keep increasing across calls so every created node gets a unique id.
  static int32_t next_id = 0;
  desc->SetId(next_id++);

  GeTensorDesc tensor_desc(GeShape(), FORMAT_ND, DT_INT64);
  TensorUtils::SetSize(tensor_desc, 64);

  vector<int64_t> in_offsets;
  for (int idx = 0; idx < in_num; ++idx) {
    desc->AddInputDesc(tensor_desc);
    in_offsets.emplace_back(idx * 64);
  }
  desc->SetInputOffset(in_offsets);

  vector<int64_t> out_offsets;
  for (int idx = 0; idx < out_num; ++idx) {
    desc->AddOutputDesc(tensor_desc);
    out_offsets.emplace_back(in_num * 64 + idx * 64);
  }
  desc->SetOutputOffset(out_offsets);

  return graph->AddNode(desc);
}
// Exercises AicpuTfNodeTask (Init + LaunchTask) and AicpuNodeTask against a
// single 4-input/2-output FrameworkOp node running on the stubbed runtime.
TEST_F(UtestAicpuNodeExecutor, aicpu_tf_node_task) {
  // Minimal hybrid model wrapping one compute graph.
  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test");
  GeModelPtr ge_sub_model = std::make_shared<GeModel>();
  GeRootModelPtr ge_root_model = std::make_shared<GeRootModel>(graph);
  ge_root_model->SetModelName("test_name");
  ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model);
  HybridModel hybrid_model(ge_root_model);

  NodePtr node = CreateNode(graph, "frameworkop", FRAMEWORK_OP_TYPE, 4, 2);
  std::unique_ptr<NodeItem> new_node;
  ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS);
  NodeItem *node_item = new_node.get();
  hybrid_model.node_items_[node] = std::move(new_node);
  node_item->input_start = 0;
  node_item->output_start = 0;
  node_item->is_dynamic = true;
  node_item->shape_inference_type = DEPEND_COMPUTE;

  GraphItem graph_item;
  graph_item.node_items_.emplace_back(node_item);
  graph_item.total_inputs_ = 4;
  graph_item.total_outputs_ = 2;

  GraphExecutionContext graph_context;
  SubgraphContext subgraph_context(&graph_item, &graph_context);
  ASSERT_EQ(subgraph_context.Init(), SUCCESS);
  graph_context.callback_manager = std::unique_ptr<CallbackManager>(new CallbackManager());

  auto node_state = subgraph_context.GetOrCreateNodeState(node_item);
  ASSERT_NE(node_state, nullptr);

  // Fix: populate ALL four inputs (the original passed index 0 on every
  // iteration, leaving inputs 1-3 unset) and back each TensorValue with
  // storage that outlives the loop instead of a per-iteration local.
  uint64_t in_values[4] = {512, 512, 512, 512};
  for (int i = 0; i < 4; ++i) {
    TensorValue in_tensor(&in_values[i], sizeof(in_values[i]));
    subgraph_context.SetInput(*node_item, i, in_tensor);
  }
  uint64_t value_0 = 512;
  TensorValue out_tensor0(&value_0, sizeof(value_0));
  subgraph_context.SetOutput(*node_item, 0, out_tensor0);
  uint64_t value_1 = 512;
  TensorValue out_tensor1(&value_1, sizeof(value_1));
  subgraph_context.SetOutput(*node_item, 1, out_tensor1);

  // Kernel-ex task def carrying one ext-info record (shape-type = DEPEND_COMPUTE).
  domi::TaskDef task_def;
  domi::KernelExDef *kernel_ex_def = task_def.mutable_kernel_ex();
  kernel_ex_def->set_kernel_ext_info_size(12);
  // Fix: build the ext-info record inside a right-sized heap buffer. The
  // original wrote through infoMsg on a stack AicpuExtInfo (out-of-bounds if
  // infoMsg is a trailing flexible array) and then copied
  // sizeof(AicpuExtInfo) + sizeof(int32_t) bytes out of that smaller object.
  // The buffer is RAII-owned, so no leak on early ASSERT failure.
  int32_t type = node_item->shape_inference_type;
  std::vector<char> ext_mem(sizeof(AicpuExtInfo) + sizeof(int32_t));
  auto *ext = reinterpret_cast<AicpuExtInfo *>(ext_mem.data());
  ext->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE;
  ext->infoLen = sizeof(int32_t);
  memcpy_s(ext->infoMsg, sizeof(int32_t), &type, sizeof(int32_t));
  std::string ext_info(ext_mem.data(), ext_mem.size());
  std::string *mutable_ext_info = kernel_ex_def->mutable_kernel_ext_info();
  (*mutable_ext_info) = ext_info;
  hybrid_model.task_defs_[node] = std::vector<domi::TaskDef>({task_def, task_def});

  AicpuTfNodeTask aicpu_tf_node_task(node_item, task_def);
  ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS);
  ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS);

  // Args blob for the non-TF AICPU task: head + 6 I/O addresses.
  AicpuTaskStruct args;
  args.head.length = sizeof(args);
  args.head.ioAddrNum = 6;

  domi::TaskDef task_def2;
  task_def2.set_type(RT_MODEL_TASK_ALL_KERNEL);
  task_def2.mutable_kernel()->set_args(reinterpret_cast<const char *>(&args), args.head.length);
  task_def2.mutable_kernel()->set_args_size(args.head.length);
  hybrid_model.task_defs_[node] = std::vector<domi::TaskDef>({task_def2});

  // NOTE(review): constructed from task_def (the kernel_ex def), so Init is
  // expected to FAIL here, while LaunchTask still succeeds against the stub
  // runtime -- confirm this mismatch is intentional (task_def2 is what the
  // model registers above).
  AicpuNodeTask aicpu_node_task(node_item, task_def);
  ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED);
  ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS);
}
} // namespace ge | |||||