@@ -106,6 +106,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
   // 1. Copy context from kernelExDef.private to workspace
   uint32_t op_index = kernel_ex_def.op_index();
   OpDescPtr op_desc = davinci_model_->GetOpByIndex(op_index);
+  op_desc_ = op_desc;
   if (op_desc == nullptr) {
     REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", op_index);
     GELOGE(INTERNAL_ERROR, "[Get][Op] by index failed, index:%u is out of range!", op_index);
@@ -422,7 +423,7 @@ Status KernelExTaskInfo::Distribute() {
   if (topic_type_flag_ > 0) {
     dump_flag_ = dump_flag_ | static_cast<uint32_t>(topic_type_flag_);
   }
-  rtError_t rt_ret = rtKernelLaunchEx(kernel_buf_, kernel_buf_size_, dump_flag_, stream_);
+  rtError_t rt_ret = rtKernelLaunchFwk(op_desc_->GetName().c_str(), kernel_buf_, kernel_buf_size_, dump_flag_, stream_);
   if (rt_ret != RT_ERROR_NONE) {
-    REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchEx failed, ret:0x%X", rt_ret);
-    GELOGE(RT_FAILED, "[Call][RtKernelLaunchEx] failed, ret:0x%X", rt_ret);
+    REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchFwk failed, ret:0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Call][RtKernelLaunchFwk] failed, ret:0x%X", rt_ret);
@@ -70,6 +70,7 @@ class KernelExTaskInfo : public TaskInfo {
   uint32_t dump_flag_;
   uint32_t kernel_buf_size_;
   DavinciModel *davinci_model_;
+  OpDescPtr op_desc_;
   void *kernel_buf_;
   void *input_output_addr_;
   void *ext_info_addr_;
@@ -440,9 +440,10 @@ Status KernelTaskInfo::Distribute() { | |||
} | |||
GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_); | |||
// blockDim is reserved parameter, set to 1 | |||
rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name_.c_str()), | |||
reinterpret_cast<const void *>(kernel_name_.c_str()), 1, args_, args_size_, | |||
nullptr, stream_, dump_flag_); | |||
std::string op_name = op_desc_->GetName(); | |||
rtKernelLaunchNames_t launch_name = {so_name_.c_str(), kernel_name_.c_str(), op_name.c_str()}; | |||
rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name, 1, args_, args_size_, | |||
nullptr, stream_, dump_flag_); | |||
call_save_dump_ = true; | |||
} else { | |||
/* default: not skt launch */ | |||
@@ -477,7 +477,7 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context,
   GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm));
   RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] Start");
-  GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL),
-                                 RT_KERNEL_DEFAULT, context.GetStream()));
+  GE_CHK_RT_RET(rtKernelLaunchFwk(node_name_.c_str(), copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL),
+                                  RT_KERNEL_DEFAULT, context.GetStream()));
   RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] End");
@@ -638,7 +638,8 @@ Status AicpuTfNodeTask::LaunchTask(TaskContext &context) {
   GELOGD("Node[%s] launch task start, unknown_type=%d.", node_name_.c_str(), unknown_type_);
   uint32_t flag = RT_KERNEL_DEFAULT;
   RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] Start");
-  GE_CHK_RT_RET(rtKernelLaunchEx(kernel_buf_->GetData(), kernel_buf_->GetSize(), flag, context.GetStream()));
+  GE_CHK_RT_RET(rtKernelLaunchFwk(node_name_.c_str(), kernel_buf_->GetData(),
+                                  kernel_buf_->GetSize(), flag, context.GetStream()));
   RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] End");
   GELOGD("Node[%s] launch end.", node_name_.c_str());
   if (need_sync_) {
@@ -819,11 +820,11 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) {
   if (kernel_type == ccKernelType::CUST_AI_CPU) {
     flag |= static_cast<uint32_t>(RT_KERNEL_CUSTOM_AICPU);
   }
-  auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name.c_str()),
-                                          reinterpret_cast<const void *>(kernel_name.c_str()),
-                                          1,  // default core dim is 1
-                                          args_.get(), args_size_,
-                                          nullptr, context.GetStream(), flag);
+  rtKernelLaunchNames_t launch_name = {so_name.c_str(), kernel_name.c_str(), node_name_.c_str()};
+  auto rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name,
+                                            1,  // default core dim is 1
+                                            args_.get(), args_size_,
+                                            nullptr, context.GetStream(), flag);
   GE_CHK_RT_RET(rt_ret);
   GELOGD("Node[%s] launch task end.", node_name_.c_str());
   return SUCCESS;
@@ -0,0 +1,10 @@
+approvers:
+- gegenhua
+reviewers:
+- wqtshg
+- ji_chen
+- xchu42
+- sheng-nan
+- wangxiaotian22
+- zhangxiaokun9
+- tangqunzhang
@@ -460,6 +460,21 @@ rtError_t rtDebugUnRegisterForStream(rtStream_t stream) {
 rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream) {
   return RT_ERROR_NONE;
 }
+
+rtError_t rtKernelLaunchFwk(const char *opName, void *args, uint32_t argSize, uint32_t flags, rtStream_t rtStream) {
+  return RT_ERROR_NONE;
+}
+
+rtError_t rtAicpuKernelLaunchWithFlag(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, const void *args,
+                                      uint32_t argSize, rtSmDesc_t *smDesc, rtStream_t stream, uint32_t flags) {
+  return RT_ERROR_NONE;
+}
+
+rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, const void *args,
+                              uint32_t argSize, rtSmDesc_t *smDesc, rtStream_t stream) {
+  return RT_ERROR_NONE;
+}
+
 #ifdef __cplusplus
 }
 #endif
@@ -736,6 +736,7 @@ set(HYBRID_TEST_FILES
     "hybrid/node_executor/host_cpu/host_cpu_node_task_unittest.cc"
     "hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc"
     "hybrid/node_executor/hccl/hccl_node_executor_unittest.cc"
+    "hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc"
    "hybrid/executor/hybrid_model_async_executor_unittest.cc"
    "hybrid/executor/hybrid_model_pipeline_executor_unittest.cc"
    "hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc"
@@ -0,0 +1,168 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include <memory>
+#include <vector>
+
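+// Expose private/protected members of the executor classes to the test.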
+#define private public
+#define protected public
+#include "graph/runtime_inference_context.h"
+#include "aicpu/common/aicpu_task_struct.h"
+#include "hybrid/executor/subgraph_context.h"
+#include "hybrid/node_executor/aicpu/aicpu_node_executor.h"
+#undef protected
+#undef private
+
+using namespace std;
+using namespace testing;
+
+namespace {
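+// Args layout used by the aicpu kernel task below: a param head followed by
+// six I/O addresses (four inputs + two outputs of the test node).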
+struct AicpuTaskStruct {
+  aicpu::AicpuParamHead head;
+  uint64_t io_addrp[6];
+} __attribute__((packed));
+}  // namespace
+
+namespace ge {
+using namespace hybrid;
+
+class UtestAicpuNodeExecutor : public testing::Test {
+ protected:
+  void SetUp() {}
+  void TearDown() {}
+};
+
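+// Builds a node with in_num inputs and out_num outputs; every tensor is 64
+// bytes, and offsets are laid out inputs first, then outputs.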
+static NodePtr CreateNode(ComputeGraphPtr graph, const string &name, const string &type, int in_num, int out_num) {
+  OpDescPtr op_desc = std::make_shared<OpDesc>(name, type);
+  op_desc->SetStreamId(0);
+  static int32_t index = 0;
+  op_desc->SetId(index++);
+
+  GeTensorDesc tensor(GeShape(), FORMAT_ND, DT_INT64);
+  TensorUtils::SetSize(tensor, 64);
+  vector<int64_t> input_offset;
+  for (int i = 0; i < in_num; i++) {
+    op_desc->AddInputDesc(tensor);
+    input_offset.emplace_back(i * 64);
+  }
+  op_desc->SetInputOffset(input_offset);
+
+  vector<int64_t> output_offset;
+  for (int i = 0; i < out_num; i++) {
+    op_desc->AddOutputDesc(tensor);
+    output_offset.emplace_back(in_num * 64 + i * 64);
+  }
+  op_desc->SetOutputOffset(output_offset);
+
+  return graph->AddNode(op_desc);
+}
+
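+// Drives AicpuTfNodeTask and AicpuNodeTask end to end against the stubbed
+// runtime: Init parses the task defs, LaunchTask issues the (stubbed) launch.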
+TEST_F(UtestAicpuNodeExecutor, aicpu_tf_node_task) {
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test");
+  GeModelPtr ge_sub_model = std::make_shared<GeModel>();
+  GeRootModelPtr ge_root_model = std::make_shared<GeRootModel>(graph);
+  ge_root_model->SetModelName("test_name");
+  ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model);
+  HybridModel hybrid_model(ge_root_model);
+
+  NodePtr node = CreateNode(graph, "frameworkop", FRAMEWORK_OP_TYPE, 4, 2);
+  std::unique_ptr<NodeItem> new_node;
+  ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS);
+  NodeItem *node_item = new_node.get();
+  hybrid_model.node_items_[node] = std::move(new_node);
+  node_item->input_start = 0;
+  node_item->output_start = 0;
+  node_item->is_dynamic = true;
+  node_item->shape_inference_type = DEPEND_COMPUTE;
+
+  GraphItem graph_item;
+  graph_item.node_items_.emplace_back(node_item);
+  graph_item.total_inputs_ = 4;
+  graph_item.total_outputs_ = 2;
+
+  GraphExecutionContext graph_context;
+  SubgraphContext subgraph_context(&graph_item, &graph_context);
+  ASSERT_EQ(subgraph_context.Init(), SUCCESS);
+  graph_context.callback_manager = std::unique_ptr<CallbackManager>(new CallbackManager());
+
+  auto node_state = subgraph_context.GetOrCreateNodeState(node_item);
+  ASSERT_NE(node_state, nullptr);
+
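+  // Feed four 8-byte input tensors and two output tensors; the buffers must
+  // outlive the launch, so they are kept at function scope.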
+  uint64_t in_value[4] = {512, 512, 512, 512};
+  for (int i = 0; i < 4; ++i) {
+    TensorValue in_tensor(&in_value[i], sizeof(in_value[i]));
+    subgraph_context.SetInput(*node_item, i, in_tensor);
+  }
+  uint64_t value_0 = 512;
+  TensorValue out_tensor0(&value_0, sizeof(value_0));
+  subgraph_context.SetOutput(*node_item, 0, out_tensor0);
+  uint64_t value_1 = 512;
+  TensorValue out_tensor1(&value_1, sizeof(value_1));
+  subgraph_context.SetOutput(*node_item, 1, out_tensor1);
+
+  // Build the kernel_ex task def with a shape-type ext info blob appended:
+  // an AicpuExtInfo header followed by an int32 payload, assembled in place
+  // inside a heap buffer so the flexible infoMsg member is not written past
+  // the end of a stack object.
+  domi::TaskDef task_def;
+  domi::KernelExDef *kernel_ex_def = task_def.mutable_kernel_ex();
+  const size_t ext_len = sizeof(AicpuExtInfo) + sizeof(int32_t);
+  kernel_ex_def->set_kernel_ext_info_size(ext_len);
+  std::unique_ptr<char[]> ext_mem(new char[ext_len]());
+  auto *aicpu_ext_info = reinterpret_cast<AicpuExtInfo *>(ext_mem.get());
+  aicpu_ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE;
+  aicpu_ext_info->infoLen = sizeof(int32_t);
+  int32_t type = node_item->shape_inference_type;
+  memcpy_s(aicpu_ext_info->infoMsg, sizeof(int32_t), &type, sizeof(int32_t));
+  kernel_ex_def->set_kernel_ext_info(ext_mem.get(), ext_len);
+
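+  // Two copies of the task def are registered; the DEPEND_COMPUTE path
+  // presumably consumes the second one as the shape-copy task def.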
+  hybrid_model.task_defs_[node] = std::vector<domi::TaskDef>({task_def, task_def});
+
+  AicpuTfNodeTask aicpu_tf_node_task(node_item, task_def);
+  ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS);
+  ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS);
+
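+  // AicpuNodeTask path: register a kernel-type task def, then check that Init
+  // rejects the kernel_ex task def while LaunchTask still succeeds.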
+  AicpuTaskStruct args;
+  args.head.length = sizeof(args);
+  args.head.ioAddrNum = 6;
+
+  domi::TaskDef task_def2;
+  task_def2.set_type(RT_MODEL_TASK_ALL_KERNEL);
+  task_def2.mutable_kernel()->set_args(reinterpret_cast<const char *>(&args), args.head.length);
+  task_def2.mutable_kernel()->set_args_size(args.head.length);
+  hybrid_model.task_defs_[node] = std::vector<domi::TaskDef>({task_def2});
+
+  AicpuNodeTask aicpu_node_task(node_item, task_def);
+  ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED);
+  ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS);
+}
+}  // namespace ge