From 191f381cc5251711c4f65ef11f7262f47e583068 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Tue, 13 Jul 2021 11:49:50 +0800 Subject: [PATCH] runtime api transfer --- .../task_info/kernel_ex_task_info.cc | 3 +- .../task_info/kernel_ex_task_info.h | 1 + .../task_info/kernel_task_info.cc | 7 +- .../aicpu/aicpu_node_executor.cc | 15 +- inc/external/OWNERS | 10 ++ tests/depends/runtime/src/runtime_stub.cc | 15 ++ tests/ut/ge/CMakeLists.txt | 1 + .../aicpu/aicpu_node_executor_unittest.cc | 168 ++++++++++++++++++ 8 files changed, 209 insertions(+), 11 deletions(-) create mode 100644 inc/external/OWNERS create mode 100644 tests/ut/ge/hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc diff --git a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc index a4b3de75..ee358b5c 100644 --- a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc @@ -106,6 +106,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin // 1. 
Copy context from kernelExDef.private to workspace uint32_t op_index = kernel_ex_def.op_index(); OpDescPtr op_desc = davinci_model_->GetOpByIndex(op_index); + op_desc_ = op_desc; if (op_desc == nullptr) { REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", op_index); GELOGE(INTERNAL_ERROR, "[Get][Op] by index failed, index:%u is out of range!", op_index); @@ -422,7 +423,7 @@ Status KernelExTaskInfo::Distribute() { if (topic_type_flag_ > 0) { dump_flag_ = dump_flag_ | topic_type_flag_; } - rtError_t rt_ret = rtKernelLaunchEx(kernel_buf_, kernel_buf_size_, dump_flag_, stream_); + rtError_t rt_ret = rtKernelLaunchFwk(op_desc_->GetName().c_str(), kernel_buf_, kernel_buf_size_, dump_flag_, stream_); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchEx failed, ret:0x%X", rt_ret); GELOGE(RT_FAILED, "[Call][RtKernelLaunchEx] failed, ret:0x%X", rt_ret); diff --git a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h index 1b77b715..7d07eb7f 100644 --- a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h @@ -70,6 +70,7 @@ class KernelExTaskInfo : public TaskInfo { uint32_t dump_flag_; uint32_t kernel_buf_size_; DavinciModel *davinci_model_; + OpDescPtr op_desc_; void *kernel_buf_; void *input_output_addr_; void *ext_info_addr_; diff --git a/ge/graph/load/model_manager/task_info/kernel_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_task_info.cc index 07ad63ca..63f4257c 100755 --- a/ge/graph/load/model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.cc @@ -440,9 +440,10 @@ Status KernelTaskInfo::Distribute() { } GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_); // blockDim is reserved parameter, set to 1 - rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast(so_name_.c_str()), - 
reinterpret_cast(kernel_name_.c_str()), 1, args_, args_size_, - nullptr, stream_, dump_flag_); + std::string op_name = op_desc_->GetName(); + rtKernelLaunchNames_t launch_name = {so_name_.c_str(), kernel_name_.c_str(), op_name.c_str()}; + rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name, 1, args_, args_size_, + nullptr, stream_, dump_flag_); call_save_dump_ = true; } else { /* default: not skt launch */ diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 820c9b56..cf20303c 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -477,7 +477,7 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context, GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm)); RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] Start"); - GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), + GE_CHK_RT_RET(rtKernelLaunchFwk(node_name_.c_str(), copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), RT_KERNEL_DEFAULT, context.GetStream())); RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] End"); @@ -638,7 +638,8 @@ Status AicpuTfNodeTask::LaunchTask(TaskContext &context) { GELOGD("Node[%s] launch task start, unknown_type=%d.", node_name_.c_str(), unknown_type_); uint32_t flag = RT_KERNEL_DEFAULT; RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] Start"); - GE_CHK_RT_RET(rtKernelLaunchEx(kernel_buf_->GetData(), kernel_buf_->GetSize(), flag, context.GetStream())); + GE_CHK_RT_RET(rtKernelLaunchFwk(node_name_.c_str(), kernel_buf_->GetData(), + kernel_buf_->GetSize(), flag, context.GetStream())); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] End"); GELOGD("Node[%s] launch end.", node_name_.c_str()); if 
(need_sync_) { @@ -819,11 +820,11 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) { if (kernel_type == ccKernelType::CUST_AI_CPU) { flag |= static_cast(RT_KERNEL_CUSTOM_AICPU); } - auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast(so_name.c_str()), - reinterpret_cast(kernel_name.c_str()), - 1, // default core dim is 1 - args_.get(), args_size_, - nullptr, context.GetStream(), flag); + rtKernelLaunchNames_t launch_name = {so_name.c_str(), kernel_name.c_str(), node_name_.c_str()}; + auto rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name, + 1, // default core dim is 1 + args_.get(), args_size_, + nullptr, context.GetStream(), flag); GE_CHK_RT_RET(rt_ret); GELOGD("Node[%s] launch task end.", node_name_.c_str()); return SUCCESS; diff --git a/inc/external/OWNERS b/inc/external/OWNERS new file mode 100644 index 00000000..934272a6 --- /dev/null +++ b/inc/external/OWNERS @@ -0,0 +1,10 @@ +approvers: +- gegenhua +reviewers: +- wqtshg +- ji_chen +- xchu42 +- sheng-nan +- wangxiaotian22 +- zhangxiaokun9 +- tangqunzhang diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc index 0c9e2c27..510eb1ad 100644 --- a/tests/depends/runtime/src/runtime_stub.cc +++ b/tests/depends/runtime/src/runtime_stub.cc @@ -460,6 +460,21 @@ rtError_t rtDebugUnRegisterForStream(rtStream_t stream) { rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream) { return RT_ERROR_NONE; } + +rtError_t rtKernelLaunchFwk(const char *opName, void *args, uint32_t argSize, uint32_t flags, rtStream_t rtStream) { + return RT_ERROR_NONE; +} + +rtError_t rtAicpuKernelLaunchWithFlag(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, const void *args, + uint32_t argSize, rtSmDesc_t *smDesc, rtStream_t stream, uint32_t flags) { + return RT_ERROR_NONE; +} + +rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, const void *args, + uint32_t argSize, rtSmDesc_t *smDesc, rtStream_t stream) { + 
return RT_ERROR_NONE; +} + #ifdef __cplusplus } #endif diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 42fa6128..ebaee921 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -735,6 +735,7 @@ set(HYBRID_TEST_FILES "hybrid/node_executor/host_cpu/host_cpu_node_task_unittest.cc" "hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc" "hybrid/node_executor/hccl/hccl_node_executor_unittest.cc" + "hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc" "hybrid/executor/hybrid_model_async_executor_unittest.cc" "hybrid/executor/hybrid_model_pipeline_executor_unittest.cc" "hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc" diff --git a/tests/ut/ge/hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc b/tests/ut/ge/hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc new file mode 100644 index 00000000..b225949b --- /dev/null +++ b/tests/ut/ge/hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc @@ -0,0 +1,168 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include + +#define private public +#define protected public +#include "graph/runtime_inference_context.h" +#include "aicpu/common/aicpu_task_struct.h" +#include "hybrid/executor/subgraph_context.h" +#include "hybrid/node_executor/aicpu/aicpu_node_executor.h" +#undef protected +#undef private + +using namespace std; +using namespace testing; + +namespace { +struct AicpuTaskStruct { + aicpu::AicpuParamHead head; + uint64_t io_addrp[6]; +}__attribute__((packed)); +} // namespace + +namespace ge { +using namespace hybrid; + +class UtestAicpuNodeExecutor : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} +}; + +static NodePtr CreateNode(ComputeGraphPtr graph, const string &name, const string &type, int in_num, int out_num) { + OpDescPtr op_desc = std::make_shared(name, type); + op_desc->SetStreamId(0); + static int32_t index = 0; + op_desc->SetId(index++); + + GeTensorDesc tensor(GeShape(), FORMAT_ND, DT_INT64); + TensorUtils::SetSize(tensor, 64); + vector input_offset; + for (int i = 0; i < in_num; i++) { + op_desc->AddInputDesc(tensor); + input_offset.emplace_back(i * 64); + } + op_desc->SetInputOffset(input_offset); + + vector output_offset; + for (int i = 0; i < out_num; i++) { + op_desc->AddOutputDesc(tensor); + output_offset.emplace_back(in_num * 64 + i * 64); + } + op_desc->SetOutputOffset(output_offset); + + return graph->AddNode(op_desc); +} + +TEST_F(UtestAicpuNodeExecutor, aicpu_tf_node_task) { + ComputeGraphPtr graph = std::make_shared("test"); + GeModelPtr ge_sub_model = std::make_shared(); + GeRootModelPtr ge_root_model = std::make_shared(graph); + ge_root_model->SetModelName("test_name"); + ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model); + HybridModel hybrid_model(ge_root_model); + + NodePtr node = CreateNode(graph, "frameworkop", FRAMEWORK_OP_TYPE, 4, 2); + + std::unique_ptr new_node; + ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); + NodeItem *node_item = new_node.get(); 
+ hybrid_model.node_items_[node] = std::move(new_node); + node_item->input_start = 0; + node_item->output_start = 0; + node_item->is_dynamic = true; + node_item->shape_inference_type = DEPEND_COMPUTE; + + GraphItem graph_item; + graph_item.node_items_.emplace_back(node_item); + graph_item.total_inputs_ = 4; + graph_item.total_outputs_ = 2; + + GraphExecutionContext graph_context; + SubgraphContext subgraph_context(&graph_item, &graph_context); + ASSERT_EQ(subgraph_context.Init(), SUCCESS); + graph_context.callback_manager = std::unique_ptr(new CallbackManager()); + + auto node_state = subgraph_context.GetOrCreateNodeState(node_item); + ASSERT_NE(node_state, nullptr); + + for (int i=0; i<4; ++i) { + uint64_t value_0 = 512; + TensorValue in_tensor0(&value_0, sizeof(value_0)); + subgraph_context.SetInput(*node_item, i, in_tensor0); + } + + uint64_t value_0 = 512; + TensorValue out_tensor0(&value_0, sizeof(value_0)); + subgraph_context.SetOutput(*node_item, 0, out_tensor0); + + uint64_t value_1 = 512; + TensorValue out_tensor1(&value_1, sizeof(value_1)); + subgraph_context.SetOutput(*node_item, 1, out_tensor1); + + // task + domi::TaskDef task_def; + domi::KernelExDef *kernel_ex_def = task_def.mutable_kernel_ex(); + kernel_ex_def->set_kernel_ext_info_size(12); + + AicpuExtInfo aicpu_ext_info; + aicpu_ext_info.infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE; + aicpu_ext_info.infoLen = sizeof(int32_t); + int32_t type = node_item->shape_inference_type; + memcpy_s(aicpu_ext_info.infoMsg, sizeof(int32_t), &type, sizeof(int32_t)); + char *ext_mem = (char*)malloc(sizeof(AicpuExtInfo) + sizeof(int32_t)); + memcpy_s(ext_mem, sizeof(AicpuExtInfo) + sizeof(int32_t), &aicpu_ext_info, sizeof(AicpuExtInfo) + sizeof(int32_t)); + std::string ext_info(ext_mem, sizeof(AicpuExtInfo) + sizeof(int32_t)); + + std::string *mutable_ext_info = kernel_ex_def->mutable_kernel_ext_info(); + (*mutable_ext_info) = ext_info; + + hybrid_model.task_defs_[node] = std::vector({task_def, task_def}); 
+ + AicpuTfNodeTask aicpu_tf_node_task(node_item, task_def); + + ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); + ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); + + AicpuTaskStruct args; + args.head.length = sizeof(args); + args.head.ioAddrNum = 6; + + domi::TaskDef task_def2; + task_def2.set_type(RT_MODEL_TASK_ALL_KERNEL); + task_def2.mutable_kernel()->set_args(reinterpret_cast(&args), args.head.length); + task_def2.mutable_kernel()->set_args_size(args.head.length); + + hybrid_model.task_defs_[node] = std::vector({task_def2}); + + AicpuNodeTask aicpu_node_task(node_item, task_def); + ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); + ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); + + + //kernel_ex_def->set_allocated_kernel_ext_info(nullptr); + + free(ext_mem); + +} + +} // namespace ge +