
!1982 runtime api transfer

Merge pull request !1982 from 王笑天/master
tags/v1.5.1
i-robot committed 3 years ago
commit 47c0630566
8 changed files with 209 additions and 11 deletions
  1. +2   -1   ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc
  2. +1   -0   ge/graph/load/model_manager/task_info/kernel_ex_task_info.h
  3. +4   -3   ge/graph/load/model_manager/task_info/kernel_task_info.cc
  4. +8   -7   ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
  5. +10  -0   inc/external/OWNERS
  6. +15  -0   tests/depends/runtime/src/runtime_stub.cc
  7. +1   -0   tests/ut/ge/CMakeLists.txt
  8. +168 -0   tests/ut/ge/hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc

ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc  (+2 -1)

@@ -106,6 +106,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
  // 1. Copy context from kernelExDef.private to workspace
  uint32_t op_index = kernel_ex_def.op_index();
  OpDescPtr op_desc = davinci_model_->GetOpByIndex(op_index);
+ op_desc_ = op_desc;
  if (op_desc == nullptr) {
    REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", op_index);
    GELOGE(INTERNAL_ERROR, "[Get][Op] by index failed, index:%u is out of range!", op_index);
@@ -422,7 +423,7 @@ Status KernelExTaskInfo::Distribute() {
  if (topic_type_flag_ > 0) {
    dump_flag_ = dump_flag_ | static_cast<uint32_t>(topic_type_flag_);
  }
- rtError_t rt_ret = rtKernelLaunchEx(kernel_buf_, kernel_buf_size_, dump_flag_, stream_);
+ rtError_t rt_ret = rtKernelLaunchFwk(op_desc_->GetName().c_str(), kernel_buf_, kernel_buf_size_, dump_flag_, stream_);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchEx failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][RtKernelLaunchEx] failed, ret:0x%X", rt_ret);

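For readers following the API transfer in this file: Init() now caches op_desc_ solely so that Distribute() can pass the op name to the new launch API. A minimal sketch of the call-site change, based on the rtKernelLaunchFwk signature stubbed later in this PR (the buffers, flag and stream are the members visible in the hunk above):

    // Before: the runtime only received an opaque args buffer.
    // rtError_t rt_ret = rtKernelLaunchEx(kernel_buf_, kernel_buf_size_, dump_flag_, stream_);

    // After: the op name travels with the launch, presumably so the AICPU task can be
    // attributed back to a graph op.
    rtError_t rt_ret = rtKernelLaunchFwk(op_desc_->GetName().c_str(),  // op name, from the cached op_desc_
                                         kernel_buf_,                  // STR_FWK_OP_KERNEL args buffer
                                         kernel_buf_size_,             // args size in bytes
                                         dump_flag_,                   // launch flags (dump/topic bits)
                                         stream_);                     // target rtStream_t

Note that the adjacent REPORT_CALL_ERROR/GELOGE messages still mention rtKernelLaunchEx; those are unchanged context lines, not part of this change.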

ge/graph/load/model_manager/task_info/kernel_ex_task_info.h  (+1 -0)

@@ -70,6 +70,7 @@ class KernelExTaskInfo : public TaskInfo {
  uint32_t dump_flag_;
  uint32_t kernel_buf_size_;
  DavinciModel *davinci_model_;
+ OpDescPtr op_desc_;
  void *kernel_buf_;
  void *input_output_addr_;
  void *ext_info_addr_;


ge/graph/load/model_manager/task_info/kernel_task_info.cc  (+4 -3)

@@ -440,9 +440,10 @@ Status KernelTaskInfo::Distribute() {
  }
  GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_);
  // blockDim is reserved parameter, set to 1
- rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name_.c_str()),
-                                    reinterpret_cast<const void *>(kernel_name_.c_str()), 1, args_, args_size_,
-                                    nullptr, stream_, dump_flag_);
+ std::string op_name = op_desc_->GetName();
+ rtKernelLaunchNames_t launch_name = {so_name_.c_str(), kernel_name_.c_str(), op_name.c_str()};
+ rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name, 1, args_, args_size_,
+                                      nullptr, stream_, dump_flag_);
  call_save_dump_ = true;
} else {
  /* default: not skt launch */

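The same transfer applies to the non-ex AICPU path: the (so, kernel) name pair plus the op name are bundled into an rtKernelLaunchNames_t and handed to rtAicpuKernelLaunchWithFlag. A short sketch of the pattern as used above (the field order follows the aggregate initialization in the hunk; the struct's member names themselves are not shown in this diff):

    // Keep the op name in a named std::string so the c_str() pointer stays valid for the
    // lifetime of launch_name; a temporary from GetName() would dangle.
    std::string op_name = op_desc_->GetName();
    rtKernelLaunchNames_t launch_name = {so_name_.c_str(), kernel_name_.c_str(), op_name.c_str()};
    // blockDim stays 1 (reserved); args, smDesc, stream and flags keep their previous meaning.
    rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name, 1, args_, args_size_,
                                         nullptr, stream_, dump_flag_);

Compared with rtCpuKernelLaunchWithFlag, the reinterpret_casts to const void * disappear because the names are now typed fields of a struct.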

ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc  (+8 -7)

@@ -477,7 +477,7 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context,
  GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm));

  RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] Start");
- GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL),
+ GE_CHK_RT_RET(rtKernelLaunchFwk(node_name_.c_str(), copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL),
                                  RT_KERNEL_DEFAULT, context.GetStream()));
  RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] End");

@@ -638,7 +638,8 @@ Status AicpuTfNodeTask::LaunchTask(TaskContext &context) {
  GELOGD("Node[%s] launch task start, unknown_type=%d.", node_name_.c_str(), unknown_type_);
  uint32_t flag = RT_KERNEL_DEFAULT;
  RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] Start");
- GE_CHK_RT_RET(rtKernelLaunchEx(kernel_buf_->GetData(), kernel_buf_->GetSize(), flag, context.GetStream()));
+ GE_CHK_RT_RET(rtKernelLaunchFwk(node_name_.c_str(), kernel_buf_->GetData(),
+                                 kernel_buf_->GetSize(), flag, context.GetStream()));
  RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] End");
  GELOGD("Node[%s] launch end.", node_name_.c_str());
  if (need_sync_) {
@@ -819,11 +820,11 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) {
  if (kernel_type == ccKernelType::CUST_AI_CPU) {
    flag |= static_cast<uint32_t>(RT_KERNEL_CUSTOM_AICPU);
  }
- auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name.c_str()),
-                                         reinterpret_cast<const void *>(kernel_name.c_str()),
-                                         1, // default core dim is 1
-                                         args_.get(), args_size_,
-                                         nullptr, context.GetStream(), flag);
+ rtKernelLaunchNames_t launch_name = {so_name.c_str(), kernel_name.c_str(), node_name_.c_str()};
+ auto rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name,
+                                           1, // default core dim is 1
+                                           args_.get(), args_size_,
+                                           nullptr, context.GetStream(), flag);
  GE_CHK_RT_RET(rt_ret);
  GELOGD("Node[%s] launch task end.", node_name_.c_str());
  return SUCCESS;

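The hybrid executor gets the same two replacements, with one difference: the op name comes from the task's node_name_ member rather than an OpDesc. A condensed sketch of the AicpuNodeTask launch path after this change (identifiers are those visible in the hunk above; the real code stores rt_ret before checking it):

    uint32_t flag = RT_KERNEL_DEFAULT;
    if (kernel_type == ccKernelType::CUST_AI_CPU) {
      flag |= static_cast<uint32_t>(RT_KERNEL_CUSTOM_AICPU);  // custom AICPU kernels add a flag bit
    }
    // node_name_ is a class member, so its c_str() outlives the launch call.
    rtKernelLaunchNames_t launch_name = {so_name.c_str(), kernel_name.c_str(), node_name_.c_str()};
    GE_CHK_RT_RET(rtAicpuKernelLaunchWithFlag(&launch_name,
                                              1,  // default core dim is 1
                                              args_.get(), args_size_,
                                              nullptr, context.GetStream(), flag));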

inc/external/OWNERS  (+10 -0)

@@ -0,0 +1,10 @@
approvers:
- gegenhua
reviewers:
- wqtshg
- ji_chen
- xchu42
- sheng-nan
- wangxiaotian22
- zhangxiaokun9
- tangqunzhang

tests/depends/runtime/src/runtime_stub.cc  (+15 -0)

@@ -460,6 +460,21 @@ rtError_t rtDebugUnRegisterForStream(rtStream_t stream) {
rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream) {
  return RT_ERROR_NONE;
}
+
+ rtError_t rtKernelLaunchFwk(const char *opName, void *args, uint32_t argSize, uint32_t flags, rtStream_t rtStream) {
+   return RT_ERROR_NONE;
+ }
+
+ rtError_t rtAicpuKernelLaunchWithFlag(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, const void *args,
+                                       uint32_t argSize, rtSmDesc_t *smDesc, rtStream_t stream, uint32_t flags) {
+   return RT_ERROR_NONE;
+ }
+
+ rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, const void *args,
+                               uint32_t argSize, rtSmDesc_t *smDesc, rtStream_t stream) {
+   return RT_ERROR_NONE;
+ }
+
#ifdef __cplusplus
}
#endif

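These stubs only exist so the unit tests link against the new runtime symbols without a real device; every new API returns RT_ERROR_NONE unconditionally. A hypothetical minimal check of the stubbed signatures (the include path and the literal names passed in are assumptions for illustration, not part of this PR):

    #include <gtest/gtest.h>
    #include "runtime/rt.h"  // assumed header exposing the rtKernelLaunchFwk / rtAicpuKernelLaunch* declarations

    TEST(RuntimeStubCheck, NewLaunchApisLinkAndSucceed) {
      rtKernelLaunchNames_t names = {"libcpu_kernels.so", "RunCpuKernel", "SomeOp"};  // placeholder names
      EXPECT_EQ(rtKernelLaunchFwk("SomeOp", nullptr, 0U, 0U, nullptr), RT_ERROR_NONE);
      EXPECT_EQ(rtAicpuKernelLaunchWithFlag(&names, 1U, nullptr, 0U, nullptr, nullptr, 0U), RT_ERROR_NONE);
      EXPECT_EQ(rtAicpuKernelLaunch(&names, 1U, nullptr, 0U, nullptr, nullptr), RT_ERROR_NONE);
    }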
tests/ut/ge/CMakeLists.txt  (+1 -0)

@@ -736,6 +736,7 @@ set(HYBRID_TEST_FILES
"hybrid/node_executor/host_cpu/host_cpu_node_task_unittest.cc" "hybrid/node_executor/host_cpu/host_cpu_node_task_unittest.cc"
"hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc" "hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc"
"hybrid/node_executor/hccl/hccl_node_executor_unittest.cc" "hybrid/node_executor/hccl/hccl_node_executor_unittest.cc"
"hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc"
"hybrid/executor/hybrid_model_async_executor_unittest.cc" "hybrid/executor/hybrid_model_async_executor_unittest.cc"
"hybrid/executor/hybrid_model_pipeline_executor_unittest.cc" "hybrid/executor/hybrid_model_pipeline_executor_unittest.cc"
"hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc" "hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc"


tests/ut/ge/hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc  (+168 -0)

@@ -0,0 +1,168 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <vector>
#define private public
#define protected public
#include "graph/runtime_inference_context.h"
#include "aicpu/common/aicpu_task_struct.h"
#include "hybrid/executor/subgraph_context.h"
#include "hybrid/node_executor/aicpu/aicpu_node_executor.h"
#undef protected
#undef private
using namespace std;
using namespace testing;
namespace {
struct AicpuTaskStruct {
  aicpu::AicpuParamHead head;
  uint64_t io_addrp[6];
} __attribute__((packed));
}  // namespace
namespace ge {
using namespace hybrid;
class UtestAicpuNodeExecutor : public testing::Test {
 protected:
  void SetUp() {}
  void TearDown() {}
};
static NodePtr CreateNode(ComputeGraphPtr graph, const string &name, const string &type, int in_num, int out_num) {
  OpDescPtr op_desc = std::make_shared<OpDesc>(name, type);
  op_desc->SetStreamId(0);
  static int32_t index = 0;
  op_desc->SetId(index++);
  GeTensorDesc tensor(GeShape(), FORMAT_ND, DT_INT64);
  TensorUtils::SetSize(tensor, 64);
  vector<int64_t> input_offset;
  for (int i = 0; i < in_num; i++) {
    op_desc->AddInputDesc(tensor);
    input_offset.emplace_back(i * 64);
  }
  op_desc->SetInputOffset(input_offset);
  vector<int64_t> output_offset;
  for (int i = 0; i < out_num; i++) {
    op_desc->AddOutputDesc(tensor);
    output_offset.emplace_back(in_num * 64 + i * 64);
  }
  op_desc->SetOutputOffset(output_offset);
  return graph->AddNode(op_desc);
}
TEST_F(UtestAicpuNodeExecutor, aicpu_tf_node_task) {
  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test");
  GeModelPtr ge_sub_model = std::make_shared<GeModel>();
  GeRootModelPtr ge_root_model = std::make_shared<GeRootModel>(graph);
  ge_root_model->SetModelName("test_name");
  ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model);
  HybridModel hybrid_model(ge_root_model);

  NodePtr node = CreateNode(graph, "frameworkop", FRAMEWORK_OP_TYPE, 4, 2);
  std::unique_ptr<NodeItem> new_node;
  ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS);
  NodeItem *node_item = new_node.get();
  hybrid_model.node_items_[node] = std::move(new_node);
  node_item->input_start = 0;
  node_item->output_start = 0;
  node_item->is_dynamic = true;
  node_item->shape_inference_type = DEPEND_COMPUTE;

  GraphItem graph_item;
  graph_item.node_items_.emplace_back(node_item);
  graph_item.total_inputs_ = 4;
  graph_item.total_outputs_ = 2;

  GraphExecutionContext graph_context;
  SubgraphContext subgraph_context(&graph_item, &graph_context);
  ASSERT_EQ(subgraph_context.Init(), SUCCESS);
  graph_context.callback_manager = std::unique_ptr<CallbackManager>(new CallbackManager());
  auto node_state = subgraph_context.GetOrCreateNodeState(node_item);
  ASSERT_NE(node_state, nullptr);

  // Feed each of the four inputs; the loop index is the input index.
  for (int i = 0; i < 4; ++i) {
    uint64_t value_0 = 512;
    TensorValue in_tensor0(&value_0, sizeof(value_0));
    subgraph_context.SetInput(*node_item, i, in_tensor0);
  }
  uint64_t value_0 = 512;
  TensorValue out_tensor0(&value_0, sizeof(value_0));
  subgraph_context.SetOutput(*node_item, 0, out_tensor0);
  uint64_t value_1 = 512;
  TensorValue out_tensor1(&value_1, sizeof(value_1));
  subgraph_context.SetOutput(*node_item, 1, out_tensor1);

  // task
  domi::TaskDef task_def;
  domi::KernelExDef *kernel_ex_def = task_def.mutable_kernel_ex();
  kernel_ex_def->set_kernel_ext_info_size(12);
  // Build the ext-info blob (header + int32 payload) directly in the heap buffer so
  // nothing is written past the flexible-array member of a stack AicpuExtInfo.
  AicpuExtInfo aicpu_ext_info;
  aicpu_ext_info.infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE;
  aicpu_ext_info.infoLen = sizeof(int32_t);
  int32_t type = node_item->shape_inference_type;
  char *ext_mem = reinterpret_cast<char *>(malloc(sizeof(AicpuExtInfo) + sizeof(int32_t)));
  memcpy_s(ext_mem, sizeof(AicpuExtInfo) + sizeof(int32_t), &aicpu_ext_info, sizeof(AicpuExtInfo));
  memcpy_s(ext_mem + sizeof(AicpuExtInfo), sizeof(int32_t), &type, sizeof(int32_t));
  std::string ext_info(ext_mem, sizeof(AicpuExtInfo) + sizeof(int32_t));
  std::string *mutable_ext_info = kernel_ex_def->mutable_kernel_ext_info();
  (*mutable_ext_info) = ext_info;
  hybrid_model.task_defs_[node] = std::vector<domi::TaskDef>({task_def, task_def});

  AicpuTfNodeTask aicpu_tf_node_task(node_item, task_def);
  ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS);
  ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS);

  AicpuTaskStruct args;
  args.head.length = sizeof(args);
  args.head.ioAddrNum = 6;
  domi::TaskDef task_def2;
  task_def2.set_type(RT_MODEL_TASK_ALL_KERNEL);
  task_def2.mutable_kernel()->set_args(reinterpret_cast<const char *>(&args), args.head.length);
  task_def2.mutable_kernel()->set_args_size(args.head.length);
  hybrid_model.task_defs_[node] = std::vector<domi::TaskDef>({task_def2});

  AicpuNodeTask aicpu_node_task(node_item, task_def);
  ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED);
  ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS);

  // kernel_ex_def->set_allocated_kernel_ext_info(nullptr);
  free(ext_mem);
}
} // namespace ge
