runtime api transfer

4 years ago · 191f381cc5
--- a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc
@@ -106,6 +106,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
  // 1. Copy context from kernelExDef.private to workspace
  uint32_t op_index = kernel_ex_def.op_index();
  OpDescPtr op_desc = davinci_model_->GetOpByIndex(op_index);
  op_desc_ = op_desc;
  if (op_desc == nullptr) {
    REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", op_index);
    GELOGE(INTERNAL_ERROR, "[Get][Op] by index failed, index:%u is out of range!", op_index);
@@ -422,7 +423,7 @@ Status KernelExTaskInfo::Distribute() {
  if (topic_type_flag_ > 0) {
    dump_flag_ = dump_flag_ | topic_type_flag_;
  }
  rtError_t rt_ret = rtKernelLaunchEx(kernel_buf_, kernel_buf_size_, dump_flag_, stream_);
  rtError_t rt_ret = rtKernelLaunchFwk(op_desc_->GetName().c_str(), kernel_buf_, kernel_buf_size_, dump_flag_, stream_);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchEx failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][RtKernelLaunchEx] failed, ret:0x%X", rt_ret);
--- a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h
+++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h
@@ -70,6 +70,7 @@ class KernelExTaskInfo : public TaskInfo {
  uint32_t dump_flag_;
  uint32_t kernel_buf_size_;
  DavinciModel *davinci_model_;
  OpDescPtr op_desc_;
  void *kernel_buf_;
  void *input_output_addr_;
  void *ext_info_addr_;
--- a/ge/graph/load/model_manager/task_info/kernel_task_info.cc
+++ b/ge/graph/load/model_manager/task_info/kernel_task_info.cc
@@ -440,9 +440,10 @@ Status KernelTaskInfo::Distribute() {
    }
    GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_);
    // blockDim is reserved parameter, set to 1
    rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name_.c_str()),
                                       reinterpret_cast<const void *>(kernel_name_.c_str()), 1, args_, args_size_,
                                       nullptr, stream_, dump_flag_);
    std::string op_name = op_desc_->GetName();
    rtKernelLaunchNames_t launch_name = {so_name_.c_str(), kernel_name_.c_str(), op_name.c_str()};
    rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name, 1, args_, args_size_,
                                         nullptr, stream_, dump_flag_);
    call_save_dump_ = true;
  } else {
    /* default: not skt launch */
--- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
+++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
@@ -477,7 +477,7 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context,
  GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm));

  RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] Start");
  GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL),
  GE_CHK_RT_RET(rtKernelLaunchFwk(node_name_.c_str(), copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL),
                                 RT_KERNEL_DEFAULT, context.GetStream()));
  RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] End");

@@ -638,7 +638,8 @@ Status AicpuTfNodeTask::LaunchTask(TaskContext &context) {
  GELOGD("Node[%s] launch task start, unknown_type=%d.", node_name_.c_str(), unknown_type_);
  uint32_t flag = RT_KERNEL_DEFAULT;
  RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] Start");
  GE_CHK_RT_RET(rtKernelLaunchEx(kernel_buf_->GetData(), kernel_buf_->GetSize(), flag, context.GetStream()));
  GE_CHK_RT_RET(rtKernelLaunchFwk(node_name_.c_str(), kernel_buf_->GetData(),
                                  kernel_buf_->GetSize(), flag, context.GetStream()));
  RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] End");
  GELOGD("Node[%s] launch end.", node_name_.c_str());
  if (need_sync_) {
@@ -819,11 +820,11 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) {
  if (kernel_type == ccKernelType::CUST_AI_CPU) {
    flag |= static_cast<uint32_t>(RT_KERNEL_CUSTOM_AICPU);
  }
  auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name.c_str()),
                                          reinterpret_cast<const void *>(kernel_name.c_str()),
                                          1, // default core dim is 1
                                          args_.get(), args_size_,
                                          nullptr, context.GetStream(), flag);
  rtKernelLaunchNames_t launch_name = {so_name.c_str(), kernel_name.c_str(), node_name_.c_str()};
  auto rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name,
                                            1, // default core dim is 1
                                            args_.get(), args_size_,
                                            nullptr, context.GetStream(), flag);
  GE_CHK_RT_RET(rt_ret);
  GELOGD("Node[%s] launch task end.", node_name_.c_str());
  return SUCCESS;
--- a/inc/external/OWNERS
+++ b/inc/external/OWNERS
@@ -0,0 +1,10 @@
 approvers:
 - gegenhua
 reviewers:
 - wqtshg
 - ji_chen 
 - xchu42
 - sheng-nan
 - wangxiaotian22
 - zhangxiaokun9
 - tangqunzhang
--- a/tests/depends/runtime/src/runtime_stub.cc
+++ b/tests/depends/runtime/src/runtime_stub.cc
@@ -460,6 +460,21 @@ rtError_t rtDebugUnRegisterForStream(rtStream_t stream) {
 // Test stub for rtFftsTaskLaunch: launches nothing and unconditionally returns RT_ERROR_NONE.
 rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream) {
  // The stub deliberately ignores its arguments.
  (void)fftsTaskInfo;
  (void)stream;
  return RT_ERROR_NONE;
 }

 // Test stub for rtKernelLaunchFwk: launches nothing and unconditionally returns RT_ERROR_NONE.
 rtError_t rtKernelLaunchFwk(const char *opName, void *args, uint32_t argSize, uint32_t flags, rtStream_t rtStream) {
  // The stub deliberately ignores its arguments.
  (void)opName;
  (void)args;
  (void)argSize;
  (void)flags;
  (void)rtStream;
  return RT_ERROR_NONE;
 }

 // Test stub for rtAicpuKernelLaunchWithFlag: launches nothing and unconditionally returns RT_ERROR_NONE.
 rtError_t rtAicpuKernelLaunchWithFlag(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, const void *args,
                                       uint32_t argSize, rtSmDesc_t *smDesc, rtStream_t stream, uint32_t flags) {
  // The stub deliberately ignores its arguments.
  (void)launchNames;
  (void)blockDim;
  (void)args;
  (void)argSize;
  (void)smDesc;
  (void)stream;
  (void)flags;
  return RT_ERROR_NONE;
 }

 // Test stub for rtAicpuKernelLaunch: launches nothing and unconditionally returns RT_ERROR_NONE.
 rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, const void *args,
                               uint32_t argSize, rtSmDesc_t *smDesc, rtStream_t stream) {
  // The stub deliberately ignores its arguments.
  (void)launchNames;
  (void)blockDim;
  (void)args;
  (void)argSize;
  (void)smDesc;
  (void)stream;
  return RT_ERROR_NONE;
 }

 #ifdef __cplusplus
 }
 #endif
--- a/tests/ut/ge/CMakeLists.txt
+++ b/tests/ut/ge/CMakeLists.txt
@@ -735,6 +735,7 @@ set(HYBRID_TEST_FILES
    "hybrid/node_executor/host_cpu/host_cpu_node_task_unittest.cc"
    "hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc"
    "hybrid/node_executor/hccl/hccl_node_executor_unittest.cc"
    "hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc"
    "hybrid/executor/hybrid_model_async_executor_unittest.cc"
    "hybrid/executor/hybrid_model_pipeline_executor_unittest.cc"
    "hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc"
--- a/tests/ut/ge/hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc
+++ b/tests/ut/ge/hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc
@@ -0,0 +1,168 @@
 /**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #include <gmock/gmock.h>
 #include <gtest/gtest.h>

 #include <vector>

 #define private public
 #define protected public
 #include "graph/runtime_inference_context.h"
 #include "aicpu/common/aicpu_task_struct.h"
 #include "hybrid/executor/subgraph_context.h"
 #include "hybrid/node_executor/aicpu/aicpu_node_executor.h"
 #undef protected
 #undef private

 using namespace std;
 using namespace testing;

 namespace {
 // Raw argument blob used by the test below to fill the kernel `args` of a task definition:
 // a parameter head immediately followed by six 64-bit I/O address slots.
 // Declared packed so the compiler inserts no padding between or after the members.
 struct AicpuTaskStruct {
  aicpu::AicpuParamHead head;  // the test fills in `length` and `ioAddrNum`
  uint64_t io_addrp[6];        // six slots, matching the ioAddrNum = 6 the test sets
 }__attribute__((packed));
 }  // namespace

 namespace ge {
 using namespace hybrid;

 class UtestAicpuNodeExecutor : public testing::Test {
 protected:
  void SetUp() {}
  void TearDown() {}
 };

 // Builds an op of the given name/type with `in_num` inputs and `out_num` outputs and adds it to `graph`.
 // Every tensor is an INT64 ND tensor sized at 64 bytes; input offsets are 0, 64, 128, ... and output
 // offsets continue directly after the last input. Each call assigns the next id from a function-local counter.
 // Returns whatever graph->AddNode() returns for the new op.
 static NodePtr CreateNode(ComputeGraphPtr graph, const string &name, const string &type, int in_num, int out_num) {
  const int kSlotBytes = 64;

  OpDescPtr desc = std::make_shared<OpDesc>(name, type);
  desc->SetStreamId(0);
  // Shared across calls so that every created node gets a distinct, increasing id.
  static int32_t index = 0;
  desc->SetId(index++);

  GeTensorDesc slot_desc(GeShape(), FORMAT_ND, DT_INT64);
  TensorUtils::SetSize(slot_desc, kSlotBytes);

  vector<int64_t> in_offsets;
  int in_idx = 0;
  while (in_idx < in_num) {
    desc->AddInputDesc(slot_desc);
    in_offsets.push_back(in_idx * kSlotBytes);
    ++in_idx;
  }
  desc->SetInputOffset(in_offsets);

  vector<int64_t> out_offsets;
  int out_idx = 0;
  while (out_idx < out_num) {
    desc->AddOutputDesc(slot_desc);
    out_offsets.push_back(in_num * kSlotBytes + out_idx * kSlotBytes);
    ++out_idx;
  }
  desc->SetOutputOffset(out_offsets);

  return graph->AddNode(desc);
 }

 // Exercises AicpuTfNodeTask (Init + LaunchTask) and AicpuNodeTask (Init + LaunchTask) against a
 // single framework-op node with four inputs and two outputs, using the stubbed runtime.
 TEST_F(UtestAicpuNodeExecutor, aicpu_tf_node_task) {
  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test");
  GeModelPtr ge_sub_model = std::make_shared<GeModel>();
  GeRootModelPtr ge_root_model = std::make_shared<GeRootModel>(graph);
  ge_root_model->SetModelName("test_name");
  ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model);
  HybridModel hybrid_model(ge_root_model);

  NodePtr node = CreateNode(graph, "frameworkop", FRAMEWORK_OP_TYPE, 4, 2);

  std::unique_ptr<NodeItem> new_node;
  ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS);
  NodeItem *node_item = new_node.get();
  hybrid_model.node_items_[node] = std::move(new_node);
  node_item->input_start = 0;
  node_item->output_start = 0;
  node_item->is_dynamic = true;
  node_item->shape_inference_type = DEPEND_COMPUTE;

  GraphItem graph_item;
  graph_item.node_items_.emplace_back(node_item);
  graph_item.total_inputs_ = 4;
  graph_item.total_outputs_ = 2;

  GraphExecutionContext graph_context;
  SubgraphContext subgraph_context(&graph_item, &graph_context);
  ASSERT_EQ(subgraph_context.Init(), SUCCESS);
  graph_context.callback_manager = std::unique_ptr<CallbackManager>(new CallbackManager());

  auto node_state = subgraph_context.GetOrCreateNodeState(node_item);
  ASSERT_NE(node_state, nullptr);

  // Each input slot gets its own backing value. The storage is declared at function scope because
  // TensorValue is built from a raw pointer, so it must stay valid for as long as the context is used.
  uint64_t input_values[4] = {512, 512, 512, 512};
  for (int i = 0; i < 4; ++i) {
    TensorValue in_tensor(&input_values[i], sizeof(input_values[i]));
    subgraph_context.SetInput(*node_item, i, in_tensor);
  }

  uint64_t value_0 = 512;
  TensorValue out_tensor0(&value_0, sizeof(value_0));
  subgraph_context.SetOutput(*node_item, 0, out_tensor0);

  uint64_t value_1 = 512;
  TensorValue out_tensor1(&value_1, sizeof(value_1));
  subgraph_context.SetOutput(*node_item, 1, out_tensor1);

  // task
  domi::TaskDef task_def;
  domi::KernelExDef *kernel_ex_def = task_def.mutable_kernel_ex();

  // Serialize one ext-info record: the fixed header followed by an int32 payload (the shape type).
  // The record is assembled in a buffer that is large enough for header + payload, so nothing is
  // written or read beyond the bounds of the header object itself.
  AicpuExtInfo aicpu_ext_info;
  aicpu_ext_info.infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE;
  aicpu_ext_info.infoLen = sizeof(int32_t);
  int32_t type = node_item->shape_inference_type;

  std::string ext_info(sizeof(AicpuExtInfo) + sizeof(int32_t), '\0');
  ASSERT_EQ(memcpy_s(&ext_info[0], ext_info.size(), &aicpu_ext_info, sizeof(AicpuExtInfo)), 0);
  ASSERT_EQ(memcpy_s(&ext_info[sizeof(AicpuExtInfo)], sizeof(int32_t), &type, sizeof(int32_t)), 0);

  // Keep the advertised size in sync with the bytes actually stored.
  kernel_ex_def->set_kernel_ext_info_size(static_cast<uint32_t>(ext_info.size()));
  std::string *mutable_ext_info = kernel_ex_def->mutable_kernel_ext_info();
  (*mutable_ext_info) = ext_info;

  hybrid_model.task_defs_[node] = std::vector<domi::TaskDef>({task_def, task_def});

  AicpuTfNodeTask aicpu_tf_node_task(node_item, task_def);

  ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS);
  ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS);

  // Zero-initialise so that the bytes copied into the task definition below are fully defined.
  AicpuTaskStruct args = {};
  args.head.length = sizeof(args);
  args.head.ioAddrNum = 6;

  domi::TaskDef task_def2;
  task_def2.set_type(RT_MODEL_TASK_ALL_KERNEL);
  task_def2.mutable_kernel()->set_args(reinterpret_cast<const char *>(&args), args.head.length);
  task_def2.mutable_kernel()->set_args_size(args.head.length);

  hybrid_model.task_defs_[node] = std::vector<domi::TaskDef>({task_def2});

  // NOTE(review): this task is built from `task_def` (the kernel_ex definition), not `task_def2`;
  // the FAILED expectation below depends on that. Confirm whether `task_def2` was intended.
  AicpuNodeTask aicpu_node_task(node_item, task_def);
  ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED);
  ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS);
 }

 }  // namespace ge