From 191f381cc5251711c4f65ef11f7262f47e583068 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Tue, 13 Jul 2021 11:49:50 +0800 Subject: [PATCH] runtime api transfer --- .../task_info/kernel_ex_task_info.cc | 3 +- .../task_info/kernel_ex_task_info.h | 1 + .../task_info/kernel_task_info.cc | 7 +- .../aicpu/aicpu_node_executor.cc | 15 +- inc/external/OWNERS | 10 ++ tests/depends/runtime/src/runtime_stub.cc | 15 ++ tests/ut/ge/CMakeLists.txt | 1 + .../aicpu/aicpu_node_executor_unittest.cc | 168 ++++++++++++++++++ 8 files changed, 209 insertions(+), 11 deletions(-) create mode 100644 inc/external/OWNERS create mode 100644 tests/ut/ge/hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc diff --git a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc index a4b3de75..ee358b5c 100644 --- a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc @@ -106,6 +106,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin // 1. 
Copy context from kernelExDef.private to workspace uint32_t op_index = kernel_ex_def.op_index(); OpDescPtr op_desc = davinci_model_->GetOpByIndex(op_index); + op_desc_ = op_desc; if (op_desc == nullptr) { REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", op_index); GELOGE(INTERNAL_ERROR, "[Get][Op] by index failed, index:%u is out of range!", op_index); @@ -422,7 +423,7 @@ Status KernelExTaskInfo::Distribute() { if (topic_type_flag_ > 0) { dump_flag_ = dump_flag_ | topic_type_flag_; } - rtError_t rt_ret = rtKernelLaunchEx(kernel_buf_, kernel_buf_size_, dump_flag_, stream_); + rtError_t rt_ret = rtKernelLaunchFwk(op_desc_->GetName().c_str(), kernel_buf_, kernel_buf_size_, dump_flag_, stream_); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchEx failed, ret:0x%X", rt_ret); GELOGE(RT_FAILED, "[Call][RtKernelLaunchEx] failed, ret:0x%X", rt_ret); diff --git a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h index 1b77b715..7d07eb7f 100644 --- a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h @@ -70,6 +70,7 @@ class KernelExTaskInfo : public TaskInfo { uint32_t dump_flag_; uint32_t kernel_buf_size_; DavinciModel *davinci_model_; + OpDescPtr op_desc_; void *kernel_buf_; void *input_output_addr_; void *ext_info_addr_; diff --git a/ge/graph/load/model_manager/task_info/kernel_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_task_info.cc index 07ad63ca..63f4257c 100755 --- a/ge/graph/load/model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.cc @@ -440,9 +440,10 @@ Status KernelTaskInfo::Distribute() { } GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_); // blockDim is reserved parameter, set to 1 - rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast(so_name_.c_str()), - 
reinterpret_cast(kernel_name_.c_str()), 1, args_, args_size_, - nullptr, stream_, dump_flag_); + std::string op_name = op_desc_->GetName(); + rtKernelLaunchNames_t launch_name = {so_name_.c_str(), kernel_name_.c_str(), op_name.c_str()}; + rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name, 1, args_, args_size_, + nullptr, stream_, dump_flag_); call_save_dump_ = true; } else { /* default: not skt launch */ diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 820c9b56..cf20303c 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -477,7 +477,7 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context, GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm)); RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] Start"); - GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), + GE_CHK_RT_RET(rtKernelLaunchFwk(node_name_.c_str(), copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), RT_KERNEL_DEFAULT, context.GetStream())); RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] End"); @@ -638,7 +638,8 @@ Status AicpuTfNodeTask::LaunchTask(TaskContext &context) { GELOGD("Node[%s] launch task start, unknown_type=%d.", node_name_.c_str(), unknown_type_); uint32_t flag = RT_KERNEL_DEFAULT; RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] Start"); - GE_CHK_RT_RET(rtKernelLaunchEx(kernel_buf_->GetData(), kernel_buf_->GetSize(), flag, context.GetStream())); + GE_CHK_RT_RET(rtKernelLaunchFwk(node_name_.c_str(), kernel_buf_->GetData(), + kernel_buf_->GetSize(), flag, context.GetStream())); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] End"); GELOGD("Node[%s] launch end.", node_name_.c_str()); if 
(need_sync_) { @@ -819,11 +820,11 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) { if (kernel_type == ccKernelType::CUST_AI_CPU) { flag |= static_cast(RT_KERNEL_CUSTOM_AICPU); } - auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast(so_name.c_str()), - reinterpret_cast(kernel_name.c_str()), - 1, // default core dim is 1 - args_.get(), args_size_, - nullptr, context.GetStream(), flag); + rtKernelLaunchNames_t launch_name = {so_name.c_str(), kernel_name.c_str(), node_name_.c_str()}; + auto rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name, + 1, // default core dim is 1 + args_.get(), args_size_, + nullptr, context.GetStream(), flag); GE_CHK_RT_RET(rt_ret); GELOGD("Node[%s] launch task end.", node_name_.c_str()); return SUCCESS; diff --git a/inc/external/OWNERS b/inc/external/OWNERS new file mode 100644 index 00000000..934272a6 --- /dev/null +++ b/inc/external/OWNERS @@ -0,0 +1,10 @@ +approvers: +- gegenhua +reviewers: +- wqtshg +- ji_chen +- xchu42 +- sheng-nan +- wangxiaotian22 +- zhangxiaokun9 +- tangqunzhang diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc index 0c9e2c27..510eb1ad 100644 --- a/tests/depends/runtime/src/runtime_stub.cc +++ b/tests/depends/runtime/src/runtime_stub.cc @@ -460,6 +460,21 @@ rtError_t rtDebugUnRegisterForStream(rtStream_t stream) { rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream) { return RT_ERROR_NONE; } + +rtError_t rtKernelLaunchFwk(const char *opName, void *args, uint32_t argSize, uint32_t flags, rtStream_t rtStream) { + return RT_ERROR_NONE; +} + +rtError_t rtAicpuKernelLaunchWithFlag(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, const void *args, + uint32_t argSize, rtSmDesc_t *smDesc, rtStream_t stream, uint32_t flags) { + return RT_ERROR_NONE; +} + +rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, const void *args, + uint32_t argSize, rtSmDesc_t *smDesc, rtStream_t stream) { + 
return RT_ERROR_NONE; +} + #ifdef __cplusplus } #endif diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 42fa6128..ebaee921 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -735,6 +735,7 @@ set(HYBRID_TEST_FILES "hybrid/node_executor/host_cpu/host_cpu_node_task_unittest.cc" "hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc" "hybrid/node_executor/hccl/hccl_node_executor_unittest.cc" + "hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc" "hybrid/executor/hybrid_model_async_executor_unittest.cc" "hybrid/executor/hybrid_model_pipeline_executor_unittest.cc" "hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc" diff --git a/tests/ut/ge/hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc b/tests/ut/ge/hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc new file mode 100644 index 00000000..b225949b --- /dev/null +++ b/tests/ut/ge/hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc @@ -0,0 +1,168 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include + +#define private public +#define protected public +#include "graph/runtime_inference_context.h" +#include "aicpu/common/aicpu_task_struct.h" +#include "hybrid/executor/subgraph_context.h" +#include "hybrid/node_executor/aicpu/aicpu_node_executor.h" +#undef protected +#undef private + +using namespace std; +using namespace testing; + +namespace { +struct AicpuTaskStruct { + aicpu::AicpuParamHead head; + uint64_t io_addrp[6]; +}__attribute__((packed)); +} // namespace + +namespace ge { +using namespace hybrid; + +class UtestAicpuNodeExecutor : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} +}; + +static NodePtr CreateNode(ComputeGraphPtr graph, const string &name, const string &type, int in_num, int out_num) { + OpDescPtr op_desc = std::make_shared(name, type); + op_desc->SetStreamId(0); + static int32_t index = 0; + op_desc->SetId(index++); + + GeTensorDesc tensor(GeShape(), FORMAT_ND, DT_INT64); + TensorUtils::SetSize(tensor, 64); + vector input_offset; + for (int i = 0; i < in_num; i++) { + op_desc->AddInputDesc(tensor); + input_offset.emplace_back(i * 64); + } + op_desc->SetInputOffset(input_offset); + + vector output_offset; + for (int i = 0; i < out_num; i++) { + op_desc->AddOutputDesc(tensor); + output_offset.emplace_back(in_num * 64 + i * 64); + } + op_desc->SetOutputOffset(output_offset); + + return graph->AddNode(op_desc); +} + +TEST_F(UtestAicpuNodeExecutor, aicpu_tf_node_task) { + ComputeGraphPtr graph = std::make_shared("test"); + GeModelPtr ge_sub_model = std::make_shared(); + GeRootModelPtr ge_root_model = std::make_shared(graph); + ge_root_model->SetModelName("test_name"); + ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model); + HybridModel hybrid_model(ge_root_model); + + NodePtr node = CreateNode(graph, "frameworkop", FRAMEWORK_OP_TYPE, 4, 2); + + std::unique_ptr new_node; + ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); + NodeItem *node_item = new_node.get(); 
+ hybrid_model.node_items_[node] = std::move(new_node); + node_item->input_start = 0; + node_item->output_start = 0; + node_item->is_dynamic = true; + node_item->shape_inference_type = DEPEND_COMPUTE; + + GraphItem graph_item; + graph_item.node_items_.emplace_back(node_item); + graph_item.total_inputs_ = 4; + graph_item.total_outputs_ = 2; + + GraphExecutionContext graph_context; + SubgraphContext subgraph_context(&graph_item, &graph_context); + ASSERT_EQ(subgraph_context.Init(), SUCCESS); + graph_context.callback_manager = std::unique_ptr(new CallbackManager()); + + auto node_state = subgraph_context.GetOrCreateNodeState(node_item); + ASSERT_NE(node_state, nullptr); + + for (int i=0; i<4; ++i) { + uint64_t value_0 = 512; + TensorValue in_tensor0(&value_0, sizeof(value_0)); + subgraph_context.SetInput(*node_item, i, in_tensor0); + } + + uint64_t value_0 = 512; + TensorValue out_tensor0(&value_0, sizeof(value_0)); + subgraph_context.SetOutput(*node_item, 0, out_tensor0); + + uint64_t value_1 = 512; + TensorValue out_tensor1(&value_1, sizeof(value_1)); + subgraph_context.SetOutput(*node_item, 1, out_tensor1); + + // task + domi::TaskDef task_def; + domi::KernelExDef *kernel_ex_def = task_def.mutable_kernel_ex(); + kernel_ex_def->set_kernel_ext_info_size(12); + + AicpuExtInfo aicpu_ext_info; + aicpu_ext_info.infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE; + aicpu_ext_info.infoLen = sizeof(int32_t); + int32_t type = node_item->shape_inference_type; + memcpy_s(aicpu_ext_info.infoMsg, sizeof(int32_t), &type, sizeof(int32_t)); + char *ext_mem = (char*)malloc(sizeof(AicpuExtInfo) + sizeof(int32_t)); + memcpy_s(ext_mem, sizeof(AicpuExtInfo) + sizeof(int32_t), &aicpu_ext_info, sizeof(AicpuExtInfo) + sizeof(int32_t)); + std::string ext_info(ext_mem, sizeof(AicpuExtInfo) + sizeof(int32_t)); + + std::string *mutable_ext_info = kernel_ex_def->mutable_kernel_ext_info(); + (*mutable_ext_info) = ext_info; + + hybrid_model.task_defs_[node] = std::vector({task_def, task_def}); 
+ + AicpuTfNodeTask aicpu_tf_node_task(node_item, task_def); + + ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); + ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); + + AicpuTaskStruct args; + args.head.length = sizeof(args); + args.head.ioAddrNum = 6; + + domi::TaskDef task_def2; + task_def2.set_type(RT_MODEL_TASK_ALL_KERNEL); + task_def2.mutable_kernel()->set_args(reinterpret_cast(&args), args.head.length); + task_def2.mutable_kernel()->set_args_size(args.head.length); + + hybrid_model.task_defs_[node] = std::vector({task_def2}); + + AicpuNodeTask aicpu_node_task(node_item, task_def); + ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); + ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); + + + //kernel_ex_def->set_allocated_kernel_ext_info(nullptr); + + free(ext_mem); + +} + +} // namespace ge +