From 234abf4cb1c915283a5704e3aca7a9641b92b548 Mon Sep 17 00:00:00 2001
From: zhou_lili <zhoulili20@huawei.com>
Date: Thu, 8 Apr 2021 10:48:30 +0800
Subject: [PATCH] Fix: resize args_ when an op with workspace runs in
 hybrid_model_executor

---
 .../node_executor/aicore/aicore_op_task.cc    | 48 +++++++++++--------
 .../node_executor/aicore/aicore_op_task.h     |  1 +
 tests/ut/ge/hybrid/ge_hybrid_unittest.cc      | 34 +++++++++++++
 3 files changed, 62 insertions(+), 21 deletions(-)
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
index 06340119..8bb871fb 100644
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
@@ -75,7 +75,6 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def)
       output_indices_to_skip_.push_back(i);
     }
   }
-  GELOGI("[TASK_INFO] %lu/%s.", log_id_, log_name_.c_str());
   return SUCCESS;
 }
 
@@ -228,19 +227,19 @@ Status AiCoreOpTask::InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDe
   }
 
   const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data());
-  uint32_t offset = *args_offset_buffer;
-  if (offset > args_size_) {
+  offset_ = *args_offset_buffer;
+  if (offset_ > args_size_) {
     GELOGE(INTERNAL_ERROR, "[Check][Offset][%s] Arg offset out of range. offset = %u,"
-           "arg size = %u , op:%s op_type:%s", GetName().c_str(), offset, args_size_,
+           "arg size = %u , op:%s op_type:%s", GetName().c_str(), offset_, args_size_,
            op_desc.GetName().c_str(), op_desc.GetType().c_str());
     REPORT_INNER_ERROR("E19999", "[%s] Arg offset out of range. offset = %u, arg size = %u"
-                       "op:%s op_type:%s", GetName().c_str(), offset, args_size_,
+                       "op:%s op_type:%s", GetName().c_str(), offset_, args_size_,
                        op_desc.GetName().c_str(), op_desc.GetType().c_str());
     return INTERNAL_ERROR;
   }
 
-  arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset);
-  max_arg_count_ = (args_size_ - offset) / sizeof(void *);
+  arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset_);
+  max_arg_count_ = (args_size_ - offset_) / sizeof(void *);
   GELOGD("[%s] Done setting kernel args successfully. stub_func = %s, block_dim = %d,"
          "arg base = %p, arg size = %u",
          op_desc.GetName().c_str(),  stub_name_.c_str(),
@@ -289,19 +288,19 @@ Status AiCoreOpTask::InitWithKernelDefWithHandle(const OpDesc &op_desc, const do
   }
 
   const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data());
-  uint32_t offset = *args_offset_buffer;
-  if (offset > args_size_) {
+  offset_ = *args_offset_buffer;
+  if (offset_ > args_size_) {
     GELOGE(INTERNAL_ERROR, "[Check][Offset][%s] Arg offset out of range. offset = %u, arg size = %u"
-           "op:%s op_type:%s", GetName().c_str(), offset, args_size_,
+           "op:%s op_type:%s", GetName().c_str(), offset_, args_size_,
            op_desc.GetName().c_str(), op_desc.GetType().c_str());
     REPORT_INNER_ERROR("E19999", "[%s] Arg offset out of range. offset = %u, arg size = %u"
-                       "op:%s op_type:%s", GetName().c_str(), offset, args_size_,
+                       "op:%s op_type:%s", GetName().c_str(), offset_, args_size_,
                        op_desc.GetName().c_str(), op_desc.GetType().c_str());
     return INTERNAL_ERROR;
   }
 
-  arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset);
-  max_arg_count_ = (args_size_ - offset) / sizeof(void *);
+  arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset_);
+  max_arg_count_ = (args_size_ - offset_) / sizeof(void *);
   return SUCCESS;
 }
 
@@ -428,14 +427,20 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) {
     ++expected_arg_count;
   }
   if (expected_arg_count > max_arg_count_) {
-    GELOGE(INTERNAL_ERROR,
-           "[Check][arg_count][%s] Invalid arg memory, max arg count = %u, but expect = %zu",
-           GetName().c_str(),
-           max_arg_count_,
-           expected_arg_count);
-    REPORT_INNER_ERROR("E19999", "[%s] Invalid arg memory, max arg count = %u, but expect = %zu",
-                       GetName().c_str(), max_arg_count_, expected_arg_count);
-    return INTERNAL_ERROR;
+    GELOGD("Need to reset size of args_ from %u to %zu.", max_arg_count_, expected_arg_count);
+    auto length = expected_arg_count * sizeof(uintptr_t) + offset_;
+    std::unique_ptr<uint8_t[]> new_args(new(std::nothrow) uint8_t[length]);
+    GE_CHECK_NOTNULL(new_args);
+    if (memcpy_s(new_args.get(), length, args_.get(), offset_) != EOK) {
+      GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][new_args]failed, dst length is %zu, src length is %u.",
+             length, offset_);
+      REPORT_INNER_ERROR("E19999", "update kernel args failed of %s.", task_context.GetNodeName());
+      return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
+    }
+    args_ = std::move(new_args);
+    max_arg_count_ = static_cast<uint32_t>(expected_arg_count);
+    args_size_ = static_cast<uint32_t>(length);
+    arg_base_  = reinterpret_cast<uintptr_t *>(args_.get() + offset_);
   }
 
   int index = 0;
@@ -492,6 +497,7 @@ Status AiCoreOpTask::LaunchKernel(rtStream_t stream) {
     GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream));
     GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
   }
+  GELOGI("[TASK_INFO] %lu/%s", log_id_, log_name_.c_str());
   return SUCCESS;
 }
 
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h
index fe18bfd0..8d7b7f1e 100755
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.h
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h
@@ -116,6 +116,7 @@ class AiCoreOpTask {
   bool is_dynamic_ = false;
   uint64_t log_id_ = 0;
   std::string log_name_;
+  uint32_t offset_ = 0;
 };
 
 class AtomicAddrCleanOpTask : public AiCoreOpTask {
diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
index 274cc56f..9746585d 100644
--- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
+++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
@@ -426,6 +426,40 @@ TEST_F(UtestGeHybrid, TestTaskContext) {
   ASSERT_EQ(new_desc.GetShape().GetDims(), new_shape.GetDims());
 }
 
+TEST_F(UtestGeHybrid, hybrid_model_executor_update_args) {  // Calls AiCoreOpTask::UpdateArgs with max_arg_count_ == 0.
+  auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask());
+  // Build a graph holding a single "Add" node: two inputs and one output, all described by shape {2, 16}.
+  auto graph = make_shared<ComputeGraph>("graph");
+  OpDescPtr op_desc = CreateOpDesc("Add", "Add");
+  GeShape shape({2, 16});
+  GeTensorDesc tensor_desc(shape);
+  op_desc->AddInputDesc(tensor_desc);
+  op_desc->AddInputDesc(tensor_desc);
+  op_desc->AddOutputDesc(tensor_desc);
+  auto node = graph->AddNode(op_desc);
+  // Create the NodeItem for that node and set both of its start offsets to 0.
+  std::unique_ptr<NodeItem> node_item;
+  NodeItem::Create(node, node_item);  // NOTE(review): return value is not checked before node_item is dereferenced.
+  node_item->input_start = 0;
+  node_item->output_start = 0;
+  // Execution/subgraph contexts for the TaskContext; the subgraph's vectors are resized to 2 inputs and 1 output.
+  GraphExecutionContext execution_context;
+  SubgraphContext subgraph_context(nullptr, &execution_context);
+  subgraph_context.all_inputs_.resize(2);
+  subgraph_context.all_outputs_.resize(1);
+  // NOTE(review): task_context below is dereferenced without a null check.
+  NodeState node_state(*node_item, &subgraph_context);
+  auto task_context = TaskContext::Create(&node_state, &execution_context, &subgraph_context);
+  // Set tiling_buffer_ from a one-element int32_t stack array, then exercise UpdateArgs twice.
+  int32_t buffer[1];
+  aicore_task->tiling_buffer_ = TensorBuffer::Create(buffer, sizeof(buffer));
+  EXPECT_NE(aicore_task->tiling_buffer_, nullptr);
+  aicore_task->max_arg_count_ = 0;  // presumably forces the expected_arg_count > max_arg_count_ path in UpdateArgs
+  EXPECT_EQ(aicore_task->UpdateArgs(*task_context), ACL_ERROR_GE_MEMORY_OPERATE_FAILED);  // args_ has not been set yet
+  aicore_task->args_ = std::unique_ptr<uint8_t[]>(new uint8_t[sizeof(uintptr_t) * 2]);  // now give args_ a real buffer
+  EXPECT_EQ(aicore_task->UpdateArgs(*task_context), SUCCESS);  // the same call is expected to succeed this time
+}
+
 TEST_F(UtestGeHybrid, hybrid_model_executor_check_shape) {
   HybridModelExecutor::ExecuteArgs args;
   GeTensorDescPtr ge_tensor = make_shared<GeTensorDesc>(GeTensorDesc());