From 234abf4cb1c915283a5704e3aca7a9641b92b548 Mon Sep 17 00:00:00 2001 From: zhou_lili Date: Thu, 8 Apr 2021 10:48:30 +0800 Subject: [PATCH] fix resize args_ when op with workspace do hybrid_model_executor --- .../node_executor/aicore/aicore_op_task.cc | 48 +++++++++++-------- .../node_executor/aicore/aicore_op_task.h | 1 + tests/ut/ge/hybrid/ge_hybrid_unittest.cc | 34 +++++++++++++ 3 files changed, 62 insertions(+), 21 deletions(-) diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 06340119..8bb871fb 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -75,7 +75,6 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) output_indices_to_skip_.push_back(i); } } - GELOGI("[TASK_INFO] %lu/%s.", log_id_, log_name_.c_str()); return SUCCESS; } @@ -228,19 +227,19 @@ Status AiCoreOpTask::InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDe } const auto *args_offset_buffer = reinterpret_cast(context.args_offset().data()); - uint32_t offset = *args_offset_buffer; - if (offset > args_size_) { + offset_ = *args_offset_buffer; + if (offset_ > args_size_) { GELOGE(INTERNAL_ERROR, "[Check][Offset][%s] Arg offset out of range. offset = %u," - "arg size = %u , op:%s op_type:%s", GetName().c_str(), offset, args_size_, + " arg size = %u, op:%s op_type:%s", GetName().c_str(), offset_, args_size_, op_desc.GetName().c_str(), op_desc.GetType().c_str()); REPORT_INNER_ERROR("E19999", "[%s] Arg offset out of range. 
offset = %u, arg size = %u" - "op:%s op_type:%s", GetName().c_str(), offset, args_size_, + " op:%s op_type:%s", GetName().c_str(), offset_, args_size_, op_desc.GetName().c_str(), op_desc.GetType().c_str()); return INTERNAL_ERROR; } - arg_base_ = reinterpret_cast(args_.get() + offset); - max_arg_count_ = (args_size_ - offset) / sizeof(void *); + arg_base_ = reinterpret_cast(args_.get() + offset_); + max_arg_count_ = (args_size_ - offset_) / sizeof(void *); GELOGD("[%s] Done setting kernel args successfully. stub_func = %s, block_dim = %d," "arg base = %p, arg size = %u", op_desc.GetName().c_str(), stub_name_.c_str(), @@ -289,19 +288,19 @@ Status AiCoreOpTask::InitWithKernelDefWithHandle(const OpDesc &op_desc, const do } const auto *args_offset_buffer = reinterpret_cast(context.args_offset().data()); - uint32_t offset = *args_offset_buffer; - if (offset > args_size_) { + offset_ = *args_offset_buffer; + if (offset_ > args_size_) { GELOGE(INTERNAL_ERROR, "[Check][Offset][%s] Arg offset out of range. offset = %u, arg size = %u" - "op:%s op_type:%s", GetName().c_str(), offset, args_size_, + " op:%s op_type:%s", GetName().c_str(), offset_, args_size_, op_desc.GetName().c_str(), op_desc.GetType().c_str()); REPORT_INNER_ERROR("E19999", "[%s] Arg offset out of range. 
offset = %u, arg size = %u" - "op:%s op_type:%s", GetName().c_str(), offset, args_size_, + " op:%s op_type:%s", GetName().c_str(), offset_, args_size_, op_desc.GetName().c_str(), op_desc.GetType().c_str()); return INTERNAL_ERROR; } - arg_base_ = reinterpret_cast(args_.get() + offset); - max_arg_count_ = (args_size_ - offset) / sizeof(void *); + arg_base_ = reinterpret_cast(args_.get() + offset_); + max_arg_count_ = (args_size_ - offset_) / sizeof(void *); return SUCCESS; } @@ -428,14 +427,20 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { ++expected_arg_count; } if (expected_arg_count > max_arg_count_) { - GELOGE(INTERNAL_ERROR, - "[Check][arg_count][%s] Invalid arg memory, max arg count = %u, but expect = %zu", - GetName().c_str(), - max_arg_count_, - expected_arg_count); - REPORT_INNER_ERROR("E19999", "[%s] Invalid arg memory, max arg count = %u, but expect = %zu", - GetName().c_str(), max_arg_count_, expected_arg_count); - return INTERNAL_ERROR; + GELOGD("Need to reset size of args_ from %u to %zu.", max_arg_count_, expected_arg_count); + auto length = expected_arg_count * sizeof(uintptr_t) + offset_; + std::unique_ptr new_args(new(std::nothrow) uint8_t[length]); + GE_CHECK_NOTNULL(new_args); + if (memcpy_s(new_args.get(), length, args_.get(), offset_) != EOK) { + GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][new_args]failed, dst length is %zu, src length is %u.", + length, offset_); + REPORT_INNER_ERROR("E19999", "update kernel args failed of %s.", task_context.GetNodeName()); + return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; + } + args_ = std::move(new_args); + max_arg_count_ = static_cast(expected_arg_count); + args_size_ = static_cast(length); + arg_base_ = reinterpret_cast(args_.get() + offset_); } int index = 0; @@ -492,6 +497,7 @@ Status AiCoreOpTask::LaunchKernel(rtStream_t stream) { GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream)); GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim 
= %u).", stub_name_.c_str(), block_dim_); } + GELOGI("[TASK_INFO] %lu/%s", log_id_, log_name_.c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h index fe18bfd0..8d7b7f1e 100755 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -116,6 +116,7 @@ class AiCoreOpTask { bool is_dynamic_ = false; uint64_t log_id_ = 0; std::string log_name_; + uint32_t offset_ = 0; }; class AtomicAddrCleanOpTask : public AiCoreOpTask { diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc index 274cc56f..9746585d 100644 --- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc +++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc @@ -426,6 +426,40 @@ TEST_F(UtestGeHybrid, TestTaskContext) { ASSERT_EQ(new_desc.GetShape().GetDims(), new_shape.GetDims()); } +TEST_F(UtestGeHybrid, hybrid_model_executor_update_args) { + auto aicore_task = std::unique_ptr(new(std::nothrow)hybrid::AiCoreOpTask()); + + auto graph = make_shared("graph"); + OpDescPtr op_desc = CreateOpDesc("Add", "Add"); + GeShape shape({2, 16}); + GeTensorDesc tensor_desc(shape); + op_desc->AddInputDesc(tensor_desc); + op_desc->AddInputDesc(tensor_desc); + op_desc->AddOutputDesc(tensor_desc); + auto node = graph->AddNode(op_desc); + + std::unique_ptr node_item; + NodeItem::Create(node, node_item); + node_item->input_start = 0; + node_item->output_start = 0; + + GraphExecutionContext execution_context; + SubgraphContext subgraph_context(nullptr, &execution_context); + subgraph_context.all_inputs_.resize(2); + subgraph_context.all_outputs_.resize(1); + + NodeState node_state(*node_item, &subgraph_context); + auto task_context = TaskContext::Create(&node_state, &execution_context, &subgraph_context); + + int32_t buffer[1]; + aicore_task->tiling_buffer_ = TensorBuffer::Create(buffer, sizeof(buffer)); + EXPECT_NE(aicore_task->tiling_buffer_, nullptr); + 
aicore_task->max_arg_count_ = 0; + EXPECT_EQ(aicore_task->UpdateArgs(*task_context), ACL_ERROR_GE_MEMORY_OPERATE_FAILED); + aicore_task->args_ = std::unique_ptr(new uint8_t[sizeof(uintptr_t) * 2]); + EXPECT_EQ(aicore_task->UpdateArgs(*task_context), SUCCESS); +} + TEST_F(UtestGeHybrid, hybrid_model_executor_check_shape) { HybridModelExecutor::ExecuteArgs args; GeTensorDescPtr ge_tensor = make_shared(GeTensorDesc());