Browse Source

!1473 Fix resizing of args_ when an op with workspace is run by hybrid_model_executor

From: @zhou_lili
Reviewed-by: @xchu42,@youui
Signed-off-by: @youui
tags/v1.3.0
mindspore-ci-bot Gitee 3 years ago
parent
commit
25aecfb52c
3 changed files with 62 additions and 21 deletions
  1. +27
    -21
      ge/hybrid/node_executor/aicore/aicore_op_task.cc
  2. +1
    -0
      ge/hybrid/node_executor/aicore/aicore_op_task.h
  3. +34
    -0
      tests/ut/ge/hybrid/ge_hybrid_unittest.cc

+ 27
- 21
ge/hybrid/node_executor/aicore/aicore_op_task.cc View File

@@ -75,7 +75,6 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def)
output_indices_to_skip_.push_back(i); output_indices_to_skip_.push_back(i);
} }
} }
GELOGI("[TASK_INFO] %lu/%s.", log_id_, log_name_.c_str());
return SUCCESS; return SUCCESS;
} }


@@ -228,19 +227,19 @@ Status AiCoreOpTask::InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDe
} }


const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data()); const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data());
uint32_t offset = *args_offset_buffer;
if (offset > args_size_) {
offset_ = *args_offset_buffer;
if (offset_ > args_size_) {
GELOGE(INTERNAL_ERROR, "[Check][Offset][%s] Arg offset out of range. offset = %u," GELOGE(INTERNAL_ERROR, "[Check][Offset][%s] Arg offset out of range. offset = %u,"
"arg size = %u , op:%s op_type:%s", GetName().c_str(), offset, args_size_,
"arg size = %u , op:%s op_type:%s", GetName().c_str(), offset_, args_size_,
op_desc.GetName().c_str(), op_desc.GetType().c_str()); op_desc.GetName().c_str(), op_desc.GetType().c_str());
REPORT_INNER_ERROR("E19999", "[%s] Arg offset out of range. offset = %u, arg size = %u" REPORT_INNER_ERROR("E19999", "[%s] Arg offset out of range. offset = %u, arg size = %u"
"op:%s op_type:%s", GetName().c_str(), offset, args_size_,
"op:%s op_type:%s", GetName().c_str(), offset_, args_size_,
op_desc.GetName().c_str(), op_desc.GetType().c_str()); op_desc.GetName().c_str(), op_desc.GetType().c_str());
return INTERNAL_ERROR; return INTERNAL_ERROR;
} }


arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset);
max_arg_count_ = (args_size_ - offset) / sizeof(void *);
arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset_);
max_arg_count_ = (args_size_ - offset_) / sizeof(void *);
GELOGD("[%s] Done setting kernel args successfully. stub_func = %s, block_dim = %d," GELOGD("[%s] Done setting kernel args successfully. stub_func = %s, block_dim = %d,"
"arg base = %p, arg size = %u", "arg base = %p, arg size = %u",
op_desc.GetName().c_str(), stub_name_.c_str(), op_desc.GetName().c_str(), stub_name_.c_str(),
@@ -289,19 +288,19 @@ Status AiCoreOpTask::InitWithKernelDefWithHandle(const OpDesc &op_desc, const do
} }


const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data()); const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data());
uint32_t offset = *args_offset_buffer;
if (offset > args_size_) {
offset_ = *args_offset_buffer;
if (offset_ > args_size_) {
GELOGE(INTERNAL_ERROR, "[Check][Offset][%s] Arg offset out of range. offset = %u, arg size = %u" GELOGE(INTERNAL_ERROR, "[Check][Offset][%s] Arg offset out of range. offset = %u, arg size = %u"
"op:%s op_type:%s", GetName().c_str(), offset, args_size_,
"op:%s op_type:%s", GetName().c_str(), offset_, args_size_,
op_desc.GetName().c_str(), op_desc.GetType().c_str()); op_desc.GetName().c_str(), op_desc.GetType().c_str());
REPORT_INNER_ERROR("E19999", "[%s] Arg offset out of range. offset = %u, arg size = %u" REPORT_INNER_ERROR("E19999", "[%s] Arg offset out of range. offset = %u, arg size = %u"
"op:%s op_type:%s", GetName().c_str(), offset, args_size_,
"op:%s op_type:%s", GetName().c_str(), offset_, args_size_,
op_desc.GetName().c_str(), op_desc.GetType().c_str()); op_desc.GetName().c_str(), op_desc.GetType().c_str());
return INTERNAL_ERROR; return INTERNAL_ERROR;
} }


arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset);
max_arg_count_ = (args_size_ - offset) / sizeof(void *);
arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset_);
max_arg_count_ = (args_size_ - offset_) / sizeof(void *);
return SUCCESS; return SUCCESS;
} }


@@ -428,14 +427,20 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) {
++expected_arg_count; ++expected_arg_count;
} }
if (expected_arg_count > max_arg_count_) { if (expected_arg_count > max_arg_count_) {
GELOGE(INTERNAL_ERROR,
"[Check][arg_count][%s] Invalid arg memory, max arg count = %u, but expect = %zu",
GetName().c_str(),
max_arg_count_,
expected_arg_count);
REPORT_INNER_ERROR("E19999", "[%s] Invalid arg memory, max arg count = %u, but expect = %zu",
GetName().c_str(), max_arg_count_, expected_arg_count);
return INTERNAL_ERROR;
GELOGD("Need to reset size of args_ from %u to %zu.", max_arg_count_, expected_arg_count);
auto length = expected_arg_count * sizeof(uintptr_t) + offset_;
std::unique_ptr<uint8_t[]> new_args(new(std::nothrow) uint8_t[length]);
GE_CHECK_NOTNULL(new_args);
if (memcpy_s(new_args.get(), length, args_.get(), offset_) != EOK) {
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][new_args]failed, dst length is %zu, src length is %u.",
length, offset_);
REPORT_INNER_ERROR("E19999", "update kernel args failed of %s.", task_context.GetNodeName());
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
}
args_ = std::move(new_args);
max_arg_count_ = static_cast<uint32_t>(expected_arg_count);
args_size_ = static_cast<uint32_t>(length);
arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset_);
} }


int index = 0; int index = 0;
@@ -492,6 +497,7 @@ Status AiCoreOpTask::LaunchKernel(rtStream_t stream) {
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream)); GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream));
GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
} }
GELOGI("[TASK_INFO] %lu/%s", log_id_, log_name_.c_str());
return SUCCESS; return SUCCESS;
} }




+ 1
- 0
ge/hybrid/node_executor/aicore/aicore_op_task.h View File

@@ -116,6 +116,7 @@ class AiCoreOpTask {
bool is_dynamic_ = false; bool is_dynamic_ = false;
uint64_t log_id_ = 0; uint64_t log_id_ = 0;
std::string log_name_; std::string log_name_;
uint32_t offset_ = 0;
}; };


class AtomicAddrCleanOpTask : public AiCoreOpTask { class AtomicAddrCleanOpTask : public AiCoreOpTask {


+ 34
- 0
tests/ut/ge/hybrid/ge_hybrid_unittest.cc View File

@@ -426,6 +426,40 @@ TEST_F(UtestGeHybrid, TestTaskContext) {
ASSERT_EQ(new_desc.GetShape().GetDims(), new_shape.GetDims()); ASSERT_EQ(new_desc.GetShape().GetDims(), new_shape.GetDims());
} }


// Exercises AiCoreOpTask::UpdateArgs when the task's current argument capacity
// (max_arg_count_) is smaller than the number of arguments the task context needs.
//
// Two scenarios are checked, in order:
//   1. max_arg_count_ is forced to 0 and no args_ buffer has been assigned:
//      UpdateArgs is expected to return ACL_ERROR_GE_MEMORY_OPERATE_FAILED.
//   2. After an args_ buffer is assigned, UpdateArgs is expected to return SUCCESS.
//
// All pointers produced during setup are guarded with fatal assertions so that a
// setup failure is reported as a test failure instead of a null dereference.
TEST_F(UtestGeHybrid, hybrid_model_executor_update_args) {
  // new(std::nothrow) yields nullptr on allocation failure, so check before use.
  auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask());
  ASSERT_NE(aicore_task, nullptr);

  // Build a graph holding a single "Add" node with two inputs and one output.
  auto graph = make_shared<ComputeGraph>("graph");
  OpDescPtr op_desc = CreateOpDesc("Add", "Add");
  ASSERT_NE(op_desc, nullptr);
  GeShape shape({2, 16});
  GeTensorDesc tensor_desc(shape);
  op_desc->AddInputDesc(tensor_desc);
  op_desc->AddInputDesc(tensor_desc);
  op_desc->AddOutputDesc(tensor_desc);
  auto node = graph->AddNode(op_desc);
  ASSERT_NE(node, nullptr);

  std::unique_ptr<NodeItem> node_item;
  NodeItem::Create(node, node_item);
  // node_item is dereferenced below; stop here if Create did not populate it.
  ASSERT_NE(node_item, nullptr);
  node_item->input_start = 0;
  node_item->output_start = 0;

  // Size the subgraph context to match the node: two inputs, one output.
  GraphExecutionContext execution_context;
  SubgraphContext subgraph_context(nullptr, &execution_context);
  subgraph_context.all_inputs_.resize(2);
  subgraph_context.all_outputs_.resize(1);

  NodeState node_state(*node_item, &subgraph_context);
  auto task_context = TaskContext::Create(&node_state, &execution_context, &subgraph_context);
  ASSERT_NE(task_context, nullptr);

  // Zero-initialize so the tiling buffer never exposes indeterminate stack data.
  int32_t buffer[1] = {0};
  aicore_task->tiling_buffer_ = TensorBuffer::Create(buffer, sizeof(buffer));
  EXPECT_NE(aicore_task->tiling_buffer_, nullptr);

  // Scenario 1: capacity of zero forces the resize path while args_ is still unset.
  aicore_task->max_arg_count_ = 0;
  EXPECT_EQ(aicore_task->UpdateArgs(*task_context), ACL_ERROR_GE_MEMORY_OPERATE_FAILED);

  // Scenario 2: with an args_ buffer present the update is expected to succeed.
  aicore_task->args_ = std::unique_ptr<uint8_t[]>(new uint8_t[sizeof(uintptr_t) * 2]);
  EXPECT_EQ(aicore_task->UpdateArgs(*task_context), SUCCESS);
}

TEST_F(UtestGeHybrid, hybrid_model_executor_check_shape) { TEST_F(UtestGeHybrid, hybrid_model_executor_check_shape) {
HybridModelExecutor::ExecuteArgs args; HybridModelExecutor::ExecuteArgs args;
GeTensorDescPtr ge_tensor = make_shared<GeTensorDesc>(GeTensorDesc()); GeTensorDescPtr ge_tensor = make_shared<GeTensorDesc>(GeTensorDesc());


Loading…
Cancel
Save