Browse Source

!1738 support memcpy in singleop

From: @yangyongqiang5033
Reviewed-by: @xchu42
Signed-off-by:
tags/v1.3.0
mindspore-ci-bot Gitee 3 years ago
parent
commit
a4158fac51
10 changed files with 181 additions and 9 deletions
  1. +1
    -0
      ge/CMakeLists.txt
  2. +1
    -0
      ge/executor/CMakeLists.txt
  3. +18
    -9
      ge/single_op/single_op_model.cc
  4. +1
    -0
      ge/single_op/single_op_model.h
  5. +14
    -0
      ge/single_op/task/op_task.cc
  6. +19
    -0
      ge/single_op/task/op_task.h
  7. +45
    -0
      ge/single_op/task/rts_kernel_task_builder.cc
  8. +34
    -0
      ge/single_op/task/rts_kernel_task_builder.h
  9. +1
    -0
      tests/ut/ge/CMakeLists.txt
  10. +47
    -0
      tests/ut/ge/single_op/single_op_model_unittest.cc

+ 1
- 0
ge/CMakeLists.txt View File

@@ -380,6 +380,7 @@ set(TRAIN_SRC_LIST
"single_op/task/tbe_task_builder.cc" "single_op/task/tbe_task_builder.cc"
"single_op/task/aicpu_task_builder.cc" "single_op/task/aicpu_task_builder.cc"
"single_op/task/aicpu_kernel_task_builder.cc" "single_op/task/aicpu_kernel_task_builder.cc"
"single_op/task/rts_kernel_task_builder.cc"
"hybrid/common/tensor_value.cc" "hybrid/common/tensor_value.cc"
"hybrid/common/npu_memory_allocator.cc" "hybrid/common/npu_memory_allocator.cc"
"hybrid/executor/rt_callback_manager.cc" "hybrid/executor/rt_callback_manager.cc"


+ 1
- 0
ge/executor/CMakeLists.txt View File

@@ -65,6 +65,7 @@ set(SRC_LIST
"../single_op/task/tbe_task_builder.cc" "../single_op/task/tbe_task_builder.cc"
"../single_op/task/aicpu_task_builder.cc" "../single_op/task/aicpu_task_builder.cc"
"../single_op/task/aicpu_kernel_task_builder.cc" "../single_op/task/aicpu_kernel_task_builder.cc"
"../single_op/task/rts_kernel_task_builder.cc"
"../hybrid/common/tensor_value.cc" "../hybrid/common/tensor_value.cc"
"../hybrid/common/npu_memory_allocator.cc" "../hybrid/common/npu_memory_allocator.cc"
"../hybrid/executor/rt_callback_manager.cc" "../hybrid/executor/rt_callback_manager.cc"


+ 18
- 9
ge/single_op/single_op_model.cc View File

@@ -30,6 +30,7 @@
#include "runtime/rt.h" #include "runtime/rt.h"
#include "task/aicpu_task_builder.h" #include "task/aicpu_task_builder.h"
#include "task/aicpu_kernel_task_builder.h" #include "task/aicpu_kernel_task_builder.h"
#include "task/rts_kernel_task_builder.h"
#include "task/tbe_task_builder.h" #include "task/tbe_task_builder.h"
#include "hybrid/executor/hybrid_model_executor.h" #include "hybrid/executor/hybrid_model_executor.h"
#include "hybrid/node_executor/node_executor.h" #include "hybrid/node_executor/node_executor.h"
@@ -266,7 +267,9 @@ Status SingleOpModel::ParseInputsAndOutputs() {
for (auto &op_desc : data_ops_) { for (auto &op_desc : data_ops_) {
GE_CHK_STATUS_RET_NOLOG(ParseInputNode(op_desc)); GE_CHK_STATUS_RET_NOLOG(ParseInputNode(op_desc));
} }
ParseOutputNode(netoutput_op_);
if (netoutput_op_ != nullptr) {
ParseOutputNode(netoutput_op_);
}
return SUCCESS; return SUCCESS;
} }


@@ -323,10 +326,7 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s
OpTask *task = nullptr; OpTask *task = nullptr;
uint64_t singleop_kernel_id = aicpu_kernel_id++; uint64_t singleop_kernel_id = aicpu_kernel_id++;
GELOGI("Build singleOp CCTask, kernel_id = %lu", singleop_kernel_id); GELOGI("Build singleOp CCTask, kernel_id = %lu", singleop_kernel_id);
auto ret = BuildCpuKernelTask(task_def.kernel(), &task, singleop_kernel_id);
if (ret != SUCCESS) {
return ret;
}
GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, singleop_kernel_id));
task->SetModelArgs(model_name_, model_id_); task->SetModelArgs(model_name_, model_id_);
ParseArgTable(task, single_op); ParseArgTable(task, single_op);
single_op.tasks_.emplace_back(task); single_op.tasks_.emplace_back(task);
@@ -345,13 +345,22 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s
bool depend_compute_flag = false; bool depend_compute_flag = false;
uint64_t singleop_kernel_id = aicpu_kernel_id++; uint64_t singleop_kernel_id = aicpu_kernel_id++;
GELOGI("Build singleOp TfTask, kernel_id = %lu", singleop_kernel_id); GELOGI("Build singleOp TfTask, kernel_id = %lu", singleop_kernel_id);
auto ret = BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, false, depend_compute_flag, singleop_kernel_id);
if (ret != SUCCESS) {
return ret;
}
GE_CHK_STATUS_RET_NOLOG(
BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, false, depend_compute_flag, singleop_kernel_id));
aicpu_task->SetModelArgs(model_name_, model_id_); aicpu_task->SetModelArgs(model_name_, model_id_);
ParseArgTable(aicpu_task, single_op); ParseArgTable(aicpu_task, single_op);
single_op.tasks_.emplace_back(aicpu_task); single_op.tasks_.emplace_back(aicpu_task);
} else if ((task_type == RT_MODEL_TASK_MEMCPY_ASYNC) || (task_type == RT_MODEL_TASK_MEMCPY_ADDR_ASYNC)) {
auto kernel_def = task_def.memcpy_async();
auto node = op_list_[kernel_def.op_index()];
GE_CHECK_NOTNULL(node);
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
std::unique_ptr<MemcpyAsyncTask> task;
GE_CHK_STATUS_RET_NOLOG(RtsKernelTaskBuilder::BuildMemcpyAsyncTask(op_desc, kernel_def, model_params_, task));
task->SetModelArgs(model_name_, model_id_);
ParseArgTable(task.get(), single_op);
single_op.tasks_.emplace_back(task.release());
} else { } else {
// skip // skip
GELOGD("Skip task type: %d", static_cast<int>(task_type)); GELOGD("Skip task type: %d", static_cast<int>(task_type));


+ 1
- 0
ge/single_op/single_op_model.h View File

@@ -26,6 +26,7 @@
#include "common/helper/model_helper.h" #include "common/helper/model_helper.h"
#include "single_op/single_op.h" #include "single_op/single_op.h"
#include "single_op/stream_resource.h" #include "single_op/stream_resource.h"
#include "single_op/task/op_task.h"


namespace ge { namespace ge {
struct SingleOpModelParam { struct SingleOpModelParam {


+ 14
- 0
ge/single_op/task/op_task.cc View File

@@ -34,6 +34,7 @@
namespace ge { namespace ge {
namespace { namespace {
constexpr int kLaunchRetryTimes = 1000; constexpr int kLaunchRetryTimes = 1000;
constexpr size_t kMemcpyArgCount = 2;
constexpr int kSleepTime = 10; constexpr int kSleepTime = 10;
constexpr uint64_t kReleaseFlag = 1; constexpr uint64_t kReleaseFlag = 1;
constexpr int kCopyNum = 2; constexpr int kCopyNum = 2;
@@ -963,4 +964,17 @@ void AiCpuCCTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) {
arg_base = io_addr_; arg_base = io_addr_;
arg_count = io_addr_num_; arg_count = io_addr_num_;
} }

// Issues the asynchronous copy described by this task on `stream`.
//
// addresses_[0] is used as the source and addresses_[1] as the destination.
// A kind of RT_MEMCPY_ADDR_DEVICE_TO_DEVICE is rewritten (and stays rewritten
// on the task) to RT_MEMCPY_DEVICE_TO_DEVICE before the runtime call.
//
// Returns SUCCESS once rtMemcpyAsync has been issued; a failing runtime call
// is handled by GE_CHK_RT_RET (presumably an early error return -- confirm
// against the macro definition).
Status MemcpyAsyncTask::LaunchKernel(rtStream_t stream) {
  if (kind_ == RT_MEMCPY_ADDR_DEVICE_TO_DEVICE) {
    kind_ = RT_MEMCPY_DEVICE_TO_DEVICE;
  }
  void *const src_addr = reinterpret_cast<void *>(addresses_[0]);
  void *const dst_addr = reinterpret_cast<void *>(addresses_[1]);
  GE_CHK_RT_RET(rtMemcpyAsync(dst_addr, dst_max_, src_addr, count_, kind_, stream));
  return SUCCESS;
}

// Hands out the task's address table so callers can read or patch it in place.
//
// arg_base  [out] points at the first entry of addresses_.
// arg_count [out] number of entries in the table (kMemcpyArgCount).
void MemcpyAsyncTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) {
  arg_count = kMemcpyArgCount;
  arg_base = &addresses_[0];
}
} // namespace ge } // namespace ge

+ 19
- 0
ge/single_op/task/op_task.h View File

@@ -44,6 +44,9 @@ class OpTask {
virtual Status UpdateArgTable(const SingleOpModelParam &param); virtual Status UpdateArgTable(const SingleOpModelParam &param);
void SetModelArgs(std::string model_name, uint32_t model_id); void SetModelArgs(std::string model_name, uint32_t model_id);
Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id);
// Stores the operator description associated with this task (read back via GetOpdesc()).
void SetOpDesc(const OpDescPtr &op_desc) { op_desc_ = op_desc; }
const OpDescPtr &GetOpdesc() const {return op_desc_;} const OpDescPtr &GetOpdesc() const {return op_desc_;}
Status OpenDump(rtStream_t stream); Status OpenDump(rtStream_t stream);
virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0; virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0;
@@ -244,6 +247,22 @@ private:
std::string op_type_; std::string op_type_;
uint64_t kernel_id_ = 0; uint64_t kernel_id_ = 0;
}; };

// Single-op task that performs one asynchronous runtime memory copy.
//
// The fields are filled in by RtsKernelTaskBuilder (a friend). Every scalar
// member carries an in-class initializer so that a default-constructed task
// never exposes indeterminate values to LaunchKernel()/GetIoAddr().
class MemcpyAsyncTask : public OpTask {
 public:
  // Issues the copy on `stream`.
  Status LaunchKernel(rtStream_t stream) override;
  // Exposes addresses_ and its element count.
  void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) override;

 private:
  friend class SingleOpModel;
  friend class RtsKernelTaskBuilder;

  // [0] = source address, [1] = destination address.
  uintptr_t addresses_[2] = {0U, 0U};
  // Passed to rtMemcpyAsync as the destination size limit.
  size_t dst_max_ = 0U;
  // Passed to rtMemcpyAsync as the copy size.
  size_t count_ = 0U;
  // Copy direction; stays at the "reserved" sentinel until the builder sets a real kind.
  rtMemcpyKind_t kind_ = RT_MEMCPY_RESERVED;
  NodePtr node_;
};
} // namespace ge } // namespace ge


#endif // GE_SINGLE_OP_TASK_OP_TASK_H_ #endif // GE_SINGLE_OP_TASK_OP_TASK_H_

+ 45
- 0
ge/single_op/task/rts_kernel_task_builder.cc View File

@@ -0,0 +1,45 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "single_op/task/rts_kernel_task_builder.h"
#include "build_task_utils.h"

namespace ge {
namespace {
// A memcpy task resolves exactly two addresses from its op: one source and one destination.
constexpr size_t kNumAddresses = 2U;
}  // namespace

// Creates a MemcpyAsyncTask for `op_desc` from the serialized task definition.
//
// op_desc    operator description attached to the new task and used to resolve addresses.
// kernel_def supplies dst_max, count and the copy kind.
// param      model parameters forwarded to BuildTaskUtils::GetAddresses.
// task       [out] receives the newly allocated task. It is assigned before address
//            resolution, so it is already non-null when INTERNAL_ERROR is returned.
//
// Returns SUCCESS when exactly kNumAddresses addresses were resolved, INTERNAL_ERROR
// otherwise. Allocation failure is handled by GE_CHECK_NOTNULL (presumably an early
// error return -- confirm against the macro definition).
Status RtsKernelTaskBuilder::BuildMemcpyAsyncTask(const OpDescPtr &op_desc,
                                                  const domi::MemcpyAsyncDef &kernel_def,
                                                  const SingleOpModelParam &param,
                                                  std::unique_ptr<MemcpyAsyncTask> &task) {
  task.reset(new (std::nothrow) MemcpyAsyncTask());
  GE_CHECK_NOTNULL(task);

  task->SetOpDesc(op_desc);
  task->kind_ = static_cast<rtMemcpyKind_t>(kernel_def.kind());
  task->count_ = kernel_def.count();
  task->dst_max_ = kernel_def.dst_max();

  const auto io_addrs = BuildTaskUtils::JoinAddresses(BuildTaskUtils::GetAddresses(op_desc, param, false));
  const size_t addr_count = io_addrs.size();
  if (addr_count != kNumAddresses) {
    GELOGE(INTERNAL_ERROR, "[Build][MemcpyAsyncTask] Invalid address count: %zu", addr_count);
    return INTERNAL_ERROR;
  }

  // Slot order follows the joined address list: index 0 first, then index 1.
  for (size_t idx = 0; idx < kNumAddresses; ++idx) {
    task->addresses_[idx] = reinterpret_cast<uintptr_t>(io_addrs[idx]);
  }
  return SUCCESS;
}
} // namespace ge

+ 34
- 0
ge/single_op/task/rts_kernel_task_builder.h View File

@@ -0,0 +1,34 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_SINGLE_OP_TASK_RTS_KERNEL_TASK_BUILDER_H_
#define GE_SINGLE_OP_TASK_RTS_KERNEL_TASK_BUILDER_H_

#include <vector>
#include "graph/op_desc.h"
#include "single_op/single_op.h"
#include "single_op/single_op_model.h"

namespace ge {
// Stateless helper exposing a static factory for runtime (RTS) single-op tasks.
class RtsKernelTaskBuilder {
public:
// Builds a MemcpyAsyncTask for `op_desc` from the serialized `kernel_def`.
// `param` is the single-op model parameter set used while resolving addresses.
// `task` is an output parameter that receives ownership of the new task.
// Returns a ge::Status; callers must check it before using `task`.
static Status BuildMemcpyAsyncTask(const OpDescPtr &op_desc,
const domi::MemcpyAsyncDef &kernel_def,
const SingleOpModelParam &param,
std::unique_ptr<MemcpyAsyncTask> &task);
};
} // namespace ge
#endif // GE_SINGLE_OP_TASK_RTS_KERNEL_TASK_BUILDER_H_

+ 1
- 0
tests/ut/ge/CMakeLists.txt View File

@@ -585,6 +585,7 @@ set(SINGLE_OP_SRC_FILES
"${GE_CODE_DIR}/ge/single_op/single_op_manager.cc" "${GE_CODE_DIR}/ge/single_op/single_op_manager.cc"
"${GE_CODE_DIR}/ge/single_op/task/aicpu_task_builder.cc" "${GE_CODE_DIR}/ge/single_op/task/aicpu_task_builder.cc"
"${GE_CODE_DIR}/ge/single_op/task/aicpu_kernel_task_builder.cc" "${GE_CODE_DIR}/ge/single_op/task/aicpu_kernel_task_builder.cc"
"${GE_CODE_DIR}/ge/single_op/task/rts_kernel_task_builder.cc"
"${GE_CODE_DIR}/ge/hybrid/common/tensor_value.cc" "${GE_CODE_DIR}/ge/hybrid/common/tensor_value.cc"
"${GE_CODE_DIR}/ge/hybrid/common/npu_memory_allocator.cc" "${GE_CODE_DIR}/ge/hybrid/common/npu_memory_allocator.cc"
"${GE_CODE_DIR}/ge/hybrid/executor/rt_callback_manager.cc" "${GE_CODE_DIR}/ge/hybrid/executor/rt_callback_manager.cc"


+ 47
- 0
tests/ut/ge/single_op/single_op_model_unittest.cc View File

@@ -25,6 +25,11 @@
#define private public #define private public
#include "single_op/single_op_model.h" #include "single_op/single_op_model.h"
#include "single_op/task/tbe_task_builder.h" #include "single_op/task/tbe_task_builder.h"
#include "single_op/task/rts_kernel_task_builder.h"
#include "single_op/task/op_task.h"
#include "framework/common/helper/model_helper.h"
#include "single_op/single_op.h"
#include "single_op/stream_resource.h"
#undef private #undef private
#undef protected #undef protected
#include "graph/passes/graph_builder_utils.h" #include "graph/passes/graph_builder_utils.h"
@@ -240,3 +245,45 @@ TEST_F(UtestSingleOpModel, test_host_mem) {
DynamicSingleOp single_op(0, &stream_mu_, nullptr); DynamicSingleOp single_op(0, &stream_mu_, nullptr);
ASSERT_EQ(model.SetHostMemTensor(single_op), SUCCESS); ASSERT_EQ(model.SetHostMemTensor(single_op), SUCCESS);
} }

// Builds a model holding a single MemcpyAsync node with a matching
// RT_MODEL_TASK_MEMCPY_ASYNC task definition, checks that
// SingleOpModel::BuildTaskList accepts it, and checks that a
// MemcpyAsyncTask can be launched.
TEST_F(UtestSingleOpModel, BuildTaskList) {
  ComputeGraphPtr graph = make_shared<ComputeGraph>("single_op");
  GeModelPtr ge_model = make_shared<GeModel>();
  ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph));
  shared_ptr<domi::ModelTaskDef> model_task_def = make_shared<domi::ModelTaskDef>();
  ge_model->SetModelTaskDef(model_task_def);
  NodePtr node = nullptr;
  {
    // One-input / one-output memcpy op, both at offset 0.
    auto op_desc = std::make_shared<ge::OpDesc>("memcpy", MEMCPYASYNC);
    GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
    op_desc->AddInputDesc(tensor);
    op_desc->AddOutputDesc(tensor);
    op_desc->SetInputOffset({0});
    op_desc->SetOutputOffset({0});
    node = graph->AddNode(op_desc);

    // Task definition that refers to the op above through op_index 0.
    domi::TaskDef *task_def = model_task_def->add_task();
    task_def->set_stream_id(0);
    task_def->set_type(RT_MODEL_TASK_MEMCPY_ASYNC);
    domi::MemcpyAsyncDef *memcpy_async = task_def->mutable_memcpy_async();
    memcpy_async->set_src(0);
    memcpy_async->set_dst(0);
    memcpy_async->set_dst_max(512);
    memcpy_async->set_count(1);
    memcpy_async->set_kind(RT_MEMCPY_DEVICE_TO_DEVICE);
    memcpy_async->set_op_index(0);
  }

  string model_data_str = "123456789";
  SingleOpModel model("model", model_data_str.c_str(), model_data_str.size());
  // Automatic storage: no null check needed and nothing leaks. Declared before
  // single_op so the resource outlives the operator that points at it.
  StreamResource res(1);
  std::mutex stream_mu;
  rtStream_t stream = nullptr;
  rtStreamCreate(&stream, 0);
  SingleOp single_op(&res, &stream_mu, stream);
  model.model_helper_.model_ = ge_model;
  model.op_list_.emplace(0, node);
  ASSERT_EQ(model.BuildTaskList(&res, single_op), SUCCESS);

  // Value-initialization ({}) zero-fills the task's members first, so the launch
  // below never reads indeterminate addresses, sizes or copy kind.
  MemcpyAsyncTask mem_task{};
  ASSERT_EQ(mem_task.LaunchKernel(0), SUCCESS);
}

Loading…
Cancel
Save