Browse Source

fix

pull/1879/head
guopeian 4 years ago
parent
commit
b323fe3d80
3 changed files with 73 additions and 7 deletions
  1. +1
    -4
      ge/single_op/task/aicpu_kernel_task_builder.cc
  2. +68
    -0
      ge/single_op/task/op_task.cc
  3. +4
    -3
      ge/single_op/task/op_task.h

+ 1
- 4
ge/single_op/task/aicpu_kernel_task_builder.cc View File

@@ -102,11 +102,8 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id, cons
return ret; return ret;
} }
GE_CHK_STATUS_RET(task.SetInputConst(), "[Set][InputConst] failed."); GE_CHK_STATUS_RET(task.SetInputConst(), "[Set][InputConst] failed.");
GE_CHK_STATUS_RET(task.InitForSummaryAndCopy(), "[Init][SummaryAndCopy] failed.");


if (task.GetUnknownType() == DEPEND_COMPUTE) {
GELOGE(FAILED, "[Get][UnknownType] is depend compute, it's not supported now.");
return FAILED;
}
auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(task.args_.get()); auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(task.args_.get());
if (task.ext_info_addr_dev_ != nullptr) { if (task.ext_info_addr_dev_ != nullptr) {
aicpu_param_head->extInfoLength = kernel_ext_info.size(); aicpu_param_head->extInfoLength = kernel_ext_info.size();


+ 68
- 0
ge/single_op/task/op_task.cc View File

@@ -956,6 +956,74 @@ Status AiCpuCCTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc,
return SUCCESS; return SUCCESS;
} }


// Allocates the buffers used to read back per-output result summaries and to
// drive the follow-up memory-copy task.
//
// Work is only done when unknown_type_ is DEPEND_COMPUTE and there is at least
// one output; in every other case the function logs and returns SUCCESS
// without allocating anything.
//
// On the active path it:
//   1. allocates one aicpu::FWKAdapter::ResultSummary-sized buffer per output,
//   2. allocates the four copy-input buffers (release flag, data size, src, dst),
//   3. allocates the buffer that will hold the copy task's STR_FWK_OP_KERNEL,
//   4. uploads a table with the addresses of the four copy-input buffers.
//
// Returns SUCCESS when all steps complete. Failures of rtMalloc/rtMemcpy are
// handled by GE_CHK_RT_RET (presumably an early return with an error status;
// confirm against the macro definition).
Status AiCpuCCTask::InitForSummaryAndCopy() {
  const bool has_depend_compute_outputs = (unknown_type_ == DEPEND_COMPUTE) && (num_outputs_ != 0);
  if (!has_depend_compute_outputs) {
    GELOGI("Unknown_type is %d, output num is %zu.", unknown_type_, num_outputs_);
    return SUCCESS;
  }

  // One summary slot per output.
  constexpr auto result_summary_size = sizeof(aicpu::FWKAdapter::ResultSummary);
  output_summary_.resize(num_outputs_);
  for (size_t i = 0; i < num_outputs_; ++i) {
    GE_CHK_RT_RET(rtMalloc(&output_summary_[i], result_summary_size, RT_MEMORY_HBM));
  }
  output_summary_host_.resize(num_outputs_);

  // The four copy-input buffers all share the same length:
  // kCopyNum 64-bit entries per output.
  const size_t copy_input_buf_len = num_outputs_ * kCopyNum * sizeof(uint64_t);
  GE_CHK_RT_RET(rtMalloc(&copy_input_release_flag_dev_, copy_input_buf_len, RT_MEMORY_HBM));
  GE_CHK_RT_RET(rtMalloc(&copy_input_data_size_dev_, copy_input_buf_len, RT_MEMORY_HBM));
  GE_CHK_RT_RET(rtMalloc(&copy_input_src_dev_, copy_input_buf_len, RT_MEMORY_HBM));
  GE_CHK_RT_RET(rtMalloc(&copy_input_dst_dev_, copy_input_buf_len, RT_MEMORY_HBM));

  // Storage for the copy task's kernel arguments.
  GE_CHK_RT_RET(rtMalloc(&copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM));

  // Address table, in the order: release flag, data size, src, dst.
  std::vector<uint64_t> copy_io_addr = {
      reinterpret_cast<uintptr_t>(copy_input_release_flag_dev_),
      reinterpret_cast<uintptr_t>(copy_input_data_size_dev_),
      reinterpret_cast<uintptr_t>(copy_input_src_dev_),
      reinterpret_cast<uintptr_t>(copy_input_dst_dev_),
  };
  const auto copy_io_addr_size = sizeof(uint64_t) * copy_io_addr.size();

  GE_CHK_RT_RET(rtMalloc(&copy_ioaddr_dev_, copy_io_addr_size, RT_MEMORY_HBM));
  GE_CHK_RT_RET(rtMemcpy(copy_ioaddr_dev_, copy_io_addr_size,
                         copy_io_addr.data(), copy_io_addr_size, RT_MEMCPY_HOST_TO_DEVICE));
  return SUCCESS;
}

// Prepares the kernel arguments of the memory-copy task from its definition.
//
// Steps:
//   1. rejects definitions whose args_size exceeds sizeof(STR_FWK_OP_KERNEL),
//   2. allocates copy_workspace_buf_ and uploads kernel_def.task_info() into it,
//   3. builds a host-side STR_FWK_OP_KERNEL from kernel_def.args(), redirects its
//      inputOutputAddr / workspaceBaseAddr to copy_ioaddr_dev_ / copy_workspace_buf_
//      and clears the ext-info fields,
//   4. uploads the struct into copy_task_args_buf_.
//
// @param kernel_def  definition supplying args, args_size, task_info and task_info_size.
// @return SUCCESS on success;
//         ACL_ERROR_GE_PARAM_INVALID when args_size is larger than STR_FWK_OP_KERNEL;
//         ACL_ERROR_GE_MEMORY_OPERATE_FAILED when memcpy_s reports an error.
//         Failures of rtMalloc/rtMemcpy are handled by GE_CHK_RT_RET (presumably an
//         early return with an error status; confirm against the macro definition).
//
// NOTE(review): copy_ioaddr_dev_ and copy_task_args_buf_ are not allocated here;
// this function appears to rely on InitForSummaryAndCopy() having done so - confirm
// the call order at the call site.
Status AiCpuCCTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) {
  // Compare and log as size_t so the format specifiers match the argument types.
  const auto declared_args_size = static_cast<size_t>(kernel_def.args_size());
  if (declared_args_size > sizeof(STR_FWK_OP_KERNEL)) {
    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]sizeof STR_FWK_OP_KERNEL is: %zu, but args_size is: %zu",
           sizeof(STR_FWK_OP_KERNEL), declared_args_size);
    REPORT_INNER_ERROR("E19999", "sizeof STR_FWK_OP_KERNEL is: %zu, but args_size is: %zu",
                       sizeof(STR_FWK_OP_KERNEL), declared_args_size);
    return ACL_ERROR_GE_PARAM_INVALID;
  }

  // Upload the task info blob; it becomes the copy kernel's workspace.
  GE_CHK_RT_RET(rtMalloc(&copy_workspace_buf_, kernel_def.task_info_size(), RT_MEMORY_HBM));
  GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_, kernel_def.task_info_size(),
                         kernel_def.task_info().data(), kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE));

  // Start from a zeroed struct so any bytes not covered by args() stay zero.
  STR_FWK_OP_KERNEL aicpu_task = {0};
  // memcpy_s is given the destination capacity, so the copy is bounded by the
  // struct size independently of the args_size check above.
  auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL),
                          kernel_def.args().data(), kernel_def.args().size());
  if (sec_ret != EOK) {
    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][TaskArgs] failed, ret: %d", sec_ret);
    REPORT_INNER_ERROR("E19999", "update STR_FWK_OP_KERNEL args failed because memcpy_s return %d.", sec_ret);
    return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
  }

  // Point the kernel at this task's own buffers; the copy task carries no ext info.
  aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast<uintptr_t>(copy_ioaddr_dev_);
  aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast<uintptr_t>(copy_workspace_buf_);
  aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0;
  aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0;

  GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL),
                         &aicpu_task, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE));
  return SUCCESS;
}

void AiCpuCCTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { void AiCpuCCTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) {
arg_base = io_addr_; arg_base = io_addr_;
arg_count = io_addr_num_; arg_count = io_addr_num_;


+ 4
- 3
ge/single_op/task/op_task.h View File

@@ -225,14 +225,15 @@ class AiCpuCCTask : public AiCpuBaseTask {
void SetkernelName(const std::string &kernel_Name); void SetkernelName(const std::string &kernel_Name);
void SetIoAddr(uintptr_t *io_addr); void SetIoAddr(uintptr_t *io_addr);
size_t GetArgSize() const; size_t GetArgSize() const;
Status SetMemCopyTask(const domi::KernelExDef &kernel_def);
Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc,
const std::vector<DataBuffer> &input_buffers, const std::vector<DataBuffer> &input_buffers,
std::vector<GeTensorDesc> &output_desc, std::vector<GeTensorDesc> &output_desc,
std::vector<DataBuffer> &output_buffers, std::vector<DataBuffer> &output_buffers,
rtStream_t stream) override; rtStream_t stream) override;

private:
private:
Status InitForSummaryAndCopy();
private:
friend class AiCpuCCTaskBuilder; friend class AiCpuCCTaskBuilder;
std::string so_name_; std::string so_name_;
std::string kernel_name_; std::string kernel_name_;


Loading…
Cancel
Save