|
|
|
@@ -956,6 +956,74 @@ Status AiCpuCCTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
// Allocates the device buffers used for output summaries and the memcpy task.
//
// Returns SUCCESS immediately (after logging) unless unknown_type_ is
// DEPEND_COMPUTE and num_outputs_ is non-zero. Otherwise it allocates, with
// rtMalloc(..., RT_MEMORY_HBM):
//   - one ResultSummary-sized buffer per output, stored in output_summary_;
//   - four buffers of num_outputs_ * kCopyNum uint64_t entries each
//     (release flag, data size, source address, destination address);
//   - one STR_FWK_OP_KERNEL-sized buffer, copy_task_args_buf_;
//   - copy_ioaddr_dev_, which receives the addresses of the four buffers above.
// output_summary_host_ is resized to num_outputs_ as well.
//
// NOTE(review): every runtime call is wrapped in GE_CHK_RT_RET, which presumably
// returns early on failure; buffers allocated before a failing call are not
// released here — confirm that the task's teardown path frees them.
Status AiCpuCCTask::InitForSummaryAndCopy() {
  if (unknown_type_ != DEPEND_COMPUTE || num_outputs_ == 0) {
    GELOGI("Unknown_type is %d, output num is %zu.", unknown_type_, num_outputs_);
    return SUCCESS;
  }

  // One device-side ResultSummary slot per output.
  output_summary_.resize(num_outputs_);
  constexpr auto result_summary_size = sizeof(aicpu::FWKAdapter::ResultSummary);
  for (size_t i = 0; i < num_outputs_; ++i) {
    GE_CHK_RT_RET(rtMalloc(&output_summary_[i], result_summary_size, RT_MEMORY_HBM));
  }
  output_summary_host_.resize(num_outputs_);

  // The four memcpy-task input buffers all share the same length.
  const size_t copy_input_buf_len = num_outputs_ * kCopyNum * sizeof(uint64_t);

  GE_CHK_RT_RET(rtMalloc(&copy_input_release_flag_dev_, copy_input_buf_len, RT_MEMORY_HBM));
  GE_CHK_RT_RET(rtMalloc(&copy_input_data_size_dev_, copy_input_buf_len, RT_MEMORY_HBM));
  GE_CHK_RT_RET(rtMalloc(&copy_input_src_dev_, copy_input_buf_len, RT_MEMORY_HBM));
  GE_CHK_RT_RET(rtMalloc(&copy_input_dst_dev_, copy_input_buf_len, RT_MEMORY_HBM));

  GE_CHK_RT_RET(rtMalloc(&copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM));

  // Host-side table of the four input buffer addresses, in the order
  // release flag, data size, source, destination.
  std::vector<uint64_t> copy_io_addr;
  copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_release_flag_dev_));
  copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_data_size_dev_));
  copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_src_dev_));
  copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_dst_dev_));

  const auto copy_io_addr_size = sizeof(uint64_t) * copy_io_addr.size();

  // Upload the address table to the device.
  GE_CHK_RT_RET(rtMalloc(&copy_ioaddr_dev_, copy_io_addr_size, RT_MEMORY_HBM));
  GE_CHK_RT_RET(rtMemcpy(copy_ioaddr_dev_, copy_io_addr_size,
                         copy_io_addr.data(), copy_io_addr_size, RT_MEMCPY_HOST_TO_DEVICE));
  return SUCCESS;
}
|
|
|
|
|
|
|
// Prepares the device-side arguments of the memcpy AICPU task from kernel_def.
//
// Steps:
//   1. Reject kernel_def when its args_size() exceeds sizeof(STR_FWK_OP_KERNEL).
//   2. Allocate copy_workspace_buf_ (task_info_size() bytes, RT_MEMORY_HBM) and
//      copy kernel_def.task_info() into it.
//   3. Copy kernel_def.args() into a local STR_FWK_OP_KERNEL, then overwrite its
//      inputOutputAddr / workspaceBaseAddr with this task's copy_ioaddr_dev_ and
//      copy_workspace_buf_, and clear the ext-info address and length.
//   4. Copy the patched struct host-to-device into copy_task_args_buf_.
//
// @param kernel_def  kernel definition supplying args and task_info.
// @return SUCCESS; ACL_ERROR_GE_PARAM_INVALID if args_size() is too large;
//         ACL_ERROR_GE_MEMORY_OPERATE_FAILED if memcpy_s fails; runtime failures
//         are propagated through GE_CHK_RT_RET.
//
// NOTE(review): copy_task_args_buf_ and copy_ioaddr_dev_ are not allocated in
// this function — presumably they are set up beforehand; confirm call order.
// NOTE(review): the workspace copy reads task_info_size() bytes from
// task_info().data(); this assumes task_info_size() <= task_info().size() —
// verify against the producer of kernel_def.
Status AiCpuCCTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) {
  if (kernel_def.args_size() > sizeof(STR_FWK_OP_KERNEL)) {
    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]sizeof STR_FWK_OP_KERNEL is: %zu, but args_size is: %d",
           sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size());
    REPORT_INNER_ERROR("E19999", "sizeof STR_FWK_OP_KERNEL is: %zu, but args_size is: %d",
                       sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size());
    return ACL_ERROR_GE_PARAM_INVALID;
  }

  // Upload the task info blob into a freshly allocated workspace buffer.
  GE_CHK_RT_RET(rtMalloc(&copy_workspace_buf_, kernel_def.task_info_size(), RT_MEMORY_HBM));
  GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_, kernel_def.task_info_size(),
                         kernel_def.task_info().data(), kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE));

  // Start from the serialized args, then patch in this task's own addresses.
  STR_FWK_OP_KERNEL aicpu_task = {0};
  auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL),
                          kernel_def.args().data(), kernel_def.args().size());
  if (sec_ret != EOK) {
    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][TaskArgs] failed, ret: %d", sec_ret);
    REPORT_INNER_ERROR("E19999", "update STR_FWK_OP_KERNEL args failed because memcpy_s return %d.", sec_ret);
    return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
  }

  aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast<uintptr_t>(copy_ioaddr_dev_);
  aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast<uintptr_t>(copy_workspace_buf_);
  // No extended info is passed for this task.
  aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0;
  aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0;

  GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL),
                         &aicpu_task, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE));
  return SUCCESS;
}
|
|
|
|
|
|
|
void AiCpuCCTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { |
|
|
|
arg_base = io_addr_; |
|
|
|
arg_count = io_addr_num_; |
|
|
|
|