# Commentary only: patch tools skip everything before the first "diff --git" line.
#
# What this patch does
#   * aicpu_kernel_task_builder.cc: AiCpuCCTaskBuilder::BuildTask no longer returns FAILED when
#     task.GetUnknownType() == DEPEND_COMPUTE; it calls task.InitForSummaryAndCopy() right after
#     task.SetInputConst() instead.
#   * op_task.cc: adds AiCpuCCTask::InitForSummaryAndCopy(). It returns SUCCESS immediately unless
#     unknown_type_ == DEPEND_COMPUTE and num_outputs_ != 0. Otherwise it rtMalloc's (RT_MEMORY_HBM)
#     one ResultSummary-sized buffer per output, four buffers of
#     num_outputs_ * kCopyNum * sizeof(uint64_t) bytes, one STR_FWK_OP_KERNEL-sized args buffer,
#     and copies the four buffer addresses host-to-device into copy_ioaddr_dev_.
#   * op_task.cc: adds AiCpuCCTask::SetMemCopyTask(). It rejects kernel_def.args_size() larger than
#     sizeof(STR_FWK_OP_KERNEL), uploads task_info into copy_workspace_buf_, copies args into a
#     local STR_FWK_OP_KERNEL, points inputOutputAddr / workspaceBaseAddr at copy_ioaddr_dev_ /
#     copy_workspace_buf_, zeroes extInfoAddr / extInfoLen, and uploads the struct into
#     copy_task_args_buf_.
#   * op_task.h: declares SetMemCopyTask (public) and InitForSummaryAndCopy (private).
#
# NOTE(review): this text was recovered from a copy in which line breaks were lost, "&copy_" had
#   been decoded to the copyright sign, and all "<...>" template arguments had been stripped.
#   Line structure follows the hunk counts. The template arguments
#   (aicpu::AicpuParamHead *, uint64_t, uintptr_t, GeTensorDesc, DataBuffer) and the exact
#   indentation are reconstructed - confirm against the upstream commit before applying.
# NOTE(review): the REPORT_INNER_ERROR text in SetMemCopyTask opens with an unbalanced '[', and
#   the header hunk leaves two consecutive " private:" labels; both are kept as committed.
diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc
index 2f0856bf..3099d8b6 100755
--- a/ge/single_op/task/aicpu_kernel_task_builder.cc
+++ b/ge/single_op/task/aicpu_kernel_task_builder.cc
@@ -102,11 +102,8 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id, cons
     return ret;
   }
   GE_CHK_STATUS_RET(task.SetInputConst(), "[Set][InputConst] failed.");
+  GE_CHK_STATUS_RET(task.InitForSummaryAndCopy(), "[Init][SummaryAndCopy] failed.");
 
-  if (task.GetUnknownType() == DEPEND_COMPUTE) {
-    GELOGE(FAILED, "[Get][UnknownType] is depend compute, it's not supported now.");
-    return FAILED;
-  }
   auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(task.args_.get());
   if (task.ext_info_addr_dev_ != nullptr) {
     aicpu_param_head->extInfoLength = kernel_ext_info.size();
diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc
index b6a78f9e..2a5a4d48 100755
--- a/ge/single_op/task/op_task.cc
+++ b/ge/single_op/task/op_task.cc
@@ -956,6 +956,74 @@ Status AiCpuCCTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc,
   return SUCCESS;
 }
 
+Status AiCpuCCTask::InitForSummaryAndCopy() {
+  if (unknown_type_ != DEPEND_COMPUTE || num_outputs_ == 0) {
+    GELOGI("Unknown_type is %d, output num is %zu.", unknown_type_, num_outputs_);
+    return SUCCESS;
+  }
+
+  output_summary_.resize(num_outputs_);
+  constexpr auto result_summary_size = sizeof(aicpu::FWKAdapter::ResultSummary);
+  for (size_t i = 0; i < num_outputs_; ++i) {
+    GE_CHK_RT_RET(rtMalloc(&output_summary_[i], result_summary_size, RT_MEMORY_HBM));
+  }
+  output_summary_host_.resize(num_outputs_);
+
+  const size_t copy_input_buf_len = num_outputs_ * kCopyNum * sizeof(uint64_t);
+
+  GE_CHK_RT_RET(rtMalloc(&copy_input_release_flag_dev_, copy_input_buf_len, RT_MEMORY_HBM));
+  GE_CHK_RT_RET(rtMalloc(&copy_input_data_size_dev_, copy_input_buf_len, RT_MEMORY_HBM));
+  GE_CHK_RT_RET(rtMalloc(&copy_input_src_dev_, copy_input_buf_len, RT_MEMORY_HBM));
+  GE_CHK_RT_RET(rtMalloc(&copy_input_dst_dev_, copy_input_buf_len, RT_MEMORY_HBM));
+
+  GE_CHK_RT_RET(rtMalloc(&copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM));
+
+  std::vector<uint64_t> copy_io_addr;
+  copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_release_flag_dev_));
+  copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_data_size_dev_));
+  copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_src_dev_));
+  copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_dst_dev_));
+
+  const auto copy_io_addr_size = sizeof(uint64_t) * copy_io_addr.size();
+
+  GE_CHK_RT_RET(rtMalloc(&copy_ioaddr_dev_, copy_io_addr_size, RT_MEMORY_HBM));
+
+  GE_CHK_RT_RET(rtMemcpy(copy_ioaddr_dev_, copy_io_addr_size,
+                         copy_io_addr.data(), copy_io_addr_size, RT_MEMCPY_HOST_TO_DEVICE));
+  return SUCCESS;
+}
+
+Status AiCpuCCTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) {
+  if (kernel_def.args_size() > sizeof(STR_FWK_OP_KERNEL)) {
+    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d",
+           sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size());
+    REPORT_INNER_ERROR("E19999", "[sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d",
+                       sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size());
+    return ACL_ERROR_GE_PARAM_INVALID;
+  }
+  GE_CHK_RT_RET(rtMalloc(&copy_workspace_buf_, kernel_def.task_info_size(), RT_MEMORY_HBM));
+  GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_, kernel_def.task_info_size(),
+                         kernel_def.task_info().data(), kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE));
+
+  STR_FWK_OP_KERNEL aicpu_task = {0};
+  auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL),
+                          kernel_def.args().data(), kernel_def.args().size());
+  if (sec_ret != EOK) {
+    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][TaskArgs] failed, ret: %d", sec_ret);
+    REPORT_INNER_ERROR("E19999", "update STR_FWK_OP_KERNEL args failed because memcpy_s return %d.", sec_ret);
+    return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
+  }
+
+  aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast<uintptr_t>(copy_ioaddr_dev_);
+  aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast<uintptr_t>(copy_workspace_buf_);
+  aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0;
+  aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0;
+
+  GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL),
+                         &aicpu_task, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE));
+  return SUCCESS;
+}
+
 void AiCpuCCTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) {
   arg_base = io_addr_;
   arg_count = io_addr_num_;
diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h
index 19320bc0..2d2e6731 100644
--- a/ge/single_op/task/op_task.h
+++ b/ge/single_op/task/op_task.h
@@ -225,14 +225,15 @@ class AiCpuCCTask : public AiCpuBaseTask {
   void SetkernelName(const std::string &kernel_Name);
   void SetIoAddr(uintptr_t *io_addr);
   size_t GetArgSize() const;
-
+  Status SetMemCopyTask(const domi::KernelExDef &kernel_def);
   Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc,
                       const std::vector<DataBuffer> &input_buffers,
                       std::vector<GeTensorDesc> &output_desc,
                       std::vector<DataBuffer> &output_buffers,
                       rtStream_t stream) override;
-
-private:
+ private:
+  Status InitForSummaryAndCopy();
+ private:
   friend class AiCpuCCTaskBuilder;
   std::string so_name_;
   std::string kernel_name_;