| @@ -783,22 +783,10 @@ Status AicpuNodeTask::InitForDependComputeTask() { | |||||
| "[Alloc][TensorBuffer] failed for Node[%s] to copy task input dst, size=%zu", | "[Alloc][TensorBuffer] failed for Node[%s] to copy task input dst, size=%zu", | ||||
| node_name_.c_str(), copy_input_buf_len); | node_name_.c_str(), copy_input_buf_len); | ||||
| std::vector<uint64_t> copy_io_addr; | |||||
| copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_release_flag_dev_->GetData())); | |||||
| copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_data_size_dev_->GetData())); | |||||
| copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_src_dev_->GetData())); | |||||
| copy_io_addr.emplace_back(reinterpret_cast<uintptr_t>(copy_input_dst_dev_->GetData())); | |||||
| // mem copy op has 4 inputs and 0 output. | |||||
| const auto copy_io_addr_size = sizeof(uint64_t) * copy_io_addr.size(); | |||||
| // can alloc in init, it can reuse | |||||
| GE_CHK_STATUS_RET(AllocTensorBuffer(copy_io_addr_size, copy_ioaddr_dev_), | |||||
| "[Alloc][TensorBuffer] failed for Node[%s] to copy task ioaddr, size=%zu", | |||||
| node_name_.c_str(), copy_io_addr_size); | |||||
| GE_CHK_RT_RET(rtMemcpy(copy_ioaddr_dev_->GetData(), copy_io_addr_size, | |||||
| &copy_io_addr[0], copy_io_addr_size, RT_MEMCPY_HOST_TO_DEVICE)); | |||||
| copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_release_flag_dev_->GetData())); | |||||
| copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_data_size_dev_->GetData())); | |||||
| copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_src_dev_->GetData())); | |||||
| copy_io_addr_.emplace_back(reinterpret_cast<uintptr_t>(copy_input_dst_dev_->GetData())); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -973,7 +961,7 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) { | |||||
| auto kernel_type = static_cast<ccKernelType>(kcontext.kernel_type()); | auto kernel_type = static_cast<ccKernelType>(kcontext.kernel_type()); | ||||
| uint32_t flag = RT_KERNEL_DEFAULT; | uint32_t flag = RT_KERNEL_DEFAULT; | ||||
| if (kernel_type == ccKernelType::CUST_AI_CPU) { | if (kernel_type == ccKernelType::CUST_AI_CPU) { | ||||
| flag |= static_cast<uint32_t>(RT_KERNEL_CUSTOM_AICPU); | |||||
| flag |= static_cast<uint32_t>(RT_KERNEL_CUSTOM_AICPU); | |||||
| } | } | ||||
| auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name.c_str()), | auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name.c_str()), | ||||
| reinterpret_cast<const void *>(kernel_name.c_str()), | reinterpret_cast<const void *>(kernel_name.c_str()), | ||||