|
|
@@ -454,6 +454,29 @@ Status AiCpuBaseTask::UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensor |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
/// Writes the addresses of the given buffers into the task's I/O address table.
///
/// The table (base pointer and entry count) is obtained from GetIoAddr(). Input
/// addresses are stored first, in order, immediately followed by the output
/// addresses.
///
/// @param inputs   buffers whose `data` pointers are stored as input addresses
/// @param outputs  buffers whose `data` pointers are stored as output addresses
/// @return SUCCESS when every address has been written (or there is nothing to
///         write); FAILED when the table has fewer entries than
///         inputs.size() + outputs.size(); otherwise whatever GE_CHECK_NOTNULL
///         returns for a null table or a null buffer address.
Status AiCpuBaseTask::UpdateIoAddr(const vector<DataBuffer> &inputs, const vector<DataBuffer> &outputs) {
  uintptr_t *arg_base = nullptr;
  size_t arg_num = 0;
  GetIoAddr(arg_base, arg_num);

  const size_t io_num = inputs.size() + outputs.size();
  if (io_num == 0) {
    // Nothing to write, so the table is never touched.
    return SUCCESS;
  }

  // The input and output counts are expected to have been checked in ValidateParams,
  // but the table is written through a raw pointer, so guard against a missing or
  // undersized table before storing anything.
  GE_CHECK_NOTNULL(arg_base);
  GE_CHK_BOOL_RET_STATUS(io_num <= arg_num, FAILED,
                         "AICpuTask io addr table has %zu entries, but %zu inputs and %zu outputs were given.",
                         arg_num, inputs.size(), outputs.size());

  for (size_t i = 0; i < inputs.size(); ++i) {
    auto addr = inputs[i].data;
    GE_CHECK_NOTNULL(addr);
    GELOGD("AICpuTask input[%zu] addr = %p", i, addr);
    *arg_base++ = reinterpret_cast<uintptr_t>(addr);
  }

  // Output addresses continue right after the last input slot.
  for (size_t i = 0; i < outputs.size(); ++i) {
    auto addr = outputs[i].data;
    GE_CHECK_NOTNULL(addr);
    GELOGD("AICpuTask output[%zu] addr = %p", i, addr);
    *arg_base++ = reinterpret_cast<uintptr_t>(addr);
  }

  return SUCCESS;
}
|
|
|
|
|
|
|
AiCpuTask::~AiCpuTask() { |
|
|
|
FreeHbm(args_); |
|
|
|
FreeHbm(io_addr_); |
|
|
@@ -631,40 +654,6 @@ Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
/// Collects the task's input and output addresses on the host and copies them
/// to `io_addr_` with a single host-to-device rtMemcpy.
///
/// The first `num_inputs_` entries of `inputs` are used as input addresses. The
/// output addresses are the first `num_outputs_` entries of `outputs`, unless
/// `unknown_type_` is DEPEND_COMPUTE, in which case the entries of
/// `output_summary_` are used instead and `outputs` is not read.
///
/// @param inputs   input addresses; needs at least `num_inputs_` entries
/// @param outputs  output addresses; needs at least `num_outputs_` entries when
///                 `unknown_type_` is not DEPEND_COMPUTE
/// @return SUCCESS on success (including when there is nothing to copy); FAILED
///         when a vector is shorter than required; otherwise whatever
///         GE_CHECK_NOTNULL / GE_CHK_RT_RET return on a null address or a
///         failed copy.
Status AiCpuTask::SetIO(const vector<void *> &inputs, vector<void *> &outputs) {
  // The loops below index by num_inputs_ / num_outputs_, so make sure the
  // caller-provided vectors are long enough before reading from them.
  GE_CHK_BOOL_RET_STATUS(inputs.size() >= static_cast<size_t>(num_inputs_), FAILED,
                         "AiCpuTask got %zu input addrs, fewer than the task's input num.", inputs.size());

  vector<uint64_t> io_addrs;
  io_addrs.reserve(num_inputs_ + num_outputs_);
  for (size_t i = 0; i < num_inputs_; ++i) {
    GE_CHECK_NOTNULL(inputs[i]);
    GELOGD("AiCpuTask input[%zu] addr = %p", i, inputs[i]);
    io_addrs.emplace_back(reinterpret_cast<uintptr_t>(inputs[i]));
  }

  if (unknown_type_ != DEPEND_COMPUTE) {
    GE_CHK_BOOL_RET_STATUS(outputs.size() >= static_cast<size_t>(num_outputs_), FAILED,
                           "AiCpuTask got %zu output addrs, fewer than the task's output num.", outputs.size());
    for (size_t i = 0; i < num_outputs_; ++i) {
      GE_CHECK_NOTNULL(outputs[i]);
      GELOGD("AiCpuTask output[%zu] addr = %p", i, outputs[i]);
      io_addrs.emplace_back(reinterpret_cast<uintptr_t>(outputs[i]));
    }
  } else {
    // For DEPEND_COMPUTE the summary addresses are used in place of the
    // caller's output addresses.
    for (size_t i = 0; i < num_outputs_; ++i) {
      void *summary_addr = output_summary_[i];
      io_addrs.emplace_back(reinterpret_cast<uintptr_t>(summary_addr));
    }
  }

  if (io_addrs.empty()) {
    return SUCCESS;
  }

  auto *dst_io_addr = const_cast<uintptr_t *>(reinterpret_cast<const uintptr_t *>(io_addr_));
  // Validate the destination before handing it to rtMemcpy, not afterwards.
  GE_CHECK_NOTNULL(dst_io_addr);
  const size_t copy_size = sizeof(uint64_t) * io_addrs.size();
  GE_CHK_RT_RET(rtMemcpy(dst_io_addr, copy_size, io_addrs.data(), copy_size, RT_MEMCPY_HOST_TO_DEVICE));
  return SUCCESS;
}
|
|
|
|
|
|
|
Status AiCpuTask::InitForSummaryAndCopy() { |
|
|
|
if (unknown_type_ != DEPEND_COMPUTE || num_outputs_ == 0) { |
|
|
|
GELOGI("Unknown_type is %d, output num is %d.", unknown_type_, num_outputs_); |
|
|
@@ -736,17 +725,17 @@ Status AiCpuTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, |
|
|
|
std::vector<DataBuffer> &output_buffers, |
|
|
|
rtStream_t stream) { |
|
|
|
GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc, stream)); |
|
|
|
std::vector<void *> inputs; |
|
|
|
std::vector<void *> outputs; |
|
|
|
for (auto &buffer : input_buffers) { |
|
|
|
inputs.emplace_back(buffer.data); |
|
|
|
} |
|
|
|
for (auto &buffer : output_buffers) { |
|
|
|
outputs.emplace_back(buffer.data); |
|
|
|
if (unknown_type_ == DEPEND_COMPUTE) { |
|
|
|
std::vector<DataBuffer> summary_buffers; |
|
|
|
for (size_t i = 0; i < num_outputs_; ++i) { |
|
|
|
summary_buffers.emplace_back(output_summary_[i], sizeof(aicpu::FWKAdapter::ResultSummary), false); |
|
|
|
} |
|
|
|
GE_CHK_STATUS_RET_NOLOG(UpdateIoAddr(input_buffers, summary_buffers)); |
|
|
|
} else { |
|
|
|
GE_CHK_STATUS_RET_NOLOG(UpdateIoAddr(input_buffers, output_buffers)); |
|
|
|
} |
|
|
|
GE_CHK_STATUS_RET_NOLOG(SetIO(inputs, outputs)); |
|
|
|
GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); |
|
|
|
|
|
|
|
GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); |
|
|
|
if (unknown_type_ == DEPEND_SHAPE_RANGE) { |
|
|
|
GE_CHK_RT_RET(rtStreamSynchronize(stream)); |
|
|
|
GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc)); |
|
|
@@ -817,24 +806,9 @@ Status AiCpuCCTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, |
|
|
|
std::vector<GeTensorDesc> &output_desc, |
|
|
|
std::vector<DataBuffer> &output_buffers, |
|
|
|
rtStream_t stream) { |
|
|
|
GE_CHK_BOOL_RET_STATUS(unknown_type_ != DEPEND_COMPUTE, FAILED, |
|
|
|
"AiCpuCCTask unknown type[%d] is depend compute, it's not supported now.", |
|
|
|
unknown_type_); |
|
|
|
|
|
|
|
GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc, stream)); |
|
|
|
|
|
|
|
size_t arg_index = 0; |
|
|
|
auto *task_io_addr = reinterpret_cast<uintptr_t *>(io_addr_); |
|
|
|
GE_CHECK_NOTNULL(task_io_addr); |
|
|
|
for (auto &input : input_buffers) { |
|
|
|
task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(input.data); |
|
|
|
} |
|
|
|
for (auto &output : output_buffers) { |
|
|
|
task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(output.data); |
|
|
|
} |
|
|
|
|
|
|
|
GE_CHK_STATUS_RET_NOLOG(UpdateIoAddr(input_buffers, output_buffers)); |
|
|
|
GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); |
|
|
|
|
|
|
|
if (unknown_type_ == DEPEND_SHAPE_RANGE) { |
|
|
|
GE_CHK_RT_RET(rtStreamSynchronize(stream)); |
|
|
|
GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc)); |
|
|
|