Browse Source

fix dynamic single op

tags/v1.2.0
chuxing 3 years ago
parent
commit
613e221a97
3 changed files with 38 additions and 61 deletions
  1. +4
    -0
      ge/single_op/task/aicpu_kernel_task_builder.cc
  2. +33
    -59
      ge/single_op/task/op_task.cc
  3. +1
    -2
      ge/single_op/task/op_task.h

+ 4
- 0
ge/single_op/task/aicpu_kernel_task_builder.cc View File

@@ -97,6 +97,10 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id, cons
return ret; return ret;
} }


if (task.GetUnknownType() == DEPEND_COMPUTE) {
GELOGE(FAILED, "AiCpuCCTask unknown type is depend compute, it's not supported now.");
return FAILED;
}
auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(task.args_.get()); auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(task.args_.get());
if (task.ext_info_addr_dev_ != nullptr) { if (task.ext_info_addr_dev_ != nullptr) {
aicpu_param_head->extInfoLength = kernel_ext_info.size(); aicpu_param_head->extInfoLength = kernel_ext_info.size();


+ 33
- 59
ge/single_op/task/op_task.cc View File

@@ -454,6 +454,29 @@ Status AiCpuBaseTask::UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensor
return SUCCESS; return SUCCESS;
} }


// Writes the data address of every input buffer, followed by the data address
// of every output buffer, into consecutive entries of the address table
// reported by GetIoAddr().
//
// inputs  : input buffers; a null `data` pointer is rejected by GE_CHECK_NOTNULL.
// outputs : output buffers; a null `data` pointer is rejected by GE_CHECK_NOTNULL.
//
// Returns SUCCESS once all addresses are written, FAILED when the table is too
// small for inputs + outputs, or the status produced by GE_CHECK_NOTNULL when
// the table base or any buffer address is null.
Status AiCpuBaseTask::UpdateIoAddr(const vector<DataBuffer> &inputs, const vector<DataBuffer> &outputs) {
  uintptr_t *arg_base = nullptr;
  size_t arg_num = 0;
  GetIoAddr(arg_base, arg_num);

  // The loops below write through arg_base, so it must be valid before use.
  GE_CHECK_NOTNULL(arg_base);

  // NOTE(review): arg_num is treated as the number of uintptr_t entries
  // available at arg_base -- confirm against the GetIoAddr implementations.
  // Written as a subtraction-based comparison so the sum cannot overflow.
  if (inputs.size() > arg_num || outputs.size() > arg_num - inputs.size()) {
    GELOGE(FAILED, "AICpuTask io addr num[%zu] is less than input num[%zu] + output num[%zu].",
           arg_num, inputs.size(), outputs.size());
    return FAILED;
  }

  // Input number and output number were checked in ValidateParams.
  for (size_t i = 0; i < inputs.size(); ++i) {
    auto addr = inputs[i].data;
    GE_CHECK_NOTNULL(addr);
    GELOGD("AICpuTask input[%zu] addr = %p", i, addr);
    *arg_base++ = reinterpret_cast<uintptr_t>(addr);
  }

  // Output addresses follow immediately after the input addresses.
  for (size_t i = 0; i < outputs.size(); ++i) {
    auto addr = outputs[i].data;
    GE_CHECK_NOTNULL(addr);
    GELOGD("AICpuTask output[%zu] addr = %p", i, addr);
    *arg_base++ = reinterpret_cast<uintptr_t>(addr);
  }

  return SUCCESS;
}

AiCpuTask::~AiCpuTask() { AiCpuTask::~AiCpuTask() {
FreeHbm(args_); FreeHbm(args_);
FreeHbm(io_addr_); FreeHbm(io_addr_);
@@ -631,40 +654,6 @@ Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output
return SUCCESS; return SUCCESS;
} }


// Builds the list of I/O addresses for this task and copies it to io_addr_
// with rtMemcpy (RT_MEMCPY_HOST_TO_DEVICE).
//
// The list holds the first num_inputs_ entries of `inputs`, followed by either
// the first num_outputs_ entries of `outputs` or, when unknown_type_ is
// DEPEND_COMPUTE, the first num_outputs_ entries of output_summary_.
//
// inputs  : input addresses; a null entry is rejected by GE_CHECK_NOTNULL.
// outputs : output addresses; checked for null only when they are used, i.e.
//           when unknown_type_ is not DEPEND_COMPUTE.
//
// Returns SUCCESS on completion, or the status produced by GE_CHECK_NOTNULL /
// GE_CHK_RT_RET on a null address or a failed copy.
Status AiCpuTask::SetIO(const vector<void *> &inputs, vector<void *> &outputs) {
  vector<uint64_t> io_addrs;
  io_addrs.reserve(num_inputs_ + num_outputs_);
  for (size_t i = 0; i < num_inputs_; ++i) {
    GE_CHECK_NOTNULL(inputs[i]);
    GELOGD("AiCpuTask input[%zu] addr = %p", i, inputs[i]);
    io_addrs.emplace_back(reinterpret_cast<uintptr_t>(inputs[i]));
  }

  if (unknown_type_ != DEPEND_COMPUTE) {
    for (size_t i = 0; i < num_outputs_; ++i) {
      GE_CHECK_NOTNULL(outputs[i]);
      GELOGD("AiCpuTask output[%zu] addr = %p", i, outputs[i]);
      io_addrs.emplace_back(reinterpret_cast<uintptr_t>(outputs[i]));
    }
  } else {
    // For DEPEND_COMPUTE the summary addresses take the place of the
    // caller-provided output addresses.
    for (size_t i = 0; i < num_outputs_; ++i) {
      void *summary_addr = output_summary_[i];
      io_addrs.emplace_back(reinterpret_cast<uintptr_t>(summary_addr));
    }
  }

  if (!io_addrs.empty()) {
    auto *dst_io_addr = const_cast<uintptr_t *>(reinterpret_cast<const uintptr_t *>(io_addr_));
    // Validate the destination before handing it to rtMemcpy, not afterwards.
    GE_CHECK_NOTNULL(dst_io_addr);
    GE_CHK_RT_RET(rtMemcpy(dst_io_addr,
                           sizeof(uint64_t) * io_addrs.size(),
                           &io_addrs[0],
                           sizeof(uint64_t) * io_addrs.size(),
                           RT_MEMCPY_HOST_TO_DEVICE));
  }
  return SUCCESS;
}

Status AiCpuTask::InitForSummaryAndCopy() { Status AiCpuTask::InitForSummaryAndCopy() {
if (unknown_type_ != DEPEND_COMPUTE || num_outputs_ == 0) { if (unknown_type_ != DEPEND_COMPUTE || num_outputs_ == 0) {
GELOGI("Unknown_type is %d, output num is %d.", unknown_type_, num_outputs_); GELOGI("Unknown_type is %d, output num is %d.", unknown_type_, num_outputs_);
@@ -736,17 +725,17 @@ Status AiCpuTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc,
std::vector<DataBuffer> &output_buffers, std::vector<DataBuffer> &output_buffers,
rtStream_t stream) { rtStream_t stream) {
GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc, stream)); GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc, stream));
std::vector<void *> inputs;
std::vector<void *> outputs;
for (auto &buffer : input_buffers) {
inputs.emplace_back(buffer.data);
}
for (auto &buffer : output_buffers) {
outputs.emplace_back(buffer.data);
if (unknown_type_ == DEPEND_COMPUTE) {
std::vector<DataBuffer> summary_buffers;
for (size_t i = 0; i < num_outputs_; ++i) {
summary_buffers.emplace_back(output_summary_[i], sizeof(aicpu::FWKAdapter::ResultSummary), false);
}
GE_CHK_STATUS_RET_NOLOG(UpdateIoAddr(input_buffers, summary_buffers));
} else {
GE_CHK_STATUS_RET_NOLOG(UpdateIoAddr(input_buffers, output_buffers));
} }
GE_CHK_STATUS_RET_NOLOG(SetIO(inputs, outputs));
GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream));


GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream));
if (unknown_type_ == DEPEND_SHAPE_RANGE) { if (unknown_type_ == DEPEND_SHAPE_RANGE) {
GE_CHK_RT_RET(rtStreamSynchronize(stream)); GE_CHK_RT_RET(rtStreamSynchronize(stream));
GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc)); GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc));
@@ -817,24 +806,9 @@ Status AiCpuCCTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc,
std::vector<GeTensorDesc> &output_desc, std::vector<GeTensorDesc> &output_desc,
std::vector<DataBuffer> &output_buffers, std::vector<DataBuffer> &output_buffers,
rtStream_t stream) { rtStream_t stream) {
GE_CHK_BOOL_RET_STATUS(unknown_type_ != DEPEND_COMPUTE, FAILED,
"AiCpuCCTask unknown type[%d] is depend compute, it's not supported now.",
unknown_type_);

GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc, stream)); GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc, stream));

size_t arg_index = 0;
auto *task_io_addr = reinterpret_cast<uintptr_t *>(io_addr_);
GE_CHECK_NOTNULL(task_io_addr);
for (auto &input : input_buffers) {
task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(input.data);
}
for (auto &output : output_buffers) {
task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(output.data);
}

GE_CHK_STATUS_RET_NOLOG(UpdateIoAddr(input_buffers, output_buffers));
GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream));

if (unknown_type_ == DEPEND_SHAPE_RANGE) { if (unknown_type_ == DEPEND_SHAPE_RANGE) {
GE_CHK_RT_RET(rtStreamSynchronize(stream)); GE_CHK_RT_RET(rtStreamSynchronize(stream));
GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc)); GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc));


+ 1
- 2
ge/single_op/task/op_task.h View File

@@ -112,6 +112,7 @@ class AiCpuBaseTask : public OpTask {
UnknowShapeOpType GetUnknownType() const { return unknown_type_; } UnknowShapeOpType GetUnknownType() const { return unknown_type_; }


protected: protected:
Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);
Status SetExtInfoAndType(const std::string &kernel_ext_info, uint64_t kernel_id); Status SetExtInfoAndType(const std::string &kernel_ext_info, uint64_t kernel_id);


Status UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc, Status UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc,
@@ -145,8 +146,6 @@ class AiCpuTask : public AiCpuBaseTask {
Status SetMemCopyTask(const domi::KernelExDef &kernel_def); Status SetMemCopyTask(const domi::KernelExDef &kernel_def);


private: private:
Status SetIO(const vector<void *> &inputs, vector<void *> &outputs);

// for copy task. // for copy task.
Status InitForSummaryAndCopy(); Status InitForSummaryAndCopy();
Status UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, Status UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc,


Loading…
Cancel
Save