Browse Source

Fix review comments.

tags/v1.5.1
zhaozhixuan 3 years ago
parent
commit
6da3097107
7 changed files with 62 additions and 43 deletions
  1. +22
    -14
      ge/single_op/single_op_model.cc
  2. +1
    -1
      ge/single_op/single_op_model.h
  3. +19
    -13
      ge/single_op/task/op_task.cc
  4. +8
    -6
      ge/single_op/task/op_task.h
  5. +5
    -5
      ge/single_op/task/tbe_task_builder.cc
  6. +3
    -3
      ge/single_op/task/tbe_task_builder.h
  7. +4
    -1
      tests/ut/ge/single_op/single_op_task_unittest.cc

+ 22
- 14
ge/single_op/single_op_model.cc View File

@@ -432,7 +432,7 @@ Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask *
return SUCCESS; return SUCCESS;
} }


Status SingleOpModel::BuildAtomicTask(const domi::TaskDef &task_def, AtomicOpTask **task) {
Status SingleOpModel::BuildAtomicTask(const domi::TaskDef &task_def, AtomicAddrCleanOpTask **task) {
GE_CHECK_NOTNULL(task); GE_CHECK_NOTNULL(task);
const auto &context = task_def.kernel().context(); const auto &context = task_def.kernel().context();
auto iter = op_list_.find(context.op_index()); auto iter = op_list_.find(context.op_index());
@@ -442,18 +442,18 @@ Status SingleOpModel::BuildAtomicTask(const domi::TaskDef &task_def, AtomicOpTas
return ACL_ERROR_GE_INTERNAL_ERROR; return ACL_ERROR_GE_INTERNAL_ERROR;
} }


std::unique_ptr<AtomicOpTask> atomic_task(new (std::nothrow) AtomicOpTask());
std::unique_ptr<AtomicAddrCleanOpTask> atomic_task(new (std::nothrow) AtomicAddrCleanOpTask());
if (atomic_task == nullptr) { if (atomic_task == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][AtomicOpTask]failed.");
REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for new AtomicOpTask.");
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][AtomicAddrCleanOpTask]failed.");
REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for new AtomicAddrCleanOpTask.");
return ACL_ERROR_GE_MEMORY_ALLOCATION; return ACL_ERROR_GE_MEMORY_ALLOCATION;
} }


auto builder = AtomicTaskBuilder(model_name_, iter->second, task_def);
auto builder = AtomicAddrCleanTaskBuilder(model_name_, iter->second, task_def);
auto ret = builder.BuildTask(*atomic_task, model_params_); auto ret = builder.BuildTask(*atomic_task, model_params_);
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ret, "[Build][AtomicOpTask]failed.");
REPORT_INNER_ERROR("E19999", "[Build][AtomicOpTask]failed.");
GELOGE(ret, "[Build][AtomicAddrCleanOpTask]failed.");
REPORT_INNER_ERROR("E19999", "[Build][AtomicAddrCleanOpTask]failed.");
return ret; return ret;
} }


@@ -571,13 +571,21 @@ Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource,
GE_CHECK_NOTNULL(compute_graph); GE_CHECK_NOTNULL(compute_graph);
single_op.compute_graph_ = compute_graph; single_op.compute_graph_ = compute_graph;


GE_CHK_BOOL_RET_STATUS(node_tasks_.size() == 1, ACL_ERROR_GE_PARAM_INVALID,
"[Check][Size]Node size must be 1, but get %zu.", node_tasks_.size());
if (node_tasks_.size() != 1) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]Node size must be 1, but get %zu.", node_tasks_.size());
REPORT_INNER_ERROR("E19999", "[Check][Size]Node size must be 1, but get %zu.", node_tasks_.size());
return ACL_ERROR_GE_PARAM_INVALID;
}

auto iter = node_tasks_.begin(); auto iter = node_tasks_.begin();
auto node = iter->first; auto node = iter->first;
auto task_defs = iter->second;
GE_CHK_BOOL_RET_STATUS(task_defs.size() > 0 && task_defs.size() <= kNumTaskWithAtomicAddrCleanTask,
ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]task_defs size must be 1 or 2, but get %zu.", task_defs.size());
// Bind by reference: the task-def list is only inspected below, never modified.
const auto &task_defs = iter->second;
// Accept only a non-empty list holding at most kNumTaskWithAtomicAddrCleanTask entries.
if (task_defs.empty() || task_defs.size() > kNumTaskWithAtomicAddrCleanTask) {
  // Log the quantity that actually failed the check (task_defs, not node_tasks_), so the log line
  // and the reported inner error carry the same message.
  GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]task_defs size must be 1 or 2, but get %zu.", task_defs.size());
  REPORT_INNER_ERROR("E19999", "[Check][Size]task_defs size must be 1 or 2, but get %zu.", task_defs.size());
  return ACL_ERROR_GE_PARAM_INVALID;
}
GE_CHECK_NOTNULL(node); GE_CHECK_NOTNULL(node);
auto op_desc = node->GetOpDesc(); auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc); GE_CHECK_NOTNULL(op_desc);
@@ -594,10 +602,10 @@ Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource,
} }
if (task_defs.size() == kNumTaskWithAtomicAddrCleanTask) { if (task_defs.size() == kNumTaskWithAtomicAddrCleanTask) {
const auto &atomic_task_def = task_defs.front(); const auto &atomic_task_def = task_defs.front();
AtomicOpTask *atomic_task = nullptr;
AtomicAddrCleanOpTask *atomic_task = nullptr;
GE_CHK_STATUS_RET_NOLOG(BuildAtomicTask(atomic_task_def, &atomic_task)); GE_CHK_STATUS_RET_NOLOG(BuildAtomicTask(atomic_task_def, &atomic_task));
GE_CHK_STATUS_RET_NOLOG(atomic_task->InitAtomicAddrCleanIndices()); GE_CHK_STATUS_RET_NOLOG(atomic_task->InitAtomicAddrCleanIndices());
tbe_task->SetAtomicTask(atomic_task);
tbe_task->SetAtomicAddrCleanTask(atomic_task);
} }
single_op.op_task_.reset(tbe_task); single_op.op_task_.reset(tbe_task);
} else if (lib_name == kEngineNameAiCpu) { } else if (lib_name == kEngineNameAiCpu) {


+ 1
- 1
ge/single_op/single_op_model.h View File

@@ -69,7 +69,7 @@ class SingleOpModel {
Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op);
Status BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &dynamic_single_op); Status BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &dynamic_single_op);
Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task);
Status BuildAtomicTask(const domi::TaskDef &task_def, AtomicOpTask **task);
Status BuildAtomicTask(const domi::TaskDef &task_def, AtomicAddrCleanOpTask **task);
Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, uint64_t kernel_id); Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, uint64_t kernel_id);
Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id);




+ 19
- 13
ge/single_op/task/op_task.cc View File

@@ -268,15 +268,6 @@ Status TbeOpTask::UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc
dst_tensor.SetShape(GeShape(std::move(storage_shape))); dst_tensor.SetShape(GeShape(std::move(storage_shape)));
dst_tensor.SetOriginShape(src_tensor.GetShape()); dst_tensor.SetOriginShape(src_tensor.GetShape());
} }

int64_t size = 0;
graphStatus graph_status = TensorUtils::GetTensorMemorySizeInBytes(dst_tensor, size);
if (graph_status != GRAPH_SUCCESS) {
REPORT_CALL_ERROR("E19999", "Get tensor size in bytes failed!");
GELOGE(graph_status, "[Get][TensorMemorySize] In Bytes failed!");
return FAILED;
}
TensorUtils::SetSize(dst_tensor, size);
return SUCCESS; return SUCCESS;
} }


@@ -490,7 +481,12 @@ void TbeOpTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) {
} }
} }


Status AtomicOpTask::UpdateIoAddr(const vector<DataBuffer> &inputs, const vector<DataBuffer> &outputs) {
// No-op override of TbeOpTask::UpdateNodeByShape for the atomic-addr-clean task:
// neither descriptor list is read and SUCCESS is returned unconditionally.
Status AtomicAddrCleanOpTask::UpdateNodeByShape(const vector<GeTensorDesc> &input_desc,
                                                const vector<GeTensorDesc> &output_desc) {
  // Explicitly discard the arguments; they exist only to match the virtual signature.
  (void)input_desc;
  (void)output_desc;
  return SUCCESS;
}

Status AtomicAddrCleanOpTask::UpdateIoAddr(const vector<DataBuffer> &inputs, const vector<DataBuffer> &outputs) {
uintptr_t *arg_base = reinterpret_cast<uintptr_t *>(args_.get()); uintptr_t *arg_base = reinterpret_cast<uintptr_t *>(args_.get());
for (auto atomic_output_index : atomic_output_indices_) { for (auto atomic_output_index : atomic_output_indices_) {
if (atomic_output_index >= static_cast<int>(outputs.size())) { if (atomic_output_index >= static_cast<int>(outputs.size())) {
@@ -500,11 +496,21 @@ Status AtomicOpTask::UpdateIoAddr(const vector<DataBuffer> &inputs, const vector
} }
auto &output_buffer = outputs[atomic_output_index]; auto &output_buffer = outputs[atomic_output_index];
*arg_base++ = reinterpret_cast<uintptr_t>(output_buffer.data); *arg_base++ = reinterpret_cast<uintptr_t>(output_buffer.data);

// Recompute the output tensor's memory size in bytes and store it back on the descriptor.
auto tensor_desc = op_desc_->MutableOutputDesc(atomic_output_index);
// Guard the dereferences below. NOTE(review): MutableOutputDesc presumably yields a null pointer when
// the op desc declares fewer outputs than this index -- confirm against OpDesc. The index was only
// validated against outputs.size() above, not against the op desc itself.
if (tensor_desc == nullptr) {
  GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Get][OutputDesc] failed, output desc of index %d is null.",
         static_cast<int>(atomic_output_index));
  return ACL_ERROR_GE_INTERNAL_ERROR;
}
int64_t size = 0;
graphStatus graph_status = TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, size);
if (graph_status != GRAPH_SUCCESS) {
  REPORT_CALL_ERROR("E19999", "Get tensor size in bytes failed!");
  GELOGE(graph_status, "[Get][TensorMemorySize] In Bytes failed!");
  return FAILED;
}
TensorUtils::SetSize(*tensor_desc, size);
} }
return SUCCESS; return SUCCESS;
} }


Status AtomicOpTask::UpdateTilingArgs(rtStream_t stream) {
Status AtomicAddrCleanOpTask::UpdateTilingArgs(rtStream_t stream) {
if (tiling_buffer_ != nullptr) { if (tiling_buffer_ != nullptr) {
GELOGD("[%s] Start to copy tiling info. size = %zu", node_->GetName().c_str(), tiling_data_.size()); GELOGD("[%s] Start to copy tiling info. size = %zu", node_->GetName().c_str(), tiling_data_.size());
GE_CHK_RT_RET(rtMemcpyAsync(tiling_buffer_, max_tiling_size_, tiling_data_.data(), tiling_data_.size(), GE_CHK_RT_RET(rtMemcpyAsync(tiling_buffer_, max_tiling_size_, tiling_data_.data(), tiling_data_.size(),
@@ -516,7 +522,7 @@ Status AtomicOpTask::UpdateTilingArgs(rtStream_t stream) {
return SUCCESS; return SUCCESS;
} }


Status AtomicOpTask::CalcTilingInfo(optiling::utils::OpRunInfo &run_info) {
Status AtomicAddrCleanOpTask::CalcTilingInfo(optiling::utils::OpRunInfo &run_info) {
auto ret = optiling::OpAtomicCalculateV2(*node_, run_info); auto ret = optiling::OpAtomicCalculateV2(*node_, run_info);
if (ret != GRAPH_SUCCESS) { if (ret != GRAPH_SUCCESS) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Invoke][OpAtomicCalculate] failed, ret = %u.", ret); GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Invoke][OpAtomicCalculate] failed, ret = %u.", ret);
@@ -526,7 +532,7 @@ Status AtomicOpTask::CalcTilingInfo(optiling::utils::OpRunInfo &run_info) {
return SUCCESS; return SUCCESS;
} }


Status AtomicOpTask::InitAtomicAddrCleanIndices() {
Status AtomicAddrCleanOpTask::InitAtomicAddrCleanIndices() {
GELOGD("[%s] Start to setup AtomicAddrClean task.", op_desc_->GetName().c_str()); GELOGD("[%s] Start to setup AtomicAddrClean task.", op_desc_->GetName().c_str());
std::vector<int64_t> atomic_output_indices; std::vector<int64_t> atomic_output_indices;
(void) ge::AttrUtils::GetListInt(op_desc_, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_indices); (void) ge::AttrUtils::GetListInt(op_desc_, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_indices);


+ 8
- 6
ge/single_op/task/op_task.h View File

@@ -89,7 +89,7 @@ class TbeOpTask : public OpTask {
void SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc); void SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc);
void SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, void SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim,
const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle); const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle);
void SetAtomicTask(OpTask *task) { atomic_task_.reset(task); }
void SetAtomicAddrCleanTask(OpTask *task) { atomic_task_.reset(task); }


Status UpdateRunInfo() override; Status UpdateRunInfo() override;
Status SetArgIndex(); Status SetArgIndex();
@@ -108,13 +108,13 @@ class TbeOpTask : public OpTask {
void *tiling_buffer_ = nullptr; void *tiling_buffer_ = nullptr;
uint32_t max_tiling_size_ = 0; uint32_t max_tiling_size_ = 0;
std::string tiling_data_; std::string tiling_data_;
size_t input_num_; // include const input
size_t output_num_;


private: private:
friend class SingleOpModel; friend class SingleOpModel;
friend class TbeTaskBuilder; friend class TbeTaskBuilder;
static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor); static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor);
Status UpdateNodeByShape(const vector<GeTensorDesc> &input_desc,
const vector<GeTensorDesc> &output_desc);
Status AllocateWorkspaces(const std::vector<int64_t> &workspace_sizes); Status AllocateWorkspaces(const std::vector<int64_t> &workspace_sizes);
Status DoLaunchKernel(rtStream_t stream); Status DoLaunchKernel(rtStream_t stream);
Status CheckAndExecuteAtomic(const vector<GeTensorDesc> &input_desc, Status CheckAndExecuteAtomic(const vector<GeTensorDesc> &input_desc,
@@ -122,6 +122,8 @@ class TbeOpTask : public OpTask {
vector<GeTensorDesc> &output_desc, vector<GeTensorDesc> &output_desc,
vector<DataBuffer> &output_buffers, vector<DataBuffer> &output_buffers,
rtStream_t stream); rtStream_t stream);
virtual Status UpdateNodeByShape(const vector<GeTensorDesc> &input_desc,
const vector<GeTensorDesc> &output_desc);
virtual Status UpdateTilingArgs(rtStream_t stream); virtual Status UpdateTilingArgs(rtStream_t stream);
virtual Status UpdateIoAddr(const vector<DataBuffer> &inputs, const vector<DataBuffer> &outputs); virtual Status UpdateIoAddr(const vector<DataBuffer> &inputs, const vector<DataBuffer> &outputs);
virtual Status CalcTilingInfo(optiling::utils::OpRunInfo &run_info); virtual Status CalcTilingInfo(optiling::utils::OpRunInfo &run_info);
@@ -140,17 +142,17 @@ class TbeOpTask : public OpTask {
std::string original_kernel_key_; std::string original_kernel_key_;
std::string node_info_; std::string node_info_;
std::vector<size_t> arg_index_; // data index in args std::vector<size_t> arg_index_; // data index in args
size_t input_num_; // include const input
size_t output_num_;


std::unique_ptr<OpTask> atomic_task_; std::unique_ptr<OpTask> atomic_task_;
}; };


class AtomicOpTask : public TbeOpTask {
class AtomicAddrCleanOpTask : public TbeOpTask {
public: public:
Status InitAtomicAddrCleanIndices(); Status InitAtomicAddrCleanIndices();


private: private:
Status UpdateNodeByShape(const vector<GeTensorDesc> &input_desc,
const vector<GeTensorDesc> &output_desc) override;
Status UpdateIoAddr(const vector<DataBuffer> &inputs, const vector<DataBuffer> &outputs) override; Status UpdateIoAddr(const vector<DataBuffer> &inputs, const vector<DataBuffer> &outputs) override;
Status UpdateTilingArgs(rtStream_t stream) override; Status UpdateTilingArgs(rtStream_t stream) override;
Status CalcTilingInfo(optiling::utils::OpRunInfo &run_info) override; Status CalcTilingInfo(optiling::utils::OpRunInfo &run_info) override;


+ 5
- 5
ge/single_op/task/tbe_task_builder.cc View File

@@ -459,23 +459,23 @@ std::string TbeTaskBuilder::GetKeyForTvmMetaData() const {
return TVM_ATTR_NAME_METADATA; return TVM_ATTR_NAME_METADATA;
} }


Status AtomicTaskBuilder::InitKernelArgs(void *args_addr, size_t arg_size, const SingleOpModelParam &param) {
Status AtomicAddrCleanTaskBuilder::InitKernelArgs(void *args_addr, size_t arg_size, const SingleOpModelParam &param) {
return SUCCESS; return SUCCESS;
} }


std::string AtomicTaskBuilder::GetKeyForOpParamSize() const {
std::string AtomicAddrCleanTaskBuilder::GetKeyForOpParamSize() const {
return kAttrAtomicOpParamSize; return kAttrAtomicOpParamSize;
} }


std::string AtomicTaskBuilder::GetKeyForTvmMetaData() const {
std::string AtomicAddrCleanTaskBuilder::GetKeyForTvmMetaData() const {
return ATOMIC_ATTR_TVM_METADATA; return ATOMIC_ATTR_TVM_METADATA;
} }


void AtomicTaskBuilder::GetKernelName(const OpDescPtr &op_desc, std::string &kernel_name) const {
void AtomicAddrCleanTaskBuilder::GetKernelName(const OpDescPtr &op_desc, std::string &kernel_name) const {
(void)AttrUtils::GetStr(op_desc, op_desc->GetName() + "_atomic_kernelname", kernel_name); (void)AttrUtils::GetStr(op_desc, op_desc->GetName() + "_atomic_kernelname", kernel_name);
} }


TBEKernelPtr AtomicTaskBuilder::GetTbeKernel(const OpDescPtr &op_desc) const {
TBEKernelPtr AtomicAddrCleanTaskBuilder::GetTbeKernel(const OpDescPtr &op_desc) const {
return op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_TBE_KERNEL, TBEKernelPtr()); return op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_TBE_KERNEL, TBEKernelPtr());
} }




+ 3
- 3
ge/single_op/task/tbe_task_builder.h View File

@@ -126,11 +126,11 @@ class TbeTaskBuilder {
void *handle_ = nullptr; void *handle_ = nullptr;
}; };


class AtomicTaskBuilder : public TbeTaskBuilder {
class AtomicAddrCleanTaskBuilder : public TbeTaskBuilder {
public: public:
AtomicTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def)
AtomicAddrCleanTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def)
: TbeTaskBuilder(model_name, node, task_def) {} : TbeTaskBuilder(model_name, node, task_def) {}
~AtomicTaskBuilder() override = default;
~AtomicAddrCleanTaskBuilder() override = default;


protected: protected:
std::string GetKeyForOpParamSize() const override; std::string GetKeyForOpParamSize() const override;


+ 4
- 1
tests/ut/ge/single_op/single_op_task_unittest.cc View File

@@ -157,8 +157,11 @@ TEST_F(UtestSingleOpTask, test_update_ioaddr) {
TEST_F(UtestSingleOpTask, test_atomic_exec) { TEST_F(UtestSingleOpTask, test_atomic_exec) {
auto graph = make_shared<ComputeGraph>("graph"); auto graph = make_shared<ComputeGraph>("graph");
auto op_desc = make_shared<OpDesc>("Add", "Add"); auto op_desc = make_shared<OpDesc>("Add", "Add");
GeTensorDesc desc;
op_desc->AddInputDesc(desc);
op_desc->AddOutputDesc(desc);
auto node = graph->AddNode(op_desc); auto node = graph->AddNode(op_desc);
AtomicOpTask task;
AtomicAddrCleanOpTask task;
task.op_desc_ = op_desc; task.op_desc_ = op_desc;
task.node_ = node; task.node_ = node;




Loading…
Cancel
Save