diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index a5547b39..426d3233 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -432,7 +432,7 @@ Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask * return SUCCESS; } -Status SingleOpModel::BuildAtomicTask(const domi::TaskDef &task_def, AtomicOpTask **task) { +Status SingleOpModel::BuildAtomicTask(const domi::TaskDef &task_def, AtomicAddrCleanOpTask **task) { GE_CHECK_NOTNULL(task); const auto &context = task_def.kernel().context(); auto iter = op_list_.find(context.op_index()); @@ -442,18 +442,18 @@ Status SingleOpModel::BuildAtomicTask(const domi::TaskDef &task_def, AtomicOpTas return ACL_ERROR_GE_INTERNAL_ERROR; } - std::unique_ptr atomic_task(new (std::nothrow) AtomicOpTask()); + std::unique_ptr atomic_task(new (std::nothrow) AtomicAddrCleanOpTask()); if (atomic_task == nullptr) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][AtomicOpTask]failed."); - REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for new AtomicOpTask."); + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][AtomicAddrCleanOpTask]failed."); + REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for new AtomicAddrCleanOpTask."); return ACL_ERROR_GE_MEMORY_ALLOCATION; } - auto builder = AtomicTaskBuilder(model_name_, iter->second, task_def); + auto builder = AtomicAddrCleanTaskBuilder(model_name_, iter->second, task_def); auto ret = builder.BuildTask(*atomic_task, model_params_); if (ret != SUCCESS) { - GELOGE(ret, "[Build][AtomicOpTask]failed."); - REPORT_INNER_ERROR("E19999", "[Build][AtomicOpTask]failed."); + GELOGE(ret, "[Build][AtomicAddrCleanOpTask]failed."); + REPORT_INNER_ERROR("E19999", "[Build][AtomicAddrCleanOpTask]failed."); return ret; } @@ -571,13 +571,21 @@ Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, GE_CHECK_NOTNULL(compute_graph); single_op.compute_graph_ = compute_graph; - GE_CHK_BOOL_RET_STATUS(node_tasks_.size() == 1, ACL_ERROR_GE_PARAM_INVALID, - "[Check][Size]Node size must be 1, but get %zu.", node_tasks_.size()); + if (node_tasks_.size() != 1) { + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]Node size must be 1, but get %zu.", node_tasks_.size()); + REPORT_INNER_ERROR("E19999", "[Check][Size]Node size must be 1, but get %zu.", node_tasks_.size()); + return ACL_ERROR_GE_PARAM_INVALID; + } + auto iter = node_tasks_.begin(); auto node = iter->first; - auto task_defs = iter->second; - GE_CHK_BOOL_RET_STATUS(task_defs.size() > 0 && task_defs.size() <= kNumTaskWithAtomicAddrCleanTask, - ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]task_defs size must be 1 or 2, but get %zu.", task_defs.size()); + const auto &task_defs = iter->second; + if (task_defs.size() <= 0 || task_defs.size() > kNumTaskWithAtomicAddrCleanTask) { + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]Node size must be 1, but get %zu.", node_tasks_.size()); + REPORT_INNER_ERROR("E19999", "[Check][Size]task_defs size must be 1 or 2, but get %zu.", task_defs.size()); + return ACL_ERROR_GE_PARAM_INVALID; + } + GE_CHECK_NOTNULL(node); auto op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -594,10 +602,10 @@ Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, } if (task_defs.size() == kNumTaskWithAtomicAddrCleanTask) { const auto &atomic_task_def = task_defs.front(); - AtomicOpTask *atomic_task = nullptr; + AtomicAddrCleanOpTask *atomic_task = nullptr; GE_CHK_STATUS_RET_NOLOG(BuildAtomicTask(atomic_task_def, &atomic_task)); GE_CHK_STATUS_RET_NOLOG(atomic_task->InitAtomicAddrCleanIndices()); - tbe_task->SetAtomicTask(atomic_task); + tbe_task->SetAtomicAddrCleanTask(atomic_task); } single_op.op_task_.reset(tbe_task); } else if (lib_name == kEngineNameAiCpu) { diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index 83490f5f..b1cd161c 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -69,7 +69,7 @@ class SingleOpModel { Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); Status BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &dynamic_single_op); Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); - Status BuildAtomicTask(const domi::TaskDef &task_def, AtomicOpTask **task); + Status BuildAtomicTask(const domi::TaskDef &task_def, AtomicAddrCleanOpTask **task); Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, uint64_t kernel_id); Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index dfdec750..c6c99ab0 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -268,15 +268,6 @@ Status TbeOpTask::UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc dst_tensor.SetShape(GeShape(std::move(storage_shape))); dst_tensor.SetOriginShape(src_tensor.GetShape()); } - - int64_t size = 0; - graphStatus graph_status = TensorUtils::GetTensorMemorySizeInBytes(dst_tensor, size); - if (graph_status != GRAPH_SUCCESS) { - REPORT_CALL_ERROR("E19999", "Get tensor size in bytes failed!"); - GELOGE(graph_status, "[Get][TensorMemorySize] In Bytes failed!"); - return FAILED; - } - TensorUtils::SetSize(dst_tensor, size); return SUCCESS; } @@ -490,7 +481,12 @@ void TbeOpTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { } } -Status AtomicOpTask::UpdateIoAddr(const vector &inputs, const vector &outputs) { +Status AtomicAddrCleanOpTask::UpdateNodeByShape(const vector &input_desc, + const vector &output_desc) { + return SUCCESS; +} + +Status AtomicAddrCleanOpTask::UpdateIoAddr(const vector &inputs, const vector &outputs) { uintptr_t *arg_base = reinterpret_cast(args_.get()); for (auto atomic_output_index : atomic_output_indices_) { if (atomic_output_index >= static_cast(outputs.size())) { @@ -500,11 +496,21 @@ Status AtomicOpTask::UpdateIoAddr(const vector &inputs, const vector } auto &output_buffer = outputs[atomic_output_index]; *arg_base++ = reinterpret_cast(output_buffer.data); + + auto tensor_desc = op_desc_->MutableOutputDesc(atomic_output_index); + int64_t size = 0; + graphStatus graph_status = TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, size); + if (graph_status != GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Get tensor size in bytes failed!"); + GELOGE(graph_status, "[Get][TensorMemorySize] In Bytes failed!"); + return FAILED; + } + TensorUtils::SetSize(*tensor_desc, size); } return SUCCESS; } -Status AtomicOpTask::UpdateTilingArgs(rtStream_t stream) { +Status AtomicAddrCleanOpTask::UpdateTilingArgs(rtStream_t stream) { if (tiling_buffer_ != nullptr) { GELOGD("[%s] Start to copy tiling info. size = %zu", node_->GetName().c_str(), tiling_data_.size()); GE_CHK_RT_RET(rtMemcpyAsync(tiling_buffer_, max_tiling_size_, tiling_data_.data(), tiling_data_.size(), @@ -516,7 +522,7 @@ Status AtomicOpTask::UpdateTilingArgs(rtStream_t stream) { return SUCCESS; } -Status AtomicOpTask::CalcTilingInfo(optiling::utils::OpRunInfo &run_info) { +Status AtomicAddrCleanOpTask::CalcTilingInfo(optiling::utils::OpRunInfo &run_info) { auto ret = optiling::OpAtomicCalculateV2(*node_, run_info); if (ret != GRAPH_SUCCESS) { GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Invoke][OpAtomicCalculate] failed, ret = %u.", ret); @@ -526,7 +532,7 @@ Status AtomicOpTask::CalcTilingInfo(optiling::utils::OpRunInfo &run_info) { return SUCCESS; } -Status AtomicOpTask::InitAtomicAddrCleanIndices() { +Status AtomicAddrCleanOpTask::InitAtomicAddrCleanIndices() { GELOGD("[%s] Start to setup AtomicAddrClean task.", op_desc_->GetName().c_str()); std::vector atomic_output_indices; (void) ge::AttrUtils::GetListInt(op_desc_, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_indices); diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index 1e100a11..0ce4cbae 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -89,7 +89,7 @@ class TbeOpTask : public OpTask { void SetKernelArgs(std::unique_ptr &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc); void SetKernelWithHandleArgs(std::unique_ptr &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle); - void SetAtomicTask(OpTask *task) { atomic_task_.reset(task); } + void SetAtomicAddrCleanTask(OpTask *task) { atomic_task_.reset(task); } Status UpdateRunInfo() override; Status SetArgIndex(); @@ -108,13 +108,13 @@ class TbeOpTask : public OpTask { void *tiling_buffer_ = nullptr; uint32_t max_tiling_size_ = 0; std::string tiling_data_; + size_t input_num_; // include const input + size_t output_num_; private: friend class SingleOpModel; friend class TbeTaskBuilder; static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor); - Status UpdateNodeByShape(const vector &input_desc, - const vector &output_desc); Status AllocateWorkspaces(const std::vector &workspace_sizes); Status DoLaunchKernel(rtStream_t stream); Status CheckAndExecuteAtomic(const vector &input_desc, @@ -122,6 +122,8 @@ class TbeOpTask : public OpTask { vector &output_desc, vector &output_buffers, rtStream_t stream); + virtual Status UpdateNodeByShape(const vector &input_desc, + const vector &output_desc); virtual Status UpdateTilingArgs(rtStream_t stream); virtual Status UpdateIoAddr(const vector &inputs, const vector &outputs); virtual Status CalcTilingInfo(optiling::utils::OpRunInfo &run_info); @@ -140,17 +142,17 @@ class TbeOpTask : public OpTask { std::string original_kernel_key_; std::string node_info_; std::vector arg_index_; // data index in args - size_t input_num_; // include const input - size_t output_num_; std::unique_ptr atomic_task_; }; -class AtomicOpTask : public TbeOpTask { +class AtomicAddrCleanOpTask : public TbeOpTask { public: Status InitAtomicAddrCleanIndices(); private: + Status UpdateNodeByShape(const vector &input_desc, + const vector &output_desc) override; Status UpdateIoAddr(const vector &inputs, const vector &outputs) override; Status UpdateTilingArgs(rtStream_t stream) override; Status CalcTilingInfo(optiling::utils::OpRunInfo &run_info) override; diff --git a/ge/single_op/task/tbe_task_builder.cc b/ge/single_op/task/tbe_task_builder.cc index c5579a01..017dac25 100644 --- a/ge/single_op/task/tbe_task_builder.cc +++ b/ge/single_op/task/tbe_task_builder.cc @@ -459,23 +459,23 @@ std::string TbeTaskBuilder::GetKeyForTvmMetaData() const { return TVM_ATTR_NAME_METADATA; } -Status AtomicTaskBuilder::InitKernelArgs(void *args_addr, size_t arg_size, const SingleOpModelParam ¶m) { +Status AtomicAddrCleanTaskBuilder::InitKernelArgs(void *args_addr, size_t arg_size, const SingleOpModelParam ¶m) { return SUCCESS; } -std::string AtomicTaskBuilder::GetKeyForOpParamSize() const { +std::string AtomicAddrCleanTaskBuilder::GetKeyForOpParamSize() const { return kAttrAtomicOpParamSize; } -std::string AtomicTaskBuilder::GetKeyForTvmMetaData() const { +std::string AtomicAddrCleanTaskBuilder::GetKeyForTvmMetaData() const { return ATOMIC_ATTR_TVM_METADATA; } -void AtomicTaskBuilder::GetKernelName(const OpDescPtr &op_desc, std::string &kernel_name) const { +void AtomicAddrCleanTaskBuilder::GetKernelName(const OpDescPtr &op_desc, std::string &kernel_name) const { (void)AttrUtils::GetStr(op_desc, op_desc->GetName() + "_atomic_kernelname", kernel_name); } -TBEKernelPtr AtomicTaskBuilder::GetTbeKernel(const OpDescPtr &op_desc) const { +TBEKernelPtr AtomicAddrCleanTaskBuilder::GetTbeKernel(const OpDescPtr &op_desc) const { return op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_TBE_KERNEL, TBEKernelPtr()); } diff --git a/ge/single_op/task/tbe_task_builder.h b/ge/single_op/task/tbe_task_builder.h index 833ab0e0..06d17901 100755 --- a/ge/single_op/task/tbe_task_builder.h +++ b/ge/single_op/task/tbe_task_builder.h @@ -126,11 +126,11 @@ class TbeTaskBuilder { void *handle_ = nullptr; }; -class AtomicTaskBuilder : public TbeTaskBuilder { +class AtomicAddrCleanTaskBuilder : public TbeTaskBuilder { public: - AtomicTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def) + AtomicAddrCleanTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def) : TbeTaskBuilder(model_name, node, task_def) {} - ~AtomicTaskBuilder() override = default; + ~AtomicAddrCleanTaskBuilder() override = default; protected: std::string GetKeyForOpParamSize() const override; diff --git a/tests/ut/ge/single_op/single_op_task_unittest.cc b/tests/ut/ge/single_op/single_op_task_unittest.cc index f6ae0dbf..3e3160c2 100644 --- a/tests/ut/ge/single_op/single_op_task_unittest.cc +++ b/tests/ut/ge/single_op/single_op_task_unittest.cc @@ -157,8 +157,11 @@ TEST_F(UtestSingleOpTask, test_update_ioaddr) { TEST_F(UtestSingleOpTask, test_atomic_exec) { auto graph = make_shared("graph"); auto op_desc = make_shared("Add", "Add"); + GeTensorDesc desc; + op_desc->AddInputDesc(desc); + op_desc->AddOutputDesc(desc); auto node = graph->AddNode(op_desc); - AtomicOpTask task; + AtomicAddrCleanOpTask task; task.op_desc_ = op_desc; task.node_ = node;