hybrid: propagate the single-op flag from HybridModel down to AiCore op tasks

What this patch does:
  * HybridModel::Init() stores its is_single_op argument in a new member and
    exposes it through HybridModel::IsSingleOp().
  * AiCoreNodeExecutor::LoadTask() reads the flag and passes it to
    AiCoreTaskBuilder::BuildTask(), which gains a trailing
    `bool is_single_op = false` parameter (existing two-argument callers keep
    compiling) and calls SetSingleOp() on each task before Init().
  * AiCoreOpTask::RegisterTbeHandle() now enters the kernel-handle lookup
    branch when the flag is set, even if rtQueryFunctionRegistered() succeeds.
  * TaskContext::AllocateOutput() no longer calls rtMemset on the output
    buffer, and TaskContext::PropagateOutputs() builds the trace name from
    node_item_ instead of dst_node_item.

Review notes:
  * Fixed: SetSingleOp() had its ';' outside the braces
    (`{is_single_op_ = is_single_op};`), and the is_single_op_ member
    declaration was missing '=' (`bool is_single_op_ false;`).
  * NOTE(review): this copy of the patch had lost its line breaks, indentation
    and every `<...>` template argument list. They are reconstructed here from
    the hunk line counts and the surrounding code; confirm context lines
    against the tree before applying (in particular the map / vector element
    types and the line wrap of `auto atomic_task`).
  * The `index` hash for aicore_op_task.h describes the original post-image
    and no longer matches after the two fixes above.

diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc
index da9f4fbf..4511c2b9 100644
--- a/ge/hybrid/model/hybrid_model.cc
+++ b/ge/hybrid/model/hybrid_model.cc
@@ -42,6 +42,7 @@ HybridModel::~HybridModel() {
 
 Status HybridModel::Init(bool is_single_op) {
   GELOGD("Start to init hybrid model.");
+  is_single_op_ = is_single_op;
   if (is_single_op) {
     GE_CHK_STATUS_RET(HybridModelBuilder(*this).BuildForSingleOp(), "Failed to build hybrid model.");
   } else {
diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h
index 8849f57a..1f973d1e 100644
--- a/ge/hybrid/model/hybrid_model.h
+++ b/ge/hybrid/model/hybrid_model.h
@@ -69,6 +69,10 @@ class HybridModel {
     return model_id_;
   }
 
+  bool IsSingleOp() const {
+    return is_single_op_;
+  }
+
   TensorValue* GetVariable(const string &name) const;
 
   NodePtr GetVariableNode(const string &name) const;
@@ -131,6 +135,7 @@ class HybridModel {
   std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_;
 
   bool is_new_model_desc_ = false;  // support aipp
+  bool is_single_op_ = false;
 
   // runtime fields
   uint32_t device_id_ = 0;
diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
index cb5a7d4c..3174df80 100755
--- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
@@ -49,6 +49,7 @@ Status AiCoreNodeExecutor::Initialize() {
 Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const {
   GE_CHECK_NOTNULL(node);
   GELOGI("AiCoreNodeExecutor(%s) LoadTask Start.", node->GetName().c_str());
+  bool is_single_op = model.IsSingleOp();
 
   auto *task_defs = model.GetTaskDefs(node);
   if (task_defs == nullptr || task_defs->empty()) {
@@ -66,7 +67,8 @@ Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &nod
 
   AiCoreTaskBuilder builder(node->GetOpDesc(), *task_defs);
   std::unique_ptr<NodeTask> node_task;
-  GE_CHK_STATUS_RET(builder.BuildTask(node_task, true), "[%s] Failed to build op tasks.", node->GetName().c_str());
+  GE_CHK_STATUS_RET(builder.BuildTask(node_task, true, is_single_op),
+                    "[%s] Failed to build op tasks.", node->GetName().c_str());
   task = std::move(node_task);
   GELOGI("AiCoreNodeExecutor(%s) LoadTask End.", node->GetName().c_str());
   return SUCCESS;
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
index f1bd6466..a34bba22 100644
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
@@ -65,7 +65,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
   }
   TBEHandleStore &kernel_store = TBEHandleStore::GetInstance();
   rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str());
-  if (rt_ret != RT_ERROR_NONE) {
+  if (rt_ret != RT_ERROR_NONE || is_single_op_) {
     void *bin_handle = nullptr;
     if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) {
       GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str());
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h
index 3f350531..4cbb9810 100755
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.h
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h
@@ -50,6 +50,8 @@ class AiCoreOpTask {
 
   uint32_t GetBlockDim() const {return block_dim_;}
 
+  void SetSingleOp(bool is_single_op) {is_single_op_ = is_single_op;}
+
  protected:
   Status UpdateTilingInfo(TaskContext &context);
   virtual std::string GetKeyForOpParamSize() const;
@@ -72,6 +74,7 @@ class AiCoreOpTask {
   uint32_t args_size_ = 0;
   uint32_t block_dim_ = 1;
   bool clear_atomic_ = true;
+  bool is_single_op_ = false;
   std::vector<int> output_indices_to_skip_;
 };
 
diff --git a/ge/hybrid/node_executor/aicore/aicore_task_builder.cc b/ge/hybrid/node_executor/aicore/aicore_task_builder.cc
index b2996435..2bf2cb36 100755
--- a/ge/hybrid/node_executor/aicore/aicore_task_builder.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_task_builder.cc
@@ -37,7 +37,9 @@ AiCoreTaskBuilder::AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector
     : op_desc_(op_desc), task_defs_(task_defs) {
 }
 
-Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic) {
+Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task,
+                                    bool ignore_failure_on_atomic,
+                                    bool is_single_op) {
   GE_CHECK_NOTNULL(op_desc_);
   if (task_defs_.size() > kNumTaskWithAtomicAddrCleanTask) {
     GELOGE(INTERNAL_ERROR,
@@ -68,6 +70,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool i
     auto atomic_task =
         std::unique_ptr<AtomicAddrCleanOpTask>(new(std::nothrow)AtomicAddrCleanOpTask());
     GE_CHECK_NOTNULL(atomic_task);
+    atomic_task->SetSingleOp(is_single_op);
     GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()),
                       "[%s] Failed to init task for AtomicAddrClean",
                       op_desc_->GetName().c_str());
@@ -77,6 +80,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool i
   // build aicore task
   auto aicore_task = std::unique_ptr<AiCoreOpTask>(new(std::nothrow)AiCoreOpTask());
   GE_CHECK_NOTNULL(aicore_task);
+  aicore_task->SetSingleOp(is_single_op);
   GE_CHK_STATUS_RET(aicore_task->Init(*op_desc_, task_defs_.back()),
                     "[%s] Failed to init task for AtomicAddrClean",
                     op_desc_->GetName().c_str());
diff --git a/ge/hybrid/node_executor/aicore/aicore_task_builder.h b/ge/hybrid/node_executor/aicore/aicore_task_builder.h
index 92db809d..8f95df15 100755
--- a/ge/hybrid/node_executor/aicore/aicore_task_builder.h
+++ b/ge/hybrid/node_executor/aicore/aicore_task_builder.h
@@ -47,7 +47,7 @@ class AiCoreTaskBuilder {
   AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector<domi::TaskDef> &task_defs);
   ~AiCoreTaskBuilder() = default;
 
-  Status BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic);
+  Status BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic, bool is_single_op = false);
 
  private:
   bool ExpectAtomicAddrCleanTask();
diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc
index 03696533..bc318124 100644
--- a/ge/hybrid/node_executor/task_context.cc
+++ b/ge/hybrid/node_executor/task_context.cc
@@ -251,10 +251,6 @@ Status TaskContext::AllocateOutput(int index,
     }
   }
 
-  if (outputs_start_[index].GetSize() > 0) {
-    rtMemset(outputs_start_[index].MutableData(), outputs_start_[index].GetSize(), 0, outputs_start_[index].GetSize());
-  }
-
   if (execution_context_->trace_enabled) {
     outputs_start_[index].SetName(node_item_->NodeName() + "_out_" + std::to_string(index));
   }
@@ -401,7 +397,7 @@ Status TaskContext::PropagateOutputs() {
       subgraph_context_->all_inputs_[input_offset] = *tensor;
       if (execution_context_->trace_enabled) {
         subgraph_context_->all_inputs_[input_offset].SetName(
-            dst_node_item->NodeName() + "_in_" + std::to_string(dst_input_idx));
+            node_item_->NodeName() + "_in_" + std::to_string(dst_input_idx));
       }
     }
   }