@@ -42,6 +42,7 @@ HybridModel::~HybridModel() { | |||||
Status HybridModel::Init(bool is_single_op) { | Status HybridModel::Init(bool is_single_op) { | ||||
GELOGD("Start to init hybrid model."); | GELOGD("Start to init hybrid model."); | ||||
is_single_op_ = is_single_op; | |||||
if (is_single_op) { | if (is_single_op) { | ||||
GE_CHK_STATUS_RET(HybridModelBuilder(*this).BuildForSingleOp(), "Failed to build hybrid model."); | GE_CHK_STATUS_RET(HybridModelBuilder(*this).BuildForSingleOp(), "Failed to build hybrid model."); | ||||
} else { | } else { | ||||
@@ -69,6 +69,10 @@ class HybridModel { | |||||
return model_id_; | return model_id_; | ||||
} | } | ||||
bool IsSingleOp() const { | |||||
return is_single_op_; | |||||
} | |||||
TensorValue* GetVariable(const string &name) const; | TensorValue* GetVariable(const string &name) const; | ||||
NodePtr GetVariableNode(const string &name) const; | NodePtr GetVariableNode(const string &name) const; | ||||
@@ -131,6 +135,7 @@ class HybridModel { | |||||
std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_; | std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_; | ||||
bool is_new_model_desc_ = false; // support aipp | bool is_new_model_desc_ = false; // support aipp | ||||
bool is_single_op_ = false; | |||||
// runtime fields | // runtime fields | ||||
uint32_t device_id_ = 0; | uint32_t device_id_ = 0; | ||||
@@ -49,6 +49,7 @@ Status AiCoreNodeExecutor::Initialize() { | |||||
Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const { | Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const { | ||||
GE_CHECK_NOTNULL(node); | GE_CHECK_NOTNULL(node); | ||||
GELOGI("AiCoreNodeExecutor(%s) LoadTask Start.", node->GetName().c_str()); | GELOGI("AiCoreNodeExecutor(%s) LoadTask Start.", node->GetName().c_str()); | ||||
bool is_single_op = model.IsSingleOp(); | |||||
auto *task_defs = model.GetTaskDefs(node); | auto *task_defs = model.GetTaskDefs(node); | ||||
if (task_defs == nullptr || task_defs->empty()) { | if (task_defs == nullptr || task_defs->empty()) { | ||||
@@ -66,7 +67,8 @@ Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &nod | |||||
AiCoreTaskBuilder builder(node->GetOpDesc(), *task_defs); | AiCoreTaskBuilder builder(node->GetOpDesc(), *task_defs); | ||||
std::unique_ptr<NodeTask> node_task; | std::unique_ptr<NodeTask> node_task; | ||||
GE_CHK_STATUS_RET(builder.BuildTask(node_task, true), "[%s] Failed to build op tasks.", node->GetName().c_str()); | |||||
GE_CHK_STATUS_RET(builder.BuildTask(node_task, true, is_single_op), | |||||
"[%s] Failed to build op tasks.", node->GetName().c_str()); | |||||
task = std::move(node_task); | task = std::move(node_task); | ||||
GELOGI("AiCoreNodeExecutor(%s) LoadTask End.", node->GetName().c_str()); | GELOGI("AiCoreNodeExecutor(%s) LoadTask End.", node->GetName().c_str()); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -65,7 +65,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||||
} | } | ||||
TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | ||||
rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); | rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); | ||||
if (rt_ret != RT_ERROR_NONE) { | |||||
if (rt_ret != RT_ERROR_NONE || is_single_op_) { | |||||
void *bin_handle = nullptr; | void *bin_handle = nullptr; | ||||
if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { | if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { | ||||
GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); | GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); | ||||
@@ -50,6 +50,8 @@ class AiCoreOpTask { | |||||
uint32_t GetBlockDim() const {return block_dim_;} | uint32_t GetBlockDim() const {return block_dim_;} | ||||
// Stores the single-op flag for this task. When the flag is set,
// RegisterTbeHandle takes the TBE handle lookup path even if the stub
// name is already reported as registered by the runtime.
void SetSingleOp(bool is_single_op) { is_single_op_ = is_single_op; }
protected: | protected: | ||||
Status UpdateTilingInfo(TaskContext &context); | Status UpdateTilingInfo(TaskContext &context); | ||||
virtual std::string GetKeyForOpParamSize() const; | virtual std::string GetKeyForOpParamSize() const; | ||||
@@ -72,6 +74,7 @@ class AiCoreOpTask { | |||||
uint32_t args_size_ = 0; | uint32_t args_size_ = 0; | ||||
uint32_t block_dim_ = 1; | uint32_t block_dim_ = 1; | ||||
bool clear_atomic_ = true; | bool clear_atomic_ = true; | ||||
// True when this task was built for a single-op model; set via SetSingleOp().
bool is_single_op_ = false;
std::vector<int> output_indices_to_skip_; | std::vector<int> output_indices_to_skip_; | ||||
}; | }; | ||||
@@ -37,7 +37,9 @@ AiCoreTaskBuilder::AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector | |||||
: op_desc_(op_desc), task_defs_(task_defs) { | : op_desc_(op_desc), task_defs_(task_defs) { | ||||
} | } | ||||
Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic) { | |||||
Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, | |||||
bool ignore_failure_on_atomic, | |||||
bool is_single_op) { | |||||
GE_CHECK_NOTNULL(op_desc_); | GE_CHECK_NOTNULL(op_desc_); | ||||
if (task_defs_.size() > kNumTaskWithAtomicAddrCleanTask) { | if (task_defs_.size() > kNumTaskWithAtomicAddrCleanTask) { | ||||
GELOGE(INTERNAL_ERROR, | GELOGE(INTERNAL_ERROR, | ||||
@@ -68,6 +70,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool i | |||||
auto atomic_task = | auto atomic_task = | ||||
std::unique_ptr<AtomicAddrCleanOpTask>(new(std::nothrow)AtomicAddrCleanOpTask()); | std::unique_ptr<AtomicAddrCleanOpTask>(new(std::nothrow)AtomicAddrCleanOpTask()); | ||||
GE_CHECK_NOTNULL(atomic_task); | GE_CHECK_NOTNULL(atomic_task); | ||||
atomic_task->SetSingleOp(is_single_op); | |||||
GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()), | GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()), | ||||
"[%s] Failed to init task for AtomicAddrClean", | "[%s] Failed to init task for AtomicAddrClean", | ||||
op_desc_->GetName().c_str()); | op_desc_->GetName().c_str()); | ||||
@@ -77,6 +80,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool i | |||||
// build aicore task | // build aicore task | ||||
auto aicore_task = std::unique_ptr<AiCoreOpTask>(new(std::nothrow)AiCoreOpTask()); | auto aicore_task = std::unique_ptr<AiCoreOpTask>(new(std::nothrow)AiCoreOpTask()); | ||||
GE_CHECK_NOTNULL(aicore_task); | GE_CHECK_NOTNULL(aicore_task); | ||||
aicore_task->SetSingleOp(is_single_op); | |||||
GE_CHK_STATUS_RET(aicore_task->Init(*op_desc_, task_defs_.back()), | GE_CHK_STATUS_RET(aicore_task->Init(*op_desc_, task_defs_.back()), | ||||
"[%s] Failed to init task for AtomicAddrClean", | "[%s] Failed to init task for AtomicAddrClean", | ||||
op_desc_->GetName().c_str()); | op_desc_->GetName().c_str()); | ||||
@@ -47,7 +47,7 @@ class AiCoreTaskBuilder { | |||||
AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector<domi::TaskDef> &task_defs); | AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector<domi::TaskDef> &task_defs); | ||||
~AiCoreTaskBuilder() = default; | ~AiCoreTaskBuilder() = default; | ||||
Status BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic); | |||||
Status BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic, bool is_single_op = false); | |||||
private: | private: | ||||
bool ExpectAtomicAddrCleanTask(); | bool ExpectAtomicAddrCleanTask(); | ||||
@@ -251,10 +251,6 @@ Status TaskContext::AllocateOutput(int index, | |||||
} | } | ||||
} | } | ||||
if (outputs_start_[index].GetSize() > 0) { | |||||
rtMemset(outputs_start_[index].MutableData(), outputs_start_[index].GetSize(), 0, outputs_start_[index].GetSize()); | |||||
} | |||||
if (execution_context_->trace_enabled) { | if (execution_context_->trace_enabled) { | ||||
outputs_start_[index].SetName(node_item_->NodeName() + "_out_" + std::to_string(index)); | outputs_start_[index].SetName(node_item_->NodeName() + "_out_" + std::to_string(index)); | ||||
} | } | ||||
@@ -401,7 +397,7 @@ Status TaskContext::PropagateOutputs() { | |||||
subgraph_context_->all_inputs_[input_offset] = *tensor; | subgraph_context_->all_inputs_[input_offset] = *tensor; | ||||
if (execution_context_->trace_enabled) { | if (execution_context_->trace_enabled) { | ||||
subgraph_context_->all_inputs_[input_offset].SetName( | subgraph_context_->all_inputs_[input_offset].SetName( | ||||
dst_node_item->NodeName() + "_in_" + std::to_string(dst_input_idx)); | |||||
node_item_->NodeName() + "_in_" + std::to_string(dst_input_idx)); | |||||
} | } | ||||
} | } | ||||
} | } | ||||