diff --git a/ge/hybrid/common/tensor_value.cc b/ge/hybrid/common/tensor_value.cc index a1a57f63..16ecfaa4 100644 --- a/ge/hybrid/common/tensor_value.cc +++ b/ge/hybrid/common/tensor_value.cc @@ -56,7 +56,7 @@ std::unique_ptr TensorBuffer::Create(void *buffer, size_t size) { } TensorBuffer::~TensorBuffer() { - if (allocator_ != nullptr && buffer_ != nullptr) { + if (allocator_ != nullptr) { /* NOTE(review): buffer_ is no longer checked here, so Deallocate may now receive a null buffer_; presumably the allocator tolerates that - confirm. */ allocator_->Deallocate(buffer_, mem_type_); buffer_ = nullptr; } diff --git a/ge/hybrid/model/graph_item.cc b/ge/hybrid/model/graph_item.cc index 067070c5..4e3faf70 100644 --- a/ge/hybrid/model/graph_item.cc +++ b/ge/hybrid/model/graph_item.cc @@ -71,5 +71,8 @@ int GraphItem::GetParentOutputIndex(size_t index) const { const NodeItem *GraphItem::GetOutputNode() const { return output_node_; } +const vector> &GraphItem::GetOutputEdges() const { /* Read-only accessor: returns a const reference to output_edges_. */ + return output_edges_; +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/graph_item.h b/ge/hybrid/model/graph_item.h index 64d809ee..6fab9b44 100644 --- a/ge/hybrid/model/graph_item.h +++ b/ge/hybrid/model/graph_item.h @@ -29,7 +29,7 @@ class GraphItem { const vector &GetAllNodes() const; const vector &GetInputNodes() const; Status GetOutputDescList(std::vector &output_desc_list) const; - + const vector> &GetOutputEdges() const; int TotalInputs() const { return total_inputs_; } diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc index 132b0f8c..91b6a549 100644 --- a/ge/hybrid/model/hybrid_model.cc +++ b/ge/hybrid/model/hybrid_model.cc @@ -322,5 +322,36 @@ Status HybridModel::GetOutputDescInfo(vector &output_desc, } return SUCCESS; } + +TensorValue *HybridModel::GetConstant(const NodePtr &node) const { /* Looks up node in constant_tensors_ and returns a non-owning pointer to the stored TensorValue; returns nullptr (after logging) when node is null or has no entry. GetTensor, defined right after this function, forwards CONSTANT-typed nodes here and resolves every other node type through GetVariable by node name; it also returns nullptr for a null node. */ + if (node == nullptr) { + GELOGE(PARAM_INVALID, "Param is null"); + return nullptr; + } + + auto it = constant_tensors_.find(node); + if (it == constant_tensors_.end()) { + GELOGD("constant not found, node name = [%s]", node->GetName().c_str()); + return nullptr; + } + + GELOGD("Got constant 
tensor, node name = [%s], tensor = %s", + node->GetName().c_str(), + it->second->DebugString().c_str()); + return it->second.get(); +} + +TensorValue *HybridModel::GetTensor(const NodePtr &node) const { + if (node == nullptr) { + GELOGE(PARAM_INVALID, "Param is null"); + return nullptr; + } + + if (node->GetType() == CONSTANT) { + return GetConstant(node); + } + + return GetVariable(node->GetName()); +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h index 5fd5f8f5..e521b776 100644 --- a/ge/hybrid/model/hybrid_model.h +++ b/ge/hybrid/model/hybrid_model.h @@ -73,6 +73,8 @@ class HybridModel { NodePtr GetVariableNode(const string &name) const; + TensorValue* GetTensor(const NodePtr &node) const; + const std::vector* GetTaskDefs(const NodePtr &node) const; const GraphItem *GetRootGraphItem() const; @@ -112,13 +114,15 @@ class HybridModel { friend class HybridModelBuilder; friend class HybridModelAsyncExecutor; + TensorValue* GetConstant(const NodePtr &node) const; + std::string model_name_; GeRootModelPtr ge_root_model_; std::map input_nodes_; - std::map constant_op_nodes_; std::map device_variable_nodes_; //lint !e148 std::map host_variable_nodes_; //lint !e148 std::map> variable_tensors_; + std::map> constant_tensors_; std::map> task_defs_; std::map known_shape_sub_models_; diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index ccbef156..da5218c5 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -810,7 +810,7 @@ Status HybridModelBuilder::AssignUninitializedConstantOps() { GELOGI("no need to assign when exec on host."); return SUCCESS; } - for (auto &it : hybrid_model_.constant_op_nodes_) { + for (auto &it : constant_op_nodes_) { const string &var_name = it.first; const NodePtr &var_node = it.second; auto tensor_desc = var_node->GetOpDesc()->MutableOutputDesc(0); @@ -839,7 +839,7 @@ Status 
HybridModelBuilder::AssignUninitializedConstantOps() { } Status HybridModelBuilder::InitConstantOps() { - for (auto &it : hybrid_model_.constant_op_nodes_) { + for (auto &it : constant_op_nodes_) { const string &var_name = it.first; const NodePtr &var_node = it.second; auto op_desc = var_node->GetOpDesc(); @@ -920,7 +920,47 @@ Status HybridModelBuilder::InitVariableTensors() { } Status HybridModelBuilder::InitWeights() { - // Train do not have weight. (only got ConstOp) + /* Builds a TensorValue for every node item whose type is CONSTANT: takes the first weight returned by ModelUtils::GetWeights, allocates a buffer of the same size through NpuMemoryAllocator, copies the weight data host-to-device with rtMemcpy (skipped for empty weights) and stores the tensor in hybrid_model_.constant_tensors_ keyed by the node. Returns INTERNAL_ERROR when a Constant node carries no weight. */ auto allocator = NpuMemoryAllocator::GetAllocator(); + GE_CHECK_NOTNULL(allocator); + + for (auto &it : hybrid_model_.node_items_) { + auto &node_item = it.second; + if (node_item->node_type != CONSTANT) { + continue; + } + + const auto &constant_node = node_item->node; + auto op_desc = constant_node->GetOpDesc(); + auto v_weights = ModelUtils::GetWeights(op_desc); + if (v_weights.empty()) { + GELOGE(INTERNAL_ERROR, "[%s] Constant does not have value", constant_node->GetName().c_str()); + return INTERNAL_ERROR; + } + auto *ge_tensor = const_cast(v_weights[0].get()); + auto output_desc = op_desc->MutableOutputDesc(0); + GE_CHECK_NOTNULL(output_desc); + auto tensor_size = ge_tensor->GetData().GetSize(); /* logged with %zu below, as InitModelMem does for its size value; assumes GetSize() yields size_t - TODO confirm */ + GELOGD("[%s] Start to init Constant node [%s], size = %zu", + GetGraphName(), + constant_node->GetName().c_str(), + tensor_size); + + auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size); + GE_CHECK_NOTNULL(tensor_buffer); + std::unique_ptr constant_tensor(new (std::nothrow)TensorValue(std::move(tensor_buffer))); + GE_CHECK_NOTNULL(constant_tensor); + constant_tensor->SetName("Constant_" + op_desc->GetName()); + if (tensor_size > 0) { + GE_CHK_RT_RET(rtMemcpy(constant_tensor->MutableData(), + constant_tensor->GetSize(), + ge_tensor->GetData().data(), + ge_tensor->GetData().size(), + RT_MEMCPY_HOST_TO_DEVICE)); + } + + hybrid_model_.constant_tensors_.emplace(constant_node, std::move(constant_tensor)); + GELOGD("[%s] Constant node [%s] added, size = %zu", GetGraphName(), 
constant_node->GetName().c_str(), tensor_size); + } return SUCCESS; } @@ -1053,7 +1093,7 @@ Status HybridModelBuilder::IndexSpecialNodes() { hybrid_model_.device_variable_nodes_.emplace(node->GetName(), node); } } else if (op_type == CONSTANTOP) { - hybrid_model_.constant_op_nodes_.emplace(node->GetName(), node); + constant_op_nodes_.emplace(node->GetName(), node); } else if (op_type == DATA && node->GetOwnerComputeGraph() != root_graph) { NodePtr src_node; int peer_out_index = -1; @@ -1326,7 +1366,7 @@ Status HybridModelBuilder::GetParentNodeOutputIndex(const OpDesc &op_desc, int i Status HybridModelBuilder::InitModelMem() { hybrid_model_.var_mem_base_ = var_manager_->GetVarMemoryBase(RT_MEMORY_HBM); auto total_var_size = hybrid_model_.TotalVarMemSize(); - if (total_var_size == 0 && !hybrid_model_.constant_op_nodes_.empty()) { + if (total_var_size == 0 && !constant_op_nodes_.empty()) { total_var_size = var_manager_->GetVarMemSize(RT_MEMORY_HBM) > 0 ? var_manager_->GetVarMemMaxSize() : 0; GELOGD("Model var size = 0. but got uninitialized constant. 
set var size to %zu.", total_var_size); } @@ -1477,6 +1517,10 @@ Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root GE_CHECK_NOTNULL(node); GE_CHECK_NOTNULL(node->GetOpDesc()); const auto &op_type = node->GetType(); + if (op_type == NOOP) { /* NOOP-typed nodes are skipped before GetOrCreateNodeItem, so no NodeItem is created for them here */ + GELOGD("[%s] Skip NoOp", node->GetName().c_str()); + continue; + } NodeItem *node_item = nullptr; GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node, &node_item)); diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h index bb349d86..a11faae2 100644 --- a/ge/hybrid/model/hybrid_model_builder.h +++ b/ge/hybrid/model/hybrid_model_builder.h @@ -48,7 +48,6 @@ class HybridModelBuilder { static Status MergeNetOutputNode(ComputeGraph &compute_graph); static Status UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGraphPtr &merged_graph); static Status UnfoldSubgraph(ComputeGraph &root_graph, ComputeGraph &parent_graph, ComputeGraph &sub_graph); - static Status InitWeights(); static Status BuildInputMapping(GraphItem &graph_item, std::vector &data_nodes, bool is_root_graph); @@ -68,6 +67,7 @@ class HybridModelBuilder { Status IndexSpecialNodes(); Status InitRuntimeParams(); Status InitModelMem(); + Status InitWeights(); Status TransAllVarData(); Status CopyVarData(); Status VarNodeToTensor(const NodePtr &var_node, std::unique_ptr &tensor); @@ -88,8 +88,9 @@ class HybridModelBuilder { NodeItem *MutableNodeItem(const NodePtr &node); GeRootModelPtr ge_root_model_; - std::map> weights_; std::map subgraph_models_; + std::map constant_op_nodes_; + HybridModel &hybrid_model_; std::map>> node_ref_inputs_; int node_index = 0; diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc index d2cfbece..2abc5b03 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc @@ -21,7 +21,20 @@ namespace ge { namespace hybrid { 
REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICORE, AiCoreNodeExecutor); +namespace { +bool IsNoOp(const NodeItem &node_item) { /* Returns false as soon as one output of node_item is a scalar or has a positive shape size; returns true otherwise, including when num_outputs is 0. NOTE(review): GE_CHECK_NOTNULL is used inside a bool-returning function - presumably it returns a non-zero status on a null tensor_desc, which would read as true here; confirm. */ + for (int i = 0; i < node_item.num_outputs; ++i) { + const auto &tensor_desc = node_item.MutableOutputDesc(i); + GE_CHECK_NOTNULL(tensor_desc); + const auto &shape = tensor_desc->MutableShape(); + if (shape.IsScalar() || shape.GetShapeSize() > 0) { + return false; + } + } + return true; +} +} // namespace AiCoreNodeTask::AiCoreNodeTask(std::vector> &&tasks) : tasks_(std::move(tasks)) { } @@ -104,9 +117,13 @@ std::shared_ptr AiCoreNodeTaskRegistry::GetTask(const std::string &nod Status AiCoreNodeExecutor::CompileTask(const HybridModel &model, const NodePtr &node, shared_ptr &task) const { - GE_CHECK_NOTNULL(node); - auto op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); + auto node_item = model.GetNodeItem(node); + GE_CHECK_NOTNULL(node_item); + if (IsNoOp(*node_item)) { /* Nodes classified as no-op by IsNoOp get the task built by MakeShared (presumably a NoOpTask; the template argument is not visible here) and skip task compilation. NOTE(review): node is no longer null-checked in this function before node->GetName() is used below; presumably GetNodeItem yields nullptr for a null node - confirm. */ + task = MakeShared(); + return SUCCESS; + } + auto op_desc = node_item->op_desc; GELOGI("AiCoreNodeExecutor(%s) CompileTask Start.", node->GetName().c_str()); auto ori_node_name = node->GetName(); @@ -150,7 +167,7 @@ Status AiCoreNodeExecutor::CompileTask(const HybridModel &model, Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function done_callback) { RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeTaskExecuteAsync] Start"); - if (IsNoOp(context)) { + if (IsNoOp(context.GetNodeItem())) { GELOGD("[%s] Skipping execution for op with empty outputs", context.GetNodeName()); auto ret = context.TryExecuteCallback(done_callback); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeTaskExecuteAsync] End"); @@ -227,19 +244,6 @@ bool AiCoreNodeTask::IsSupportDynamicShape() { return true; } -bool AiCoreNodeTask::IsNoOp(TaskContext &task_context) { - for (int i = 0; i < task_context.NumOutputs(); ++i) { - const auto &tensor_desc = 
task_context.MutableOutputDesc(i); - GE_CHECK_NOTNULL(tensor_desc); - const auto &shape = tensor_desc->MutableShape(); - if (shape.IsScalar() || shape.GetShapeSize() > 0) { - return false; - } - } - - return true; -} - TaskCompilerFactory &TaskCompilerFactory::GetInstance() { static TaskCompilerFactory instance; return instance; diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.h b/ge/hybrid/node_executor/aicore/aicore_node_executor.h index 9e92a160..f036ce85 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.h +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.h @@ -60,7 +60,6 @@ class AiCoreNodeTask : public NodeTask { Status UpdateArgs(TaskContext &context) override; Status ExecuteAsync(TaskContext &context, std::function done_callback) override; private: - static bool IsNoOp(TaskContext &task_context); std::vector> tasks_; }; diff --git a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc index a52e5670..50890d6a 100755 --- a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc +++ b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc @@ -224,9 +224,9 @@ Status GeLocalNodeExecutor::LoadTask(const HybridModel &model, node->GetName().c_str(), node_type.c_str()); return MEMALLOC_FAILED; } - } else if (node_type == CONSTANTOP || node_type == VARIABLE) { + } else if (node_type == CONSTANT || node_type == CONSTANTOP || node_type == VARIABLE) { GELOGI("node %s type %s, use ConstantNodeTask.", node->GetName().c_str(), node_type.c_str()); - auto tensor = model.GetVariable(node->GetName()); + auto tensor = model.GetTensor(node); /* CONSTANT nodes are now accepted too, so the tensor is fetched by node through GetTensor rather than by variable name only */ if (tensor == nullptr) { GELOGE(INTERNAL_ERROR, "Failed to get tensor by name: %s", node->GetName().c_str()); return INTERNAL_ERROR; diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc index 95e50c31..e7cdd7c9 100755 --- a/ge/hybrid/node_executor/node_executor.cc +++ 
b/ge/hybrid/node_executor/node_executor.cc @@ -238,5 +238,13 @@ NodeExecutorRegistrar::NodeExecutorRegistrar(NodeExecutorManager::ExecutorType e NodeExecutor *(*builder)()) { NodeExecutorManager::GetInstance().RegisterExecutorBuilder(executor_type, builder); } +Status NoOpTask::UpdateArgs(TaskContext &context) { /* A no-op node has no arguments to refresh; this only logs and reports success. */ + GELOGD("[%s] Skipping UpdateArgs for op with empty outputs", context.GetNodeName()); + return SUCCESS; +} +Status NoOpTask::ExecuteAsync(TaskContext &context, std::function done_callback) { /* Nothing is launched for a no-op node, but done_callback must still be handed to the context, exactly as the AiCore no-op path does via TryExecuteCallback; returning SUCCESS without it would leave the callback uninvoked. */ + GELOGD("[%s] Skipping execute for op with empty outputs", context.GetNodeName()); + return context.TryExecuteCallback(done_callback); +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/node_executor.h b/ge/hybrid/node_executor/node_executor.h index c2d32250..17ccc012 100644 --- a/ge/hybrid/node_executor/node_executor.h +++ b/ge/hybrid/node_executor/node_executor.h @@ -75,6 +75,12 @@ class NodeTask { virtual Status ExecuteAsync(TaskContext &context, std::function done_callback) = 0; }; +class NoOpTask : public NodeTask { /* NodeTask that performs no work; its log messages describe it as the task for ops with empty outputs. */ + public: + Status UpdateArgs(TaskContext &context) override; + Status ExecuteAsync(TaskContext &context, std::function done_callback) override; +}; + // Node executor class NodeExecutor { public: