From: @xchu42
Reviewed-by: @wqtshg, @ji_chen
Signed-off-by: @ji_chen
tags/v1.2.0
@@ -56,7 +56,7 @@ std::unique_ptr<TensorBuffer> TensorBuffer::Create(void *buffer, size_t size) {
 }

 TensorBuffer::~TensorBuffer() {
-  if (allocator_ != nullptr && buffer_ != nullptr) {
+  if (allocator_ != nullptr) {
     allocator_->Deallocate(buffer_, mem_type_);
     buffer_ = nullptr;
   }
@@ -71,5 +71,8 @@ int GraphItem::GetParentOutputIndex(size_t index) const {
 const NodeItem *GraphItem::GetOutputNode() const {
   return output_node_;
 }
+const vector<std::pair<const NodeItem *, int>> &GraphItem::GetOutputEdges() const {
+  return output_edges_;
+}
 }  // namespace hybrid
 }  // namespace ge
@@ -29,7 +29,7 @@ class GraphItem {
   const vector<NodeItem *> &GetAllNodes() const;
   const vector<const NodeItem *> &GetInputNodes() const;
   Status GetOutputDescList(std::vector<ConstGeTensorDescPtr> &output_desc_list) const;
+  const vector<std::pair<const NodeItem *, int>> &GetOutputEdges() const;
   int TotalInputs() const {
     return total_inputs_;
   }
@@ -322,5 +322,36 @@ Status HybridModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc,
   }
   return SUCCESS;
 }
+
+TensorValue *HybridModel::GetConstant(const NodePtr &node) const {
+  if (node == nullptr) {
+    GELOGE(PARAM_INVALID, "Param is null");
+    return nullptr;
+  }
+
+  auto it = constant_tensors_.find(node);
+  if (it == constant_tensors_.end()) {
+    GELOGD("constant not found, node name = [%s]", node->GetName().c_str());
+    return nullptr;
+  }
+
+  GELOGD("Got constant tensor, node name = [%s], tensor = %s",
+         node->GetName().c_str(),
+         it->second->DebugString().c_str());
+  return it->second.get();
+}
+
+TensorValue *HybridModel::GetTensor(const NodePtr &node) const {
+  if (node == nullptr) {
+    GELOGE(PARAM_INVALID, "Param is null");
+    return nullptr;
+  }
+
+  if (node->GetType() == CONSTANT) {
+    return GetConstant(node);
+  }
+
+  return GetVariable(node->GetName());
+}
 }  // namespace hybrid
 }  // namespace ge
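For context, the lookup introduced above is a two-level dispatch: constant nodes resolve through a node-keyed map (constant_tensors_), everything else falls through to the existing name-keyed variable lookup. A minimal standalone sketch of that pattern, with simplified stand-in types (Node, Tensor, Model here are placeholders, not the GE classes):

#include <map>
#include <memory>
#include <string>

struct Node { std::string name; std::string type; };
struct Tensor { std::string debug_info; };

class Model {
 public:
  // Constants are keyed by node pointer, variables by name.
  Tensor *GetTensor(const std::shared_ptr<Node> &node) const {
    if (node == nullptr) {
      return nullptr;
    }
    if (node->type == "Const") {
      auto it = constant_tensors_.find(node);
      return it == constant_tensors_.end() ? nullptr : it->second.get();
    }
    auto it = variable_tensors_.find(node->name);
    return it == variable_tensors_.end() ? nullptr : it->second.get();
  }

  std::map<std::shared_ptr<Node>, std::unique_ptr<Tensor>> constant_tensors_;
  std::map<std::string, std::unique_ptr<Tensor>> variable_tensors_;
};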
@@ -73,6 +73,8 @@ class HybridModel {
   NodePtr GetVariableNode(const string &name) const;

+  TensorValue* GetTensor(const NodePtr &node) const;
+
   const std::vector<domi::TaskDef>* GetTaskDefs(const NodePtr &node) const;

   const GraphItem *GetRootGraphItem() const;
@@ -112,13 +114,15 @@ class HybridModel {
   friend class HybridModelBuilder;
   friend class HybridModelAsyncExecutor;

+  TensorValue* GetConstant(const NodePtr &node) const;
+
   std::string model_name_;
   GeRootModelPtr ge_root_model_;
   std::map<uint32_t, NodeItem *> input_nodes_;
-  std::map<std::string, NodePtr> constant_op_nodes_;
   std::map<std::string, NodePtr> device_variable_nodes_;  //lint !e148
   std::map<std::string, NodePtr> host_variable_nodes_;    //lint !e148
   std::map<std::string, std::unique_ptr<TensorValue>> variable_tensors_;
+  std::map<NodePtr, std::unique_ptr<TensorValue>> constant_tensors_;
   std::map<NodePtr, std::vector<domi::TaskDef>> task_defs_;
   std::map<NodePtr, GeModelPtr> known_shape_sub_models_;
@@ -810,7 +810,7 @@ Status HybridModelBuilder::AssignUninitializedConstantOps() {
     GELOGI("no need to assign when exec on host.");
     return SUCCESS;
   }
-  for (auto &it : hybrid_model_.constant_op_nodes_) {
+  for (auto &it : constant_op_nodes_) {
     const string &var_name = it.first;
     const NodePtr &var_node = it.second;
     auto tensor_desc = var_node->GetOpDesc()->MutableOutputDesc(0);
@@ -839,7 +839,7 @@ Status HybridModelBuilder::AssignUninitializedConstantOps() {
 }

 Status HybridModelBuilder::InitConstantOps() {
-  for (auto &it : hybrid_model_.constant_op_nodes_) {
+  for (auto &it : constant_op_nodes_) {
     const string &var_name = it.first;
     const NodePtr &var_node = it.second;
     auto op_desc = var_node->GetOpDesc();
@@ -920,7 +920,47 @@ Status HybridModelBuilder::InitVariableTensors() {
 }

 Status HybridModelBuilder::InitWeights() {
-  // Train do not have weight. (only got ConstOp)
+  auto allocator = NpuMemoryAllocator::GetAllocator();
+  GE_CHECK_NOTNULL(allocator);
+
+  for (auto &it : hybrid_model_.node_items_) {
+    auto &node_item = it.second;
+    if (node_item->node_type != CONSTANT) {
+      continue;
+    }
+
+    const auto &constant_node = node_item->node;
+    auto op_desc = constant_node->GetOpDesc();
+    auto v_weights = ModelUtils::GetWeights(op_desc);
+    if (v_weights.empty()) {
+      GELOGE(INTERNAL_ERROR, "[%s] Constant no not have value", constant_node->GetName().c_str());
+      return INTERNAL_ERROR;
+    }
+
+    auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get());
+    auto output_desc = op_desc->MutableOutputDesc(0);
+    GE_CHECK_NOTNULL(output_desc);
+    auto tensor_size = ge_tensor->GetData().GetSize();
+    GELOGD("[%s] Start to init Constant node [%s], size = %ld",
+           GetGraphName(),
+           constant_node->GetName().c_str(),
+           tensor_size);
+
+    auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size);
+    GE_CHECK_NOTNULL(tensor_buffer);
+    std::unique_ptr<TensorValue> constant_tensor(new (std::nothrow)TensorValue(std::move(tensor_buffer)));
+    GE_CHECK_NOTNULL(constant_tensor);
+    constant_tensor->SetName("Constant_" + op_desc->GetName());
+
+    if (tensor_size > 0) {
+      GE_CHK_RT_RET(rtMemcpy(constant_tensor->MutableData(),
+                             constant_tensor->GetSize(),
+                             ge_tensor->GetData().data(),
+                             ge_tensor->GetData().size(),
+                             RT_MEMCPY_HOST_TO_DEVICE));
+    }
+
+    hybrid_model_.constant_tensors_.emplace(constant_node, std::move(constant_tensor));
+    GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), constant_node->GetName().c_str(), tensor_size);
+  }
+
   return SUCCESS;
 }
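The InitWeights flow added above follows a common pattern: for each Constant node, size a device buffer from the host-side weight blob, copy the bytes only when the blob is non-empty, and record the resulting tensor keyed by the node. A simplified, host-only sketch of the same flow, where DeviceBuffer and the memcpy stand in for TensorBuffer and rtMemcpy(RT_MEMCPY_HOST_TO_DEVICE) and are illustrative rather than the GE/runtime APIs:

#include <cstring>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct DeviceBuffer {              // stand-in for TensorBuffer
  std::vector<unsigned char> data;
};

struct ConstNode {
  std::string name;
  std::vector<unsigned char> weight;  // host-side weight blob
};

// Build one buffer per constant; zero-sized weights still get an (empty) entry.
std::map<std::string, std::unique_ptr<DeviceBuffer>> InitWeightsSketch(
    const std::vector<ConstNode> &constants) {
  std::map<std::string, std::unique_ptr<DeviceBuffer>> out;
  for (const auto &node : constants) {
    auto buf = std::make_unique<DeviceBuffer>();
    buf->data.resize(node.weight.size());
    if (!node.weight.empty()) {
      // In the patch this copy is rtMemcpy(..., RT_MEMCPY_HOST_TO_DEVICE).
      std::memcpy(buf->data.data(), node.weight.data(), node.weight.size());
    }
    out.emplace("Constant_" + node.name, std::move(buf));
  }
  return out;
}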
@@ -1053,7 +1093,7 @@ Status HybridModelBuilder::IndexSpecialNodes() {
         hybrid_model_.device_variable_nodes_.emplace(node->GetName(), node);
       }
     } else if (op_type == CONSTANTOP) {
-      hybrid_model_.constant_op_nodes_.emplace(node->GetName(), node);
+      constant_op_nodes_.emplace(node->GetName(), node);
     } else if (op_type == DATA && node->GetOwnerComputeGraph() != root_graph) {
       NodePtr src_node;
       int peer_out_index = -1;
@@ -1326,7 +1366,7 @@ Status HybridModelBuilder::GetParentNodeOutputIndex(const OpDesc &op_desc, int i
 Status HybridModelBuilder::InitModelMem() {
   hybrid_model_.var_mem_base_ = var_manager_->GetVarMemoryBase(RT_MEMORY_HBM);
   auto total_var_size = hybrid_model_.TotalVarMemSize();
-  if (total_var_size == 0 && !hybrid_model_.constant_op_nodes_.empty()) {
+  if (total_var_size == 0 && !constant_op_nodes_.empty()) {
     total_var_size = var_manager_->GetVarMemSize(RT_MEMORY_HBM) > 0 ? var_manager_->GetVarMemMaxSize() : 0;
     GELOGD("Model var size = 0. but got uninitialized constant. set var size to %zu.", total_var_size);
   }
@@ -1477,6 +1517,10 @@ Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root
     GE_CHECK_NOTNULL(node);
     GE_CHECK_NOTNULL(node->GetOpDesc());
     const auto &op_type = node->GetType();
+    if (op_type == NOOP) {
+      GELOGD("[%s] Skip NoOp", node->GetName().c_str());
+      continue;
+    }

     NodeItem *node_item = nullptr;
     GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node, &node_item));
@@ -48,7 +48,6 @@ class HybridModelBuilder {
   static Status MergeNetOutputNode(ComputeGraph &compute_graph);
   static Status UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGraphPtr &merged_graph);
   static Status UnfoldSubgraph(ComputeGraph &root_graph, ComputeGraph &parent_graph, ComputeGraph &sub_graph);
-  static Status InitWeights();
   static Status BuildInputMapping(GraphItem &graph_item,
                                   std::vector<NodeItem *> &data_nodes,
                                   bool is_root_graph);
@@ -68,6 +67,7 @@ class HybridModelBuilder {
   Status IndexSpecialNodes();
   Status InitRuntimeParams();
   Status InitModelMem();
+  Status InitWeights();
   Status TransAllVarData();
   Status CopyVarData();
   Status VarNodeToTensor(const NodePtr &var_node, std::unique_ptr<TensorValue> &tensor);
@@ -88,8 +88,9 @@ class HybridModelBuilder {
   NodeItem *MutableNodeItem(const NodePtr &node);

   GeRootModelPtr ge_root_model_;
-  std::map<int, std::unique_ptr<TensorValue>> weights_;
   std::map<std::string, GeModelPtr> subgraph_models_;
+  std::map<std::string, NodePtr> constant_op_nodes_;
   HybridModel &hybrid_model_;
   std::map<NodePtr, std::vector<std::pair<int, NodePtr>>> node_ref_inputs_;
   int node_index = 0;
@@ -21,7 +21,20 @@
 namespace ge {
 namespace hybrid {
 REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICORE, AiCoreNodeExecutor);
+namespace {
+bool IsNoOp(const NodeItem &node_item) {
+  for (int i = 0; i < node_item.num_outputs; ++i) {
+    const auto &tensor_desc = node_item.MutableOutputDesc(i);
+    GE_CHECK_NOTNULL(tensor_desc);
+    const auto &shape = tensor_desc->MutableShape();
+    if (shape.IsScalar() || shape.GetShapeSize() > 0) {
+      return false;
+    }
+  }
+  return true;
+}
+}  // namespace

 AiCoreNodeTask::AiCoreNodeTask(std::vector<std::unique_ptr<AiCoreOpTask>> &&tasks) : tasks_(std::move(tasks)) {
 }
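The IsNoOp helper above treats a node as skippable only when every output is a non-scalar tensor with zero elements; a scalar has no dimensions but still carries one value, so it must not be skipped. A standalone sketch of the same rule over plain dimension vectors (not the GE shape classes):

#include <cstdint>
#include <vector>

// One shape = list of dims; an empty list denotes a scalar.
using Shape = std::vector<int64_t>;

int64_t NumElements(const Shape &shape) {
  int64_t n = 1;
  for (int64_t d : shape) {
    n *= d;
  }
  return n;  // 1 for a scalar, 0 if any dim is 0
}

// A node is a no-op only if every output has a non-scalar, zero-element shape.
bool IsNoOpSketch(const std::vector<Shape> &output_shapes) {
  for (const Shape &shape : output_shapes) {
    bool is_scalar = shape.empty();
    if (is_scalar || NumElements(shape) > 0) {
      return false;
    }
  }
  return true;
}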
@@ -104,9 +117,13 @@ std::shared_ptr<NodeTask> AiCoreNodeTaskRegistry::GetTask(const std::string &nod
 Status AiCoreNodeExecutor::CompileTask(const HybridModel &model,
                                        const NodePtr &node, shared_ptr<NodeTask> &task) const {
-  GE_CHECK_NOTNULL(node);
-  auto op_desc = node->GetOpDesc();
-  GE_CHECK_NOTNULL(op_desc);
+  auto node_item = model.GetNodeItem(node);
+  GE_CHECK_NOTNULL(node_item);
+  if (IsNoOp(*node_item)) {
+    task = MakeShared<NoOpTask>();
+    return SUCCESS;
+  }
+
+  auto op_desc = node_item->op_desc;
   GELOGI("AiCoreNodeExecutor(%s) CompileTask Start.", node->GetName().c_str());

   auto ori_node_name = node->GetName();
@@ -150,7 +167,7 @@ Status AiCoreNodeExecutor::CompileTask(const HybridModel &model,
 Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> done_callback) {
   RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeTaskExecuteAsync] Start");
-  if (IsNoOp(context)) {
+  if (IsNoOp(context.GetNodeItem())) {
     GELOGD("[%s] Skipping execution for op with empty outputs", context.GetNodeName());
     auto ret = context.TryExecuteCallback(done_callback);
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeTaskExecuteAsync] End");
@@ -227,19 +244,6 @@ bool AiCoreNodeTask::IsSupportDynamicShape() {
   return true;
 }
-
-bool AiCoreNodeTask::IsNoOp(TaskContext &task_context) {
-  for (int i = 0; i < task_context.NumOutputs(); ++i) {
-    const auto &tensor_desc = task_context.MutableOutputDesc(i);
-    GE_CHECK_NOTNULL(tensor_desc);
-    const auto &shape = tensor_desc->MutableShape();
-    if (shape.IsScalar() || shape.GetShapeSize() > 0) {
-      return false;
-    }
-  }
-  return true;
-}

 TaskCompilerFactory &TaskCompilerFactory::GetInstance() {
   static TaskCompilerFactory instance;
   return instance;
@@ -60,7 +60,6 @@ class AiCoreNodeTask : public NodeTask {
   Status UpdateArgs(TaskContext &context) override;
   Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override;

 private:
-  static bool IsNoOp(TaskContext &task_context);
   std::vector<std::unique_ptr<AiCoreOpTask>> tasks_;
 };
@@ -224,9 +224,9 @@ Status GeLocalNodeExecutor::LoadTask(const HybridModel &model,
             node->GetName().c_str(), node_type.c_str());
       return MEMALLOC_FAILED;
     }
-  } else if (node_type == CONSTANTOP || node_type == VARIABLE) {
+  } else if (node_type == CONSTANT || node_type == CONSTANTOP || node_type == VARIABLE) {
     GELOGI("node %s type %s, use ConstantNodeTask.", node->GetName().c_str(), node_type.c_str());
-    auto tensor = model.GetVariable(node->GetName());
+    auto tensor = model.GetTensor(node);
     if (tensor == nullptr) {
       GELOGE(INTERNAL_ERROR, "Failed to get tensor by name: %s", node->GetName().c_str());
       return INTERNAL_ERROR;
@@ -238,5 +238,13 @@ NodeExecutorRegistrar::NodeExecutorRegistrar(NodeExecutorManager::ExecutorType e
                                              NodeExecutor *(*builder)()) {
   NodeExecutorManager::GetInstance().RegisterExecutorBuilder(executor_type, builder);
 }
+Status NoOpTask::UpdateArgs(TaskContext &context) {
+  GELOGD("[%s] Skipping UpdateArgs for op with empty outputs", context.GetNodeName());
+  return SUCCESS;
+}
+
+Status NoOpTask::ExecuteAsync(TaskContext &context, std::function<void()> done_callback) {
+  GELOGD("[%s] Skipping execute for op with empty outputs", context.GetNodeName());
+  return SUCCESS;
+}
 }  // namespace hybrid
 }  // namespace ge
@@ -75,6 +75,12 @@
   virtual Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) = 0;
 };

+class NoOpTask : public NodeTask {
+ public:
+  Status UpdateArgs(TaskContext &context) override;
+  Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override;
+};
+
 // Node executor
 class NodeExecutor {
  public:
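NoOpTask is in effect a null-object implementation of NodeTask: CompileTask hands it back for empty-output nodes, and the calling code runs it like any other task while it does no work. A minimal sketch of that pattern with simplified placeholder types (Context and Status here are not the GE types):

#include <functional>

using Status = int;
constexpr Status SUCCESS = 0;
struct Context {};  // stand-in for TaskContext

class NodeTask {
 public:
  virtual ~NodeTask() = default;
  virtual Status UpdateArgs(Context &context) = 0;
  virtual Status ExecuteAsync(Context &context, std::function<void()> done) = 0;
};

// Null object: satisfies the interface but performs no work,
// so callers need no special case for empty-output nodes.
class NoOpTaskSketch : public NodeTask {
 public:
  Status UpdateArgs(Context &) override { return SUCCESS; }
  Status ExecuteAsync(Context &, std::function<void()>) override {
    return SUCCESS;  // nothing to launch
  }
};

// Callers treat it like any other task:
Status Run(NodeTask &task, Context &ctx) {
  Status ret = task.UpdateArgs(ctx);
  return ret != SUCCESS ? ret : task.ExecuteAsync(ctx, [] {});
}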