From: @xchu42 Reviewed-by: @ji_chen, @wqtshg Signed-off-by: @ji_chen (tags/v1.3.0)
| @@ -155,6 +155,16 @@ Status HybridModelExecutor::ResetExecutionContext(GraphExecutionContext &context | |||||
| string ctx_id = std::to_string(context.context_id); | string ctx_id = std::to_string(context.context_id); | ||||
| RuntimeInferenceContext::DestroyContext(ctx_id); | RuntimeInferenceContext::DestroyContext(ctx_id); | ||||
| GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext"); | GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext"); | ||||
| RuntimeInferenceContext *ctx = nullptr; | |||||
| GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(ctx_id, &ctx), "Failed to get context"); | |||||
| for (auto &host_tensor : context.model->GetHostTensors()) { | |||||
| auto node_id = host_tensor.first; | |||||
| for (const auto &output_idx_and_tensor : host_tensor.second) { | |||||
| auto output_idx = output_idx_and_tensor.first; | |||||
| GELOGD("Preload const host tensor, node_id = %ld, output id = %d", node_id, output_idx); | |||||
| ctx->SetTensor(node_id, output_idx, output_idx_and_tensor.second.Clone()); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -38,6 +38,16 @@ Status StageExecutor::ResetExecutionContext(GraphExecutionContext &context) { | |||||
| string ctx_id = std::to_string(context.context_id); | string ctx_id = std::to_string(context.context_id); | ||||
| RuntimeInferenceContext::DestroyContext(ctx_id); | RuntimeInferenceContext::DestroyContext(ctx_id); | ||||
| GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext"); | GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext"); | ||||
| RuntimeInferenceContext *ctx = nullptr; | |||||
| GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(ctx_id, &ctx), "Failed to get context"); | |||||
| for (auto &host_tensor : context.model->GetHostTensors()) { | |||||
| auto node_id = host_tensor.first; | |||||
| for (const auto &output_idx_and_tensor : host_tensor.second) { | |||||
| auto output_idx = output_idx_and_tensor.first; | |||||
| GELOGD("Preload const host tensor, node_id = %ld, output id = %d", node_id, output_idx); | |||||
| ctx->SetTensor(node_id, output_idx, output_idx_and_tensor.second.Clone()); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -358,6 +358,10 @@ TensorValue *HybridModel::GetTensor(const NodePtr &node) const { | |||||
| return GetVariable(node->GetName()); | return GetVariable(node->GetName()); | ||||
| } | } | ||||
| const map<int64_t, std::vector<std::pair<int, Tensor>>> &HybridModel::GetHostTensors() const { | |||||
| return host_tensors_; | |||||
| } | |||||
| void *HybridModel::GetGlobalStep() const { | void *HybridModel::GetGlobalStep() const { | ||||
| if (global_step_ == nullptr) { | if (global_step_ == nullptr) { | ||||
| return nullptr; | return nullptr; | ||||
| @@ -93,6 +93,8 @@ class HybridModel { | |||||
| TensorValue* GetTensor(const NodePtr &node) const; | TensorValue* GetTensor(const NodePtr &node) const; | ||||
| const std::map<int64_t, std::vector<std::pair<int, Tensor>>> &GetHostTensors() const; | |||||
| const std::vector<domi::TaskDef>* GetTaskDefs(const NodePtr &node) const; | const std::vector<domi::TaskDef>* GetTaskDefs(const NodePtr &node) const; | ||||
| const GraphItem *GetRootGraphItem() const; | const GraphItem *GetRootGraphItem() const; | ||||
| @@ -148,6 +150,7 @@ class HybridModel { | |||||
| std::unique_ptr<GraphItem> root_graph_item_; | std::unique_ptr<GraphItem> root_graph_item_; | ||||
| std::map<std::string, std::unique_ptr<GraphItem>> subgraph_items_; | std::map<std::string, std::unique_ptr<GraphItem>> subgraph_items_; | ||||
| std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_; | std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_; | ||||
| std::map<int64_t, std::vector<std::pair<int, Tensor>>> host_tensors_; | |||||
| bool is_new_model_desc_ = false; // support aipp | bool is_new_model_desc_ = false; // support aipp | ||||
| bool is_single_op_ = false; | bool is_single_op_ = false; | ||||
| @@ -151,6 +151,9 @@ Status HybridModelBuilder::Build() { | |||||
| GE_CHK_STATUS_RET(InitConstantOps(), "[Invoke][InitConstantOps] failed, model_name_:[%s]", GetGraphName()); | GE_CHK_STATUS_RET(InitConstantOps(), "[Invoke][InitConstantOps] failed, model_name_:[%s]", GetGraphName()); | ||||
| GE_CHK_STATUS_RET(InitVariableTensors(), "[Invoke][InitVariableTensors], model_name_:[%s]", GetGraphName()); | GE_CHK_STATUS_RET(InitVariableTensors(), "[Invoke][InitVariableTensors], model_name_:[%s]", GetGraphName()); | ||||
| GE_CHK_STATUS_RET(LoadTasks(), "[Invoke][LoadTasks] failed, model_name_:[%s]", GetGraphName()); | GE_CHK_STATUS_RET(LoadTasks(), "[Invoke][LoadTasks] failed, model_name_:[%s]", GetGraphName()); | ||||
| GE_CHK_STATUS_RET(OptimizeDependenciesForConstantInputs(), | |||||
| "[Invoke][OptimizeDependenciesForConstantInputs] failed, model_name_:[%s]", | |||||
| GetGraphName()); | |||||
| GELOGI("[%s] Done building hybrid model successfully.", GetGraphName()); | GELOGI("[%s] Done building hybrid model successfully.", GetGraphName()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -353,6 +356,7 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s | |||||
| auto src_node_item = MutableNodeItem(src_node); | auto src_node_item = MutableNodeItem(src_node); | ||||
| src_node_item->to_const_output_id_list.emplace(peer_out_anchor->GetIdx()); | src_node_item->to_const_output_id_list.emplace(peer_out_anchor->GetIdx()); | ||||
| dependent_for_shape_inference.emplace(src_node); | dependent_for_shape_inference.emplace(src_node); | ||||
| host_input_value_dependencies_[&node_item].emplace_back(peer_out_anchor->GetIdx(), src_node_item); | |||||
| GELOGD("[%s] Dependent added from output of [%s:%d]", | GELOGD("[%s] Dependent added from output of [%s:%d]", | ||||
| node_item.NodeName().c_str(), | node_item.NodeName().c_str(), | ||||
| src_node_item->NodeName().c_str(), | src_node_item->NodeName().c_str(), | ||||
| @@ -1536,7 +1540,7 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) { | |||||
| src_node->GetName().c_str(), | src_node->GetName().c_str(), | ||||
| src_op_type.c_str()); | src_op_type.c_str()); | ||||
| if (src_op_type != CONSTANTOP && src_op_type != VARIABLE) { | |||||
| if (src_op_type != CONSTANTOP && src_op_type != CONSTANT && src_op_type != VARIABLE) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| @@ -1545,6 +1549,9 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) { | |||||
| GELOGD("Got parent output index = %u", parent_index); | GELOGD("Got parent output index = %u", parent_index); | ||||
| GE_CHECK_LE(parent_index, INT32_MAX); | GE_CHECK_LE(parent_index, INT32_MAX); | ||||
| node_item.ref_outputs.emplace(static_cast<int>(parent_index), src_node); | node_item.ref_outputs.emplace(static_cast<int>(parent_index), src_node); | ||||
| if (src_op_type == CONSTANTOP || src_op_type == CONSTANT) { | |||||
| known_subgraph_constant_output_refs_[&node_item].emplace(parent_index, src_node); | |||||
| } | |||||
| } | } | ||||
| // Data nodes marked with REF_VAR_SRC_VAR_NAME | // Data nodes marked with REF_VAR_SRC_VAR_NAME | ||||
| @@ -2176,5 +2183,88 @@ Status HybridModelBuilder::ParseDependentByParallelGroup() { | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status HybridModelBuilder::OptimizeDependenciesForConstantInputs() { | |||||
| std::map<NodePtr, std::set<uint32_t>> converted; | |||||
| for (auto &it : host_input_value_dependencies_) { | |||||
| auto node_item = it.first; | |||||
| std::map<NodeItem *, int> ref_counts; | |||||
| bool changed = false; | |||||
| for (auto output_idx_and_node : it.second) { | |||||
| auto output_idx = output_idx_and_node.first; | |||||
| auto src_node_item = output_idx_and_node.second; | |||||
| ++ref_counts[src_node_item]; | |||||
| NodePtr constant_node; | |||||
| if (src_node_item->node_type == CONSTANT || src_node_item->node_type == CONSTANTOP) { | |||||
| constant_node = src_node_item->node; | |||||
| GELOGD("src node [%s] is a constant", src_node_item->NodeName().c_str()); | |||||
| } else { | |||||
| auto iter = known_subgraph_constant_output_refs_.find(src_node_item); | |||||
| if (iter != known_subgraph_constant_output_refs_.end()) { | |||||
| constant_node = iter->second[output_idx]; | |||||
| if (constant_node != nullptr) { | |||||
| GELOGD("Output[%u] of subgraph [%s] is a constant", output_idx, src_node_item->NodeName().c_str()); | |||||
| } | |||||
| } | |||||
| } | |||||
| if (constant_node == nullptr) { | |||||
| GELOGD("Output[%u] of [%s] is not a constant", output_idx, src_node_item->NodeName().c_str()); | |||||
| continue; | |||||
| } | |||||
| if (converted[constant_node].count(output_idx) == 0) { | |||||
| GE_CHK_STATUS_RET(Convert2HostTensor(constant_node, src_node_item->node_id, output_idx), | |||||
| "[%s] Failed to convert constant to host tensor", constant_node->GetName().c_str()); | |||||
| converted[constant_node].emplace(output_idx); | |||||
| } | |||||
| src_node_item->to_const_output_id_list.erase(output_idx); | |||||
| --ref_counts[src_node_item]; | |||||
| changed = true; | |||||
| } | |||||
| if (changed) { | |||||
| std::vector<NodePtr> depends_to_keep; | |||||
| for (auto &ref_count_it : ref_counts) { | |||||
| if (ref_count_it.second == 0) { | |||||
| GELOGD("[%s] no longer depends on [%s] for shape inference", | |||||
| node_item->NodeName().c_str(), | |||||
| ref_count_it.first->NodeName().c_str()); | |||||
| } else { | |||||
| depends_to_keep.emplace_back(ref_count_it.first->node); | |||||
| } | |||||
| } | |||||
| node_item->dependents_for_shape_inference.swap(depends_to_keep); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status HybridModelBuilder::Convert2HostTensor(const NodePtr &node, int node_id, uint32_t output_idx) { | |||||
| auto tensor_value = hybrid_model_.GetTensor(node); | |||||
| GE_CHECK_NOTNULL(tensor_value); | |||||
| auto tensor_desc = node->GetOpDesc()->MutableOutputDesc(0); | |||||
| GE_CHECK_NOTNULL(tensor_desc); | |||||
| Tensor tensor(TensorAdapter::GeTensorDesc2TensorDesc(*tensor_desc)); | |||||
| int64_t tensor_size = -1; | |||||
| GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorSizeInBytes(*tensor_desc, tensor_size), | |||||
| "[%s] Failed to get tensor size", node->GetName().c_str()); | |||||
| if (tensor_size > 0) { | |||||
| auto copy_size = static_cast<size_t>(tensor_size); | |||||
| GE_CHECK_GE(tensor_value->GetSize(), copy_size); | |||||
| std::vector<uint8_t> buffer(copy_size); | |||||
| GE_CHK_RT_RET(rtMemcpy(buffer.data(), | |||||
| copy_size, | |||||
| tensor_value->GetData(), | |||||
| copy_size, | |||||
| RT_MEMCPY_DEVICE_TO_HOST)); | |||||
| tensor.SetData(std::move(buffer)); | |||||
| GELOGD("[%s] Copy constant tensor to host successfully, size = %zu", node->GetName().c_str(), copy_size); | |||||
| } | |||||
| hybrid_model_.host_tensors_[node_id].emplace_back(output_idx, std::move(tensor)); | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -91,6 +91,8 @@ class HybridModelBuilder { | |||||
| Status GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list); | Status GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list); | ||||
| Status GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list); | Status GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list); | ||||
| Status GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id, vector<domi::TaskDef> &task_def_list); | Status GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id, vector<domi::TaskDef> &task_def_list); | ||||
| Status OptimizeDependenciesForConstantInputs(); | |||||
| Status Convert2HostTensor(const NodePtr &node, int node_id, uint32_t output_idx); | |||||
| const char* GetGraphName() const { | const char* GetGraphName() const { | ||||
| return hybrid_model_.model_name_.c_str(); | return hybrid_model_.model_name_.c_str(); | ||||
| @@ -110,6 +112,12 @@ class HybridModelBuilder { | |||||
| RuntimeParam &runtime_param_; | RuntimeParam &runtime_param_; | ||||
| VarManager *var_manager_ = nullptr; | VarManager *var_manager_ = nullptr; | ||||
| // map<known_node_item, map<output_idx, constant_node>> | |||||
| std::map<NodeItem *, std::map<uint32_t, NodePtr>> known_subgraph_constant_output_refs_; | |||||
| // map<dst_node_item, vector<output_idx, src_node_item>> | |||||
| std::map<NodeItem *, std::vector<std::pair<uint32_t, NodeItem *>>> host_input_value_dependencies_; | |||||
| }; | }; | ||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -466,3 +466,77 @@ TEST_F(UtestGeHybrid, hybrid_model_executor_check_shape) { | |||||
| ret = HybridModelExecutor::CheckInputShapeByShapeRange(&graph_item, args1); | ret = HybridModelExecutor::CheckInputShapeByShapeRange(&graph_item, args1); | ||||
| ASSERT_EQ(ret, ge::INTERNAL_ERROR); | ASSERT_EQ(ret, ge::INTERNAL_ERROR); | ||||
| } | } | ||||
| TEST_F(UtestGeHybrid, TestOptimizeDependenciesForConstInputs) { | |||||
| ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>("test"); | |||||
| GeRootModelPtr root_model = MakeShared<ge::GeRootModel>(compute_graph); | |||||
| HybridModel model(root_model); | |||||
| model.root_graph_ = compute_graph; | |||||
| HybridModelBuilder builder(model); | |||||
| GeShape shape({2, 16}); | |||||
| GeTensorDesc tensor_desc(shape); | |||||
| std::unique_ptr<NodeItem> const_node_item; | |||||
| { | |||||
| OpDescPtr const_op_desc = CreateOpDesc("Constant", "Const"); | |||||
| const_op_desc->AddOutputDesc(tensor_desc); | |||||
| auto const_node = compute_graph->AddNode(const_op_desc); | |||||
| NodeItem::Create(const_node, const_node_item); | |||||
| } | |||||
| std::unique_ptr<NodeItem> non_const_node_item; | |||||
| { | |||||
| OpDescPtr op_desc = CreateOpDesc("Add", "Add"); | |||||
| op_desc->AddOutputDesc(tensor_desc); | |||||
| auto const_node = compute_graph->AddNode(op_desc); | |||||
| NodeItem::Create(const_node, non_const_node_item); | |||||
| } | |||||
| std::unique_ptr<NodeItem> known_node_item; | |||||
| { | |||||
| OpDescPtr known_op_desc = CreateOpDesc("known", "PartitionedCall"); | |||||
| known_op_desc->AddOutputDesc(tensor_desc); | |||||
| known_op_desc->AddOutputDesc(tensor_desc); | |||||
| auto known_node = compute_graph->AddNode(known_op_desc); | |||||
| NodeItem::Create(known_node, known_node_item); | |||||
| } | |||||
| std::unique_ptr<NodeItem> dst_node_item; | |||||
| { | |||||
| OpDescPtr known_op_desc = CreateOpDesc("SomeOp", "SomeOpType "); | |||||
| known_op_desc->AddOutputDesc(tensor_desc); | |||||
| known_op_desc->AddOutputDesc(tensor_desc); | |||||
| auto known_node = compute_graph->AddNode(known_op_desc); | |||||
| NodeItem::Create(known_node, dst_node_item); | |||||
| } | |||||
| float buffer[2 * 16]; | |||||
| unique_ptr<TensorValue> tensor_value(new TensorValue(buffer, sizeof(buffer))); | |||||
| model.constant_tensors_[const_node_item->node] = std::move(tensor_value); | |||||
| // Case 1. connect to Const | |||||
| auto output_id = 1; | |||||
| builder.host_input_value_dependencies_[dst_node_item.get()].emplace_back(output_id, const_node_item.get()); | |||||
| builder.host_input_value_dependencies_[dst_node_item.get()].emplace_back(0, non_const_node_item.get()); | |||||
| dst_node_item->dependents_for_shape_inference.emplace_back(const_node_item->node); | |||||
| dst_node_item->dependents_for_shape_inference.emplace_back(non_const_node_item->node); | |||||
| ASSERT_EQ(builder.OptimizeDependenciesForConstantInputs(), SUCCESS); | |||||
| ASSERT_EQ(dst_node_item->dependents_for_shape_inference.size(), 1); | |||||
| ASSERT_EQ(dst_node_item->dependents_for_shape_inference[0], non_const_node_item->node); | |||||
| // Case 2. connect to known-subgraph, netoutput connect to Const | |||||
| builder.host_input_value_dependencies_.clear(); | |||||
| dst_node_item->dependents_for_shape_inference.clear(); | |||||
| builder.known_subgraph_constant_output_refs_[known_node_item.get()].emplace(output_id, const_node_item->node); | |||||
| builder.host_input_value_dependencies_[dst_node_item.get()].emplace_back(output_id, known_node_item.get()); | |||||
| builder.host_input_value_dependencies_[dst_node_item.get()].emplace_back(0, non_const_node_item.get()); | |||||
| dst_node_item->dependents_for_shape_inference.emplace_back(known_node_item->node); | |||||
| dst_node_item->dependents_for_shape_inference.emplace_back(non_const_node_item->node); | |||||
| ASSERT_EQ(builder.OptimizeDependenciesForConstantInputs(), SUCCESS); | |||||
| ASSERT_EQ(dst_node_item->dependents_for_shape_inference.size(), 1); | |||||
| ASSERT_EQ(dst_node_item->dependents_for_shape_inference[0], non_const_node_item->node); | |||||
| } | |||||