| @@ -18,14 +18,11 @@ | |||||
| #include "common/ge/ge_util.h" | #include "common/ge/ge_util.h" | ||||
| #include "common/helper/model_helper.h" | #include "common/helper/model_helper.h" | ||||
| #include "common/opskernel/ops_kernel_info_types.h" | #include "common/opskernel/ops_kernel_info_types.h" | ||||
| #include "graph/build/logical_stream_allocator.h" | |||||
| #include "graph/build/run_context.h" | #include "graph/build/run_context.h" | ||||
| #include "graph/build/stream_graph_optimizer.h" | #include "graph/build/stream_graph_optimizer.h" | ||||
| #include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
| #include "graph/passes/mark_same_addr_pass.h" | |||||
| #include "graph/utils/node_utils.h" | #include "graph/utils/node_utils.h" | ||||
| #include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
| #include "graph/common/ge_call_wrapper.h" | |||||
| #include "init/gelib.h" | #include "init/gelib.h" | ||||
| #include "model/ge_model.h" | #include "model/ge_model.h" | ||||
| @@ -37,21 +34,6 @@ const int32_t kInvalidPerfLevel = -1; | |||||
| namespace ge { | namespace ge { | ||||
| GraphBuilder::GraphBuilder() : build_mode_(BuildMode::GEN_TASK_WITH_FUSION), hcom_parallel_(false) {} | GraphBuilder::GraphBuilder() : build_mode_(BuildMode::GEN_TASK_WITH_FUSION), hcom_parallel_(false) {} | ||||
| Status GraphBuilder::MarkGraph(ComputeGraphPtr &graph) { | |||||
| GE_CHECK_NOTNULL(graph); | |||||
| bool is_unknown_shape = false; | |||||
| for (const auto &node : graph->GetDirectNode()) { | |||||
| GE_CHK_STATUS_RET(ge::NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown_shape), | |||||
| "Get node[%s] shape status failed!", node->GetName().c_str()); | |||||
| if (is_unknown_shape) { | |||||
| break; | |||||
| } | |||||
| } | |||||
| graph->SetGraphUnknownFlag(is_unknown_shape); | |||||
| GELOGD("mark graph [%s] unknown status success! value is %d", graph->GetName().c_str(), is_unknown_shape); | |||||
| return SUCCESS; | |||||
| } | |||||
| void GraphBuilder::SetOptions(const ge::GraphManagerOptions &options) { | void GraphBuilder::SetOptions(const ge::GraphManagerOptions &options) { | ||||
| stream_max_parallel_num_ = options.stream_max_parallel_num; | stream_max_parallel_num_ = options.stream_max_parallel_num; | ||||
| hcom_parallel_ = options.hcom_parallel; | hcom_parallel_ = options.hcom_parallel; | ||||
| @@ -72,7 +54,7 @@ Status GraphBuilder::CalcOpParam(const ge::ComputeGraphPtr &graph) { | |||||
| return GE_CLI_GE_NOT_INITIALIZED; | return GE_CLI_GE_NOT_INITIALIZED; | ||||
| } | } | ||||
| for (const auto &node_ptr : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
| for (const auto &node_ptr : graph->GetAllNodes()) { | |||||
| GE_CHECK_NOTNULL(node_ptr->GetOpDesc()); | GE_CHECK_NOTNULL(node_ptr->GetOpDesc()); | ||||
| std::string kernel_lib_name = node_ptr->GetOpDesc()->GetOpKernelLibName(); | std::string kernel_lib_name = node_ptr->GetOpDesc()->GetOpKernelLibName(); | ||||
| if (kernel_lib_name.empty()) { | if (kernel_lib_name.empty()) { | ||||
| @@ -120,7 +102,11 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph | |||||
| graph->GetName().c_str()); | graph->GetName().c_str()); | ||||
| auto parent_op_desc = parent_node_ptr->GetOpDesc(); | auto parent_op_desc = parent_node_ptr->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(parent_op_desc); | GE_CHECK_NOTNULL(parent_op_desc); | ||||
| bool is_unknown_shape = graph->GetGraphUnknownFlag(); | |||||
| bool is_unknown_shape = false; | |||||
| if (!AttrUtils::GetBool(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape)) { | |||||
| GELOGE(PARAM_INVALID, "Get op %s unknown shape attr failed.", parent_op_desc->GetName().c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| if (is_unknown_shape) { | if (is_unknown_shape) { | ||||
| GELOGI("Current graph[%s] is unknown, no need to update parent node[%s] output size.", graph->GetName().c_str(), | GELOGI("Current graph[%s] is unknown, no need to update parent node[%s] output size.", graph->GetName().c_str(), | ||||
| parent_node_ptr->GetName().c_str()); | parent_node_ptr->GetName().c_str()); | ||||
| @@ -135,14 +121,14 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph | |||||
| for (const auto &in_data_anchor : node_ptr->GetAllInDataAnchors()) { | for (const auto &in_data_anchor : node_ptr->GetAllInDataAnchors()) { | ||||
| auto index = in_data_anchor->GetIdx(); | auto index = in_data_anchor->GetIdx(); | ||||
| ge::GeTensorDesc desc_temp = op_desc->GetInputDesc(index); | ge::GeTensorDesc desc_temp = op_desc->GetInputDesc(index); | ||||
| int64_t size = 0; | |||||
| GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(desc_temp, size) != SUCCESS, GELOGI("Get size failed!")); | |||||
| uint32_t parent_index = 0; | uint32_t parent_index = 0; | ||||
| if (!AttrUtils::GetInt(desc_temp, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | if (!AttrUtils::GetInt(desc_temp, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | ||||
| GELOGI("NetOutput input tensor %d, attr %s not found.", index, ATTR_NAME_PARENT_NODE_INDEX.c_str()); | |||||
| continue; | |||||
| GELOGE(INTERNAL_ERROR, "NetOutput input tensor %d, attr %s not found.", index, | |||||
| ATTR_NAME_PARENT_NODE_INDEX.c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | } | ||||
| int64_t size = 0; | |||||
| GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(desc_temp, size) != SUCCESS, GELOGI("Get size failed!")); | |||||
| ge::GeTensorDesc parent_desc_temp = parent_op_desc->GetOutputDesc(parent_index); | ge::GeTensorDesc parent_desc_temp = parent_op_desc->GetOutputDesc(parent_index); | ||||
| ge::TensorUtils::SetSize(parent_desc_temp, size); | ge::TensorUtils::SetSize(parent_desc_temp, size); | ||||
| GE_CHK_STATUS_RET(parent_op_desc->UpdateOutputDesc(parent_index, parent_desc_temp)); | GE_CHK_STATUS_RET(parent_op_desc->UpdateOutputDesc(parent_index, parent_desc_temp)); | ||||
| @@ -190,7 +176,7 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, | |||||
| auto subgraph_map = graph_partitioner_.GetSubGraphMap(); | auto subgraph_map = graph_partitioner_.GetSubGraphMap(); | ||||
| GE_TIMESTAMP_START(BuildSubgraph); | GE_TIMESTAMP_START(BuildSubgraph); | ||||
| ge::ModelBuilder builder(session_id, comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_); | |||||
| ge::ModelBuilder builder(comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_); | |||||
| GE_DUMP(comp_graph, "BeforePreBuildModel"); | GE_DUMP(comp_graph, "BeforePreBuildModel"); | ||||
| GE_TIMESTAMP_START(PreBuildModel); | GE_TIMESTAMP_START(PreBuildModel); | ||||
| GE_CHK_STATUS_RET(builder.PreBuildModel(), "Graph[%s] builder PreBuildModel() return fail.", | GE_CHK_STATUS_RET(builder.PreBuildModel(), "Graph[%s] builder PreBuildModel() return fail.", | ||||
| @@ -243,7 +229,7 @@ Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeMo | |||||
| GE_TIMESTAMP_END(CalcOpParam, "GraphBuilder::CalcOpParam"); | GE_TIMESTAMP_END(CalcOpParam, "GraphBuilder::CalcOpParam"); | ||||
| GE_DUMP(comp_graph, "AfterCalcOpParam"); | GE_DUMP(comp_graph, "AfterCalcOpParam"); | ||||
| Graph2SubGraphInfoList subgraph_map; | Graph2SubGraphInfoList subgraph_map; | ||||
| ge::ModelBuilder builder(session_id, comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_); | |||||
| ge::ModelBuilder builder(comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_); | |||||
| ModelPtr model_ptr = MakeShared<ge::Model>(); | ModelPtr model_ptr = MakeShared<ge::Model>(); | ||||
| if (model_ptr == nullptr) { | if (model_ptr == nullptr) { | ||||
| return MEMALLOC_FAILED; | return MEMALLOC_FAILED; | ||||
| @@ -277,38 +263,51 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||||
| GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | ||||
| uint64_t session_id) { | uint64_t session_id) { | ||||
| GELOGI("Start to build BuildForDynamicShape for dynamic shape."); | GELOGI("Start to build BuildForDynamicShape for dynamic shape."); | ||||
| // mark unknown shape attr | |||||
| for (auto &sub_graph : comp_graph->GetAllSubgraphs()) { | |||||
| auto status = MarkGraph(sub_graph); | |||||
| if (status != SUCCESS) { | |||||
| GELOGE(FAILED, "mark graph failed!"); | |||||
| return status; | |||||
| } | |||||
| } | |||||
| // Update Root Graph Data size | |||||
| for (auto &node : comp_graph->GetDirectNode()) { | |||||
| for (const auto &node : comp_graph->GetDirectNode()) { | |||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| op_desc->SetStreamId(kInvalidStream); | |||||
| if (node->GetType() == DATA) { | if (node->GetType() == DATA) { | ||||
| GE_CHK_STATUS_RET(CalcDynShapeRootGraphDataSize(op_desc), "Calc dynamic shape root graph data[%s] size failed.", | GE_CHK_STATUS_RET(CalcDynShapeRootGraphDataSize(op_desc), "Calc dynamic shape root graph data[%s] size failed.", | ||||
| op_desc->GetName().c_str()); | op_desc->GetName().c_str()); | ||||
| } | } | ||||
| } | |||||
| // | |||||
| for (auto &sub_graph : comp_graph->GetAllSubgraphs()) { | |||||
| if (sub_graph->GetGraphUnknownFlag()) { | |||||
| // unknown shape build flow | |||||
| GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), | |||||
| "Build for unknown shape graph failed."); | |||||
| } else { | |||||
| // known shape build flow | |||||
| GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, subgraph_ptr_list, ge_model_ptr, session_id), | |||||
| "Build for known shape graph failed."); | |||||
| // ATTR_NAME_IS_UNKNOWN_SHAPE is set on "graph partion" stage, but afer fusion , the graph may | |||||
| // be changed so here need to renew. For example , the scene followed: | |||||
| // (known)partioncall(known) (known)partioncall(known) | |||||
| // After fusion | |||||
| // | --> | |||||
| // (known)Unique(unknown)--->(unknow)Shape(unknown) (known)FuncDef(known) | |||||
| // if scene like this , it should be process as known shape graph | |||||
| bool is_unknown_shape = false; | |||||
| GE_CHK_STATUS_RET(ge::NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown_shape), | |||||
| "Get node[%s] shape status failed!", node->GetName().c_str()); | |||||
| if (!is_unknown_shape) { | |||||
| GE_CHK_BOOL_EXEC(ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape), return FAILED, | |||||
| "Renew node [%s] attr[%s] failed!", node->GetName().c_str(), ATTR_NAME_IS_UNKNOWN_SHAPE.c_str()); | |||||
| GELOGD("renew node [%s] attr[%s] success! value is %d", node->GetName().c_str(), | |||||
| ATTR_NAME_IS_UNKNOWN_SHAPE.c_str(), is_unknown_shape); | |||||
| } | } | ||||
| ge_root_model_ptr->SetSubgraphInstanceNameToModel(sub_graph->GetName(), ge_model_ptr); | |||||
| } | |||||
| vector<string> subgraph_names = op_desc->GetSubgraphInstanceNames(); | |||||
| for (auto subgraph_name : subgraph_names) { | |||||
| ComputeGraphPtr subgraph = comp_graph->GetSubgraph(subgraph_name); | |||||
| bool is_unknown_shape = false; | |||||
| if (!AttrUtils::GetBool(op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape)) { | |||||
| GELOGE(PARAM_INVALID, "Get op %s unknown shape attr failed.", op_desc->GetName().c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| if (is_unknown_shape) { | |||||
| // unknown shape build flow | |||||
| GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(subgraph, ge_model_ptr, session_id), | |||||
| "Build for unknown shape graph failed."); | |||||
| } else { | |||||
| // known shape build flow | |||||
| GE_CHK_STATUS_RET(BuildForKnownShapeGraph(subgraph, subgraph_ptr_list, ge_model_ptr, session_id), | |||||
| "Build for known shape graph failed."); | |||||
| } | |||||
| ge_root_model_ptr->SetSubgraphInstanceNameToModel(subgraph_name, ge_model_ptr); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -328,9 +327,8 @@ Status GraphBuilder::GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr | |||||
| GELOGE(INTERNAL_ERROR, "Get weight memory size fail."); | GELOGE(INTERNAL_ERROR, "Get weight memory size fail."); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| auto var_manager = VarManager::Instance(session_id); | |||||
| auto *get_mem_base = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(var_manager->GetVarMemMaxSize())); | |||||
| auto *get_mem_base = | |||||
| reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(ge::VarManager::Instance(0)->GetVarMemMaxSize())); | |||||
| uint8_t *get_weight_mem_base = get_mem_base; | uint8_t *get_weight_mem_base = get_mem_base; | ||||
| if (weight_size > 0) { | if (weight_size > 0) { | ||||
| get_weight_mem_base = get_mem_base + memory_size; | get_weight_mem_base = get_mem_base + memory_size; | ||||
| @@ -356,8 +354,11 @@ Status GraphBuilder::GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| GE_DUMP(comp_graph, "AfterOptimizeStreamedSubGraph"); | GE_DUMP(comp_graph, "AfterOptimizeStreamedSubGraph"); | ||||
| auto *get_var_mem_base = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(var_manager->GetVarMemLogicBase())); | |||||
| uint64_t var_size = (var_manager->GetVarMemSize(RT_MEMORY_HBM) > 0) ? var_manager->GetVarMemMaxSize() : 0; | |||||
| auto *get_var_mem_base = | |||||
| reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(ge::VarManager::Instance(0)->GetVarMemLogicBase())); | |||||
| uint64_t var_size = (ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) > 0) | |||||
| ? ge::VarManager::Instance(0)->GetVarMemMaxSize() | |||||
| : 0; | |||||
| TaskGenerator task_generator(get_var_mem_base, var_size); | TaskGenerator task_generator(get_var_mem_base, var_size); | ||||
| ret = task_generator.GetTaskInfo(*model_ptr, comp_graph, session_id, run_context.GetRunContext()); | ret = task_generator.GetTaskInfo(*model_ptr, comp_graph, session_id, run_context.GetRunContext()); | ||||
| @@ -367,13 +368,6 @@ Status GraphBuilder::GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr | |||||
| Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) { | Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) { | ||||
| // set input_desc.size = src_node.output_desc.size | // set input_desc.size = src_node.output_desc.size | ||||
| if (node_ptr->GetType() == DATA) { | if (node_ptr->GetType() == DATA) { | ||||
| bool is_unknown_shape = false; | |||||
| GE_CHK_STATUS_RET(ge::NodeUtils::GetNodeUnknownShapeStatus(*node_ptr, is_unknown_shape), | |||||
| "Get data node[%s] shape status failed!", node_ptr->GetName().c_str()); | |||||
| if (is_unknown_shape) { | |||||
| GELOGD("data node: %s is unknown shape, do not set input size!", node_ptr->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| if (UpdateDataInputSize(node_ptr) != SUCCESS) { | if (UpdateDataInputSize(node_ptr) != SUCCESS) { | ||||
| GELOGE(FAILED, "Update data input size failed."); | GELOGE(FAILED, "Update data input size failed."); | ||||
| return FAILED; | return FAILED; | ||||
| @@ -404,7 +398,7 @@ Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) { | |||||
| GE_CHECK_NOTNULL(input_desc); | GE_CHECK_NOTNULL(input_desc); | ||||
| ge::TensorUtils::SetSize(const_cast<GeTensorDesc &>(*input_desc), size); | ge::TensorUtils::SetSize(const_cast<GeTensorDesc &>(*input_desc), size); | ||||
| GE_CHK_STATUS_RET(node_op_desc->UpdateInputDesc(in_data_anchor->GetIdx(), *input_desc)); | GE_CHK_STATUS_RET(node_op_desc->UpdateInputDesc(in_data_anchor->GetIdx(), *input_desc)); | ||||
| GELOGD("%s input desc, dim_size: %zu, mem_size: %ld, format: %s, type: %s.", node_ptr->GetName().c_str(), | |||||
| GELOGD("%s input desc, dim_size: %zu, mem_size: %u, format: %s, type: %s.", node_ptr->GetName().c_str(), | |||||
| input_desc->GetShape().GetDimNum(), size, TypeUtils::FormatToSerialString(input_desc->GetFormat()).c_str(), | input_desc->GetShape().GetDimNum(), size, TypeUtils::FormatToSerialString(input_desc->GetFormat()).c_str(), | ||||
| TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str()); | TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str()); | ||||
| } | } | ||||
| @@ -67,7 +67,6 @@ class GraphBuilder { | |||||
| GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | ||||
| Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | ||||
| uint64_t session_id = INVALID_SESSION_ID); | uint64_t session_id = INVALID_SESSION_ID); | ||||
| Status MarkGraph(ComputeGraphPtr &graph); | |||||
| int build_mode_; | int build_mode_; | ||||
| std::map<std::string, int> stream_max_parallel_num_; | std::map<std::string, int> stream_max_parallel_num_; | ||||
| @@ -24,6 +24,7 @@ | |||||
| #include "graph/label/label_maker.h" | #include "graph/label/label_maker.h" | ||||
| namespace ge { | namespace ge { | ||||
| LabelAllocator::LabelAllocator(const ComputeGraphPtr &graph) : compute_graph_(graph) {} | LabelAllocator::LabelAllocator(const ComputeGraphPtr &graph) : compute_graph_(graph) {} | ||||
| Status LabelAllocator::AssignFunctionalLabels(uint32_t &label_index) { | Status LabelAllocator::AssignFunctionalLabels(uint32_t &label_index) { | ||||
| @@ -75,4 +76,5 @@ bool LabelAllocator::CollectFunctionalNode(ComputeGraphPtr &graph, std::set<Node | |||||
| (void)functional_nodes.insert(parent); // unique functional node. | (void)functional_nodes.insert(parent); // unique functional node. | ||||
| return true; | return true; | ||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -22,7 +22,6 @@ | |||||
| #include "framework/common/types.h" | #include "framework/common/types.h" | ||||
| #include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
| #include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
| #include "graph/common/ge_call_wrapper.h" | |||||
| using std::map; | using std::map; | ||||
| using std::queue; | using std::queue; | ||||
| @@ -30,6 +29,12 @@ using std::set; | |||||
| using std::string; | using std::string; | ||||
| using std::vector; | using std::vector; | ||||
namespace {
// Engine id compared against a subgraph's engine_conf.id to recognise AICPU subgraphs.
constexpr const char *kAICPUEngineName = "DNN_VM_AICPU";
// String attribute read from a placeholder's op desc to obtain its parent op type.
constexpr const char *kAttrNameParentOpType = "parentOpType";
// Threshold for the AICPU head-node count; calculated by 1024 * 0.8.
constexpr size_t kHeadNodeMaxNum = 820;
}  // namespace
| namespace ge { | namespace ge { | ||||
| LogicalStreamPass::LogicalStreamPass(const string &name) : name_(name) {} | LogicalStreamPass::LogicalStreamPass(const string &name) : name_(name) {} | ||||
| @@ -49,6 +54,24 @@ bool LogicalStreamPass::HasAssignedStream(const Subgraph &subgraph) const { | |||||
| return subgraph.stream_id != kInvalidStream; | return subgraph.stream_id != kInvalidStream; | ||||
| } | } | ||||
| bool LogicalStreamPass::HasNonConstInputNode(const Subgraph &subgraph) const { | |||||
| const SubGraphInfo &subgraph_info = subgraph.subgraph_info; | |||||
| const auto &pld_to_end_map = subgraph_info.GetPld2EndMap(); | |||||
| for (const auto &pld_to_end : pld_to_end_map) { | |||||
| const NodePtr &placeholder = pld_to_end.first; | |||||
| if (placeholder != nullptr) { | |||||
| string parent_op_type; | |||||
| if (AttrUtils::GetStr(placeholder->GetOpDesc(), kAttrNameParentOpType, parent_op_type)) { | |||||
| if ((parent_op_type != CONSTANT) && (parent_op_type != CONSTANTOP)) { | |||||
| return true; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| return false; | |||||
| } | |||||
| Status AssignByLabelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) { | Status AssignByLabelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) { | ||||
| bool changed = false; | bool changed = false; | ||||
| int64_t &next_stream = context.next_stream; | int64_t &next_stream = context.next_stream; | ||||
| @@ -110,6 +133,21 @@ Status IndependentStreamPass::Run(ComputeGraphPtr graph, const vector<SubgraphPt | |||||
| Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) { | Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) { | ||||
| bool changed = false; | bool changed = false; | ||||
| if (IsHeadNodeExceeded(subgraphs)) { | |||||
| int64_t &next_stream = context.next_stream; | |||||
| for (const SubgraphPtr &subgraph : subgraphs) { | |||||
| if (!HasAssignedStream(*subgraph)) { | |||||
| subgraph->stream_id = next_stream; | |||||
| changed = true; | |||||
| } | |||||
| } | |||||
| if (changed) { | |||||
| ++next_stream; | |||||
| return SUCCESS; | |||||
| } | |||||
| return NOT_CHANGED; | |||||
| } | |||||
| map<NodePtr, SubgraphPtr> end_subgraph_map; | map<NodePtr, SubgraphPtr> end_subgraph_map; | ||||
| map<NodePtr, SubgraphPtr> pld_subgraph_map; | map<NodePtr, SubgraphPtr> pld_subgraph_map; | ||||
| InitEndSubgraphMap(subgraphs, end_subgraph_map); | InitEndSubgraphMap(subgraphs, end_subgraph_map); | ||||
| @@ -152,6 +190,24 @@ Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vector<SubgraphP | |||||
| return changed ? SUCCESS : NOT_CHANGED; | return changed ? SUCCESS : NOT_CHANGED; | ||||
| } | } | ||||
| bool AssignByDependencyPass::IsHeadNodeExceeded(const vector<SubgraphPtr> &subgraphs) const { | |||||
| size_t aicpu_node_num = 0; | |||||
| for (const SubgraphPtr &subgraph : subgraphs) { | |||||
| if (subgraph->engine_conf.id == kAICPUEngineName && !HasNonConstInputNode(*subgraph)) { | |||||
| const SubGraphInfo &subgraph_info = subgraph->subgraph_info; | |||||
| auto compute_graph = subgraph_info.GetSubGraph(); | |||||
| aicpu_node_num += compute_graph->GetDirectNode().size() - subgraph_info.GetPld2EndMap().size() - | |||||
| subgraph_info.GetEnd2PldMap().size(); | |||||
| if (aicpu_node_num > kHeadNodeMaxNum) { | |||||
| GELOGI("aicpu_node_num, %zu", aicpu_node_num); | |||||
| return true; | |||||
| } | |||||
| } | |||||
| } | |||||
| return false; | |||||
| } | |||||
| void AssignByDependencyPass::InitEndSubgraphMap(const vector<SubgraphPtr> &subgraphs, | void AssignByDependencyPass::InitEndSubgraphMap(const vector<SubgraphPtr> &subgraphs, | ||||
| map<NodePtr, SubgraphPtr> &end_subgraph_map) { | map<NodePtr, SubgraphPtr> &end_subgraph_map) { | ||||
| for (const auto &subgraph : subgraphs) { | for (const auto &subgraph : subgraphs) { | ||||
| @@ -671,7 +727,7 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra | |||||
| int64_t stream_num = context_.next_stream; | int64_t stream_num = context_.next_stream; | ||||
| vector<bool> stream_has_node(stream_num); | vector<bool> stream_has_node(stream_num); | ||||
| for (const NodePtr &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
| for (const NodePtr &node : graph->GetAllNodes()) { | |||||
| if (node != nullptr) { | if (node != nullptr) { | ||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| if (op_desc != nullptr) { | if (op_desc != nullptr) { | ||||
| @@ -692,7 +748,7 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra | |||||
| } | } | ||||
| } | } | ||||
| for (const NodePtr &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
| for (const NodePtr &node : graph->GetAllNodes()) { | |||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| if (op_desc != nullptr) { | if (op_desc != nullptr) { | ||||
| int64_t stream_id = op_desc->GetStreamId(); | int64_t stream_id = op_desc->GetStreamId(); | ||||
| @@ -81,6 +81,9 @@ class LogicalStreamPass { | |||||
| bool HasStreamLabel(const Subgraph &subgraph) const; | bool HasStreamLabel(const Subgraph &subgraph) const; | ||||
| bool HasAssignedStream(const Subgraph &subgraph) const; | bool HasAssignedStream(const Subgraph &subgraph) const; | ||||
| // Determine if the input of the subgraph is a constant. | |||||
| bool HasNonConstInputNode(const Subgraph &subgraph) const; | |||||
| private: | private: | ||||
| std::string name_; | std::string name_; | ||||
| }; | }; | ||||
| @@ -118,6 +121,7 @@ class AssignByDependencyPass : public LogicalStreamPass { | |||||
| void UpdateAssignedSubgraphs(Context &context); | void UpdateAssignedSubgraphs(Context &context); | ||||
| void UpdateReusedSubgraphs(); | void UpdateReusedSubgraphs(); | ||||
| bool IsHeadNodeExceeded(const std::vector<SubgraphPtr> &subgraphs) const; | |||||
| bool CouldReuse(const SubgraphPtr &subgraph, const SubgraphPtr &pred_subgraph, | bool CouldReuse(const SubgraphPtr &subgraph, const SubgraphPtr &pred_subgraph, | ||||
| const std::map<NodePtr, SubgraphPtr> &pld_subgraph_map); | const std::map<NodePtr, SubgraphPtr> &pld_subgraph_map); | ||||
| @@ -18,7 +18,6 @@ | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <sstream> | #include <sstream> | ||||
| #include "external/ge/ge_api_types.h" | |||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "graph/anchor.h" | #include "graph/anchor.h" | ||||
| #include "graph/buffer.h" | #include "graph/buffer.h" | ||||
| @@ -40,6 +39,7 @@ namespace { | |||||
| const char *const kAttrNameWorkspaceReuseFlag = "workspace_reuse_flag"; | const char *const kAttrNameWorkspaceReuseFlag = "workspace_reuse_flag"; | ||||
| const char *const kL2FusionDynamicConvergeOp = "l2fusion_dynamic_converge_op"; | const char *const kL2FusionDynamicConvergeOp = "l2fusion_dynamic_converge_op"; | ||||
| const char *const kOpNoReuseMem = "no_reuse_mem_flag"; | const char *const kOpNoReuseMem = "no_reuse_mem_flag"; | ||||
| const char *const kDisableReuseMemory = "ge.exec.disableReuseMemory"; | |||||
| const char *const OP_NO_REUSE_MEM = "OP_NO_REUSE_MEM"; | const char *const OP_NO_REUSE_MEM = "OP_NO_REUSE_MEM"; | ||||
| const int kReuseMaxCount = 10; | const int kReuseMaxCount = 10; | ||||
| const int kReuseMaxOpNum = 10; | const int kReuseMaxOpNum = 10; | ||||
| @@ -133,20 +133,21 @@ bool MemoryBlock::IsSameLabel(std::string &first_batch_label) { | |||||
| } | } | ||||
| bool CanNotLifeReuse(MemoryBlock *block) { | bool CanNotLifeReuse(MemoryBlock *block) { | ||||
| if ((block == nullptr) || !block->reuse_mem_ || block->deleted_block_ || block->continuous_block_) { | |||||
| if (block == nullptr || !block->reuse_mem_ || block->deleted_block_ || block->continuous_block_ || | |||||
| block->GetLifeEnd() == kMaxLifeTime) { | |||||
| return true; | return true; | ||||
| } | } | ||||
| return false; | return false; | ||||
| } | } | ||||
| void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life) { | |||||
| void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block) { | |||||
| if (CanNotLifeReuse(this) || CanNotLifeReuse(block)) { | if (CanNotLifeReuse(this) || CanNotLifeReuse(block)) { | ||||
| return; | return; | ||||
| } | } | ||||
| MemoryBlock *parent = nullptr; | MemoryBlock *parent = nullptr; | ||||
| MemoryBlock *child = nullptr; | MemoryBlock *child = nullptr; | ||||
| // merge small block to large block | // merge small block to large block | ||||
| if (block->GetDependLifeBegin(stream_id_, total_node_depend_stream_life) > GetLifeEnd()) { | |||||
| if ((block->GetLifeBegin() > GetLifeEnd()) && (block->stream_id_ == stream_id_)) { | |||||
| if ((child_offset_ + block->block_size_) <= block_size_) { | if ((child_offset_ + block->block_size_) <= block_size_) { | ||||
| parent = this; | parent = this; | ||||
| child = block; | child = block; | ||||
| @@ -180,87 +181,6 @@ size_t MemoryBlock::GetLifeBegin() { | |||||
| return life_time; | return life_time; | ||||
| } | } | ||||
| /// |-stream 1-| |-stream 2-| | |||||
| /// |--block1--| |--block---| | |||||
| /// |--block2--| |--block---| | |||||
| /// |--block3--|\ |--block---| | |||||
| /// |--block---| \ |--block---| | |||||
| /// |--block---| \|--block---| | |||||
| /// |--block---| |--block7--| | |||||
| /// |--block---| |--block---| | |||||
| /// block7's first node's input node's life begin > block2's life end, block7 can reuse block1~block2 | |||||
| size_t MemoryBlock::GetDependLifeBegin(int64_t stream_id, DependStreamLife &total_node_depend_stream_life) { | |||||
| AddDependLifeBegin(total_node_depend_stream_life); | |||||
| auto it = depend_stream_life_.find(stream_id); | |||||
| if (it == depend_stream_life_.end()) { | |||||
| return 0; | |||||
| } | |||||
| return it->second; | |||||
| } | |||||
| void AddDependLife(const ge::NodePtr &org_node, const ge::NodePtr &node, int64_t stream_id, | |||||
| std::map<int64_t, size_t> &depend_stream_life, DependStreamLife &total_node_depend_stream_life) { | |||||
| GE_CHECK_NOTNULL_EXEC(node, return ); | |||||
| auto node_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL_EXEC(node_desc, return ); | |||||
| auto node_id = node_desc->GetId(); | |||||
| auto stream_life = total_node_depend_stream_life.find(node_id); | |||||
| if (stream_life != total_node_depend_stream_life.end()) { | |||||
| for (auto &it : stream_life->second) { | |||||
| if (depend_stream_life.find(it.first) == depend_stream_life.end()) { | |||||
| depend_stream_life[it.first] = it.second; | |||||
| } | |||||
| } | |||||
| return; | |||||
| } | |||||
| for (const auto &in_anchor : node->GetAllInAnchors()) { | |||||
| GE_CHECK_NOTNULL_EXEC(in_anchor, continue); | |||||
| for (auto peer_out_anchor : in_anchor->GetPeerAnchors()) { | |||||
| GE_CHECK_NOTNULL_EXEC(peer_out_anchor, continue); | |||||
| auto peer_node = peer_out_anchor->GetOwnerNode(); | |||||
| GE_CHECK_NOTNULL_EXEC(peer_node, continue); | |||||
| auto peer_node_desc = peer_node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL_EXEC(peer_node_desc, continue); | |||||
| auto peer_node_stream_id = peer_node_desc->GetStreamId(); | |||||
| if (peer_node_stream_id < 0) { | |||||
| continue; | |||||
| } | |||||
| size_t peer_node_life_time = peer_node_desc->GetId(); | |||||
| auto it = depend_stream_life.find(peer_node_stream_id); | |||||
| if (it == depend_stream_life.end() || peer_node_life_time > it->second) { | |||||
| depend_stream_life[peer_node_stream_id] = peer_node_life_time; | |||||
| if (peer_node_stream_id != stream_id) { | |||||
| GELOGI("Node:%s stream id:%ld depend node:%s stream id:%ld index[%d] life time[%zu].", | |||||
| org_node->GetName().c_str(), stream_id, peer_node_desc->GetName().c_str(), peer_node_stream_id, | |||||
| peer_out_anchor->GetIdx(), peer_node_life_time); | |||||
| } | |||||
| AddDependLife(org_node, peer_node, stream_id, depend_stream_life, total_node_depend_stream_life); | |||||
| } | |||||
| } | |||||
| } | |||||
| // save on node to save next calculation | |||||
| for (auto &it : depend_stream_life) { | |||||
| if (total_node_depend_stream_life[node_id].find(it.first) == total_node_depend_stream_life[node_id].end()) { | |||||
| total_node_depend_stream_life[node_id][it.first] = it.second; | |||||
| } | |||||
| } | |||||
| } | |||||
| void MemoryBlock::AddDependLifeBegin(DependStreamLife &total_node_depend_stream_life) { | |||||
| if (!depend_stream_life_.empty()) { | |||||
| return; | |||||
| } | |||||
| if (!node_type_index_list_.empty()) { | |||||
| auto node = node_type_index_list_.front().node; | |||||
| if (node != nullptr) { | |||||
| AddDependLife(node, node, stream_id_, depend_stream_life_, total_node_depend_stream_life); | |||||
| } | |||||
| } | |||||
| depend_stream_life_[stream_id_] = GetLifeBegin(); | |||||
| } | |||||
| size_t MemoryBlock::GetLifeEnd() { | size_t MemoryBlock::GetLifeEnd() { | ||||
| if (!node_type_index_list_.empty()) { | if (!node_type_index_list_.empty()) { | ||||
| return node_type_index_list_.back().life_time_end; | return node_type_index_list_.back().life_time_end; | ||||
| @@ -382,7 +302,7 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { | |||||
| if (iter1 == anchor_to_symbol_.end()) { | if (iter1 == anchor_to_symbol_.end()) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| const std::string &symbol = iter1->second; | |||||
| std::string symbol = iter1->second; | |||||
| auto iter2 = symbol_size_.find(symbol); | auto iter2 = symbol_size_.find(symbol); | ||||
| if (iter2 == symbol_size_.end()) { | if (iter2 == symbol_size_.end()) { | ||||
| symbol_size_[symbol] = size; | symbol_size_[symbol] = size; | ||||
| @@ -397,7 +317,7 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { | |||||
| all_memory_size.insert(all_memory_size.end(), temp.begin(), temp.end()); | all_memory_size.insert(all_memory_size.end(), temp.begin(), temp.end()); | ||||
| } | } | ||||
| GELOGI("The last atomic_addr_clean node id: %ld", atomic_addr_clean_id_); | GELOGI("The last atomic_addr_clean node id: %ld", atomic_addr_clean_id_); | ||||
| for (const auto &pair : symbol_size_) { | |||||
| for (auto &pair : symbol_size_) { | |||||
| all_memory_size.emplace_back(pair.second); | all_memory_size.emplace_back(pair.second); | ||||
| } | } | ||||
| sort(all_memory_size.begin(), all_memory_size.end()); | sort(all_memory_size.begin(), all_memory_size.end()); | ||||
| @@ -507,6 +427,14 @@ bool CanReuseBySize(const map<string, uint64_t> &reusable_block_counts, const Me | |||||
| return can_reuse; | return can_reuse; | ||||
| } | } | ||||
| // Returns true when the stream that owns reusable_block is a member of reuse_stream. | |||||
| bool CanReuseByStream(const std::unordered_set<int64_t> &reuse_stream, MemoryBlock &reusable_block) { | |||||
| return reuse_stream.count(reusable_block.stream_id_) > 0; | |||||
| } | |||||
| bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | ||||
| uint32_t &peer_input_index) { | uint32_t &peer_input_index) { | ||||
| if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { | if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { | ||||
| @@ -567,11 +495,11 @@ void BlockMemAssigner::InitReuseFlag() { | |||||
| ge::CONSTANT, ge::CONSTANTOP}; | ge::CONSTANT, ge::CONSTANTOP}; | ||||
| static const std::set<std::string> kPostReuseTypes = {ge::DATA_TYPE, ge::AIPP_DATA_TYPE, ge::ENTER, | static const std::set<std::string> kPostReuseTypes = {ge::DATA_TYPE, ge::AIPP_DATA_TYPE, ge::ENTER, | ||||
| ge::REFENTER, ge::NEXTITERATION, ge::REFNEXTITERATION}; | ge::REFENTER, ge::NEXTITERATION, ge::REFNEXTITERATION}; | ||||
| for (const auto &pair : symbol_to_anchors_) { | |||||
| for (auto &pair : symbol_to_anchors_) { | |||||
| std::string symbol = pair.first; | std::string symbol = pair.first; | ||||
| bool pre_reuse_flag = true; | bool pre_reuse_flag = true; | ||||
| bool post_reuse_flag = true; | bool post_reuse_flag = true; | ||||
| for (const auto &node_index_io : pair.second) { | |||||
| for (auto &node_index_io : pair.second) { | |||||
| if (node_index_io.io_type_ == kIn) { | if (node_index_io.io_type_ == kIn) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| @@ -585,13 +513,13 @@ void BlockMemAssigner::InitReuseFlag() { | |||||
| if (node_index_io.node_->GetOutDataNodes().empty()) { | if (node_index_io.node_->GetOutDataNodes().empty()) { | ||||
| out_flg = true; | out_flg = true; | ||||
| } | } | ||||
| for (const auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { | |||||
| for (auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { | |||||
| if (IsDirectOutputNode(in_anchor->GetOwnerNode(), in_anchor->GetIdx())) { | if (IsDirectOutputNode(in_anchor->GetOwnerNode(), in_anchor->GetIdx())) { | ||||
| out_flg = true; | out_flg = true; | ||||
| break; | break; | ||||
| } | } | ||||
| } | } | ||||
| const std::string &type = out_anchor->GetOwnerNode()->GetType(); | |||||
| std::string type = out_anchor->GetOwnerNode()->GetType(); | |||||
| pre_reuse_flag = pre_reuse_flag && !out_flg && (kPreReuseTypes.count(type) == 0); | pre_reuse_flag = pre_reuse_flag && !out_flg && (kPreReuseTypes.count(type) == 0); | ||||
| post_reuse_flag = post_reuse_flag && (kPostReuseTypes.count(type) == 0); | post_reuse_flag = post_reuse_flag && (kPostReuseTypes.count(type) == 0); | ||||
| if (!pre_reuse_flag && !post_reuse_flag) { | if (!pre_reuse_flag && !post_reuse_flag) { | ||||
| @@ -624,7 +552,7 @@ bool BlockMemAssigner::IsPreReuse(const NodePtr &node, uint32_t out_index) const | |||||
| return false; | return false; | ||||
| } | } | ||||
| const std::string &symbol = iter1->second; | |||||
| std::string symbol = iter1->second; | |||||
| auto iter2 = pre_reuse_flag_.find(symbol); | auto iter2 = pre_reuse_flag_.find(symbol); | ||||
| if (iter2 == pre_reuse_flag_.end()) { | if (iter2 == pre_reuse_flag_.end()) { | ||||
| return false; | return false; | ||||
| @@ -642,7 +570,7 @@ bool BlockMemAssigner::IsPostReuse(const MemoryBlock *mem_block) const { | |||||
| if (mem_block == nullptr) { | if (mem_block == nullptr) { | ||||
| return false; | return false; | ||||
| } | } | ||||
| for (const auto &symbol : mem_block->SymbolList()) { | |||||
| for (auto &symbol : mem_block->SymbolList()) { | |||||
| auto iter = post_reuse_flag_.find(symbol); | auto iter = post_reuse_flag_.find(symbol); | ||||
| if (iter == post_reuse_flag_.end()) { | if (iter == post_reuse_flag_.end()) { | ||||
| continue; | continue; | ||||
| @@ -665,7 +593,8 @@ bool BlockMemAssigner::IsSymbolExist(const NodeIndexIO &node_index_io) { | |||||
| if (iter == anchor_to_symbol_.end()) { | if (iter == anchor_to_symbol_.end()) { | ||||
| return false; | return false; | ||||
| } | } | ||||
| return symbol_blocks_.find(iter->second) != symbol_blocks_.end(); | |||||
| std::string symbol = iter->second; | |||||
| return symbol_blocks_.find(symbol) != symbol_blocks_.end(); | |||||
| } | } | ||||
| /// | /// | ||||
| @@ -674,10 +603,10 @@ bool BlockMemAssigner::IsSymbolExist(const NodeIndexIO &node_index_io) { | |||||
| /// @return void | /// @return void | ||||
| /// | /// | ||||
| void BlockMemAssigner::PrintSymbolMap() { | void BlockMemAssigner::PrintSymbolMap() { | ||||
| for (const auto &pair : symbol_to_anchors_) { | |||||
| for (auto &pair : symbol_to_anchors_) { | |||||
| GELOGD("symbol=%s, max_size=%zu, pre_reuse=%s, post_reuse=%s", pair.first.c_str(), symbol_size_[pair.first], | GELOGD("symbol=%s, max_size=%zu, pre_reuse=%s, post_reuse=%s", pair.first.c_str(), symbol_size_[pair.first], | ||||
| pre_reuse_flag_[pair.first] ? "true" : "false", post_reuse_flag_[pair.first] ? "true" : "false"); | pre_reuse_flag_[pair.first] ? "true" : "false", post_reuse_flag_[pair.first] ? "true" : "false"); | ||||
| for (const auto &node_index_io : pair.second) { | |||||
| for (auto &node_index_io : pair.second) { | |||||
| GELOGD("anchor:%s", node_index_io.ToString().c_str()); | GELOGD("anchor:%s", node_index_io.ToString().c_str()); | ||||
| } | } | ||||
| } | } | ||||
| @@ -693,14 +622,15 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
| bool is_reuse_memory = false; | bool is_reuse_memory = false; | ||||
| string ge_disable_reuse_mem_env = "0"; | string ge_disable_reuse_mem_env = "0"; | ||||
| (void)ge::GetContext().GetOption(OPTION_EXEC_DISABLE_REUSED_MEMORY, ge_disable_reuse_mem_env); | |||||
| (void)ge::GetContext().GetOption(kDisableReuseMemory, ge_disable_reuse_mem_env); | |||||
| if (ge_disable_reuse_mem_env != "1") { | if (ge_disable_reuse_mem_env != "1") { | ||||
| bool reuse_mem_flag = !((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]); | bool reuse_mem_flag = !((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]); | ||||
| is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !node_op_desc->HasAttr(kOpNoReuseMem) && | is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !node_op_desc->HasAttr(kOpNoReuseMem) && | ||||
| reuse_mem_flag && is_op_reuse_mem && (IsPreReuse(n, out_index)); | reuse_mem_flag && is_op_reuse_mem && (IsPreReuse(n, out_index)); | ||||
| auto stream_id = node_op_desc->GetStreamId(); | auto stream_id = node_op_desc->GetStreamId(); | ||||
| if (is_reuse_memory) { | |||||
| for (auto it = reusable_blocks_[stream_id].begin(); it != reusable_blocks_[stream_id].end(); ++it) { | |||||
| auto map_iter = reusable_streams_map_.find(stream_id); | |||||
| if (is_reuse_memory && map_iter != reusable_streams_map_.end()) { | |||||
| for (auto it = reusable_blocks_.begin(); it != reusable_blocks_.end(); ++it) { | |||||
| MemoryBlock *reusable_block = *it; | MemoryBlock *reusable_block = *it; | ||||
| if (!IsPostReuse(reusable_block)) { | if (!IsPostReuse(reusable_block)) { | ||||
| reusable_block->reuse_mem_ = false; | reusable_block->reuse_mem_ = false; | ||||
| @@ -710,7 +640,10 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
| // A node can reuse blocks of the same stream and preorder streams | // A node can reuse blocks of the same stream and preorder streams | ||||
| auto id = GetAtomicAddrCleanId(); | auto id = GetAtomicAddrCleanId(); | ||||
| if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous, id)) { | |||||
| if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous, id) && | |||||
| CanReuseByStream(map_iter->second, *reusable_block)) { | |||||
| GELOGD("Cross stream mem reuse, target stream:%ld, current stream:%ld", reusable_block->stream_id_, | |||||
| stream_id); | |||||
| reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size); | reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size); | ||||
| if (mem_type == kOutput) { | if (mem_type == kOutput) { | ||||
| auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); | auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); | ||||
| @@ -721,7 +654,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
| reusable_block->continuous_block_ = continuous; | reusable_block->continuous_block_ = continuous; | ||||
| reusable_block->ref_count_++; | reusable_block->ref_count_++; | ||||
| ReduceReusableBlockCount(*reusable_block, reusable_block_counts_); | ReduceReusableBlockCount(*reusable_block, reusable_block_counts_); | ||||
| reusable_blocks_[stream_id].erase(it); | |||||
| reusable_blocks_.erase(it); | |||||
| return reusable_block; | return reusable_block; | ||||
| } | } | ||||
| } | } | ||||
| @@ -767,7 +700,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
| "Get no align size failed"); | "Get no align size failed"); | ||||
| if (IsSymbolExist(node_index_io)) { | if (IsSymbolExist(node_index_io)) { | ||||
| const std::string &symbol = anchor_to_symbol_[node_index_io.ToString()]; | |||||
| std::string symbol = anchor_to_symbol_[node_index_io.ToString()]; | |||||
| block = symbol_blocks_[symbol]; | block = symbol_blocks_[symbol]; | ||||
| block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); | block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); | ||||
| block->ref_count_++; | block->ref_count_++; | ||||
| @@ -990,7 +923,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
| (void)ge::AttrUtils::GetBool(op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic); | (void)ge::AttrUtils::GetBool(op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic); | ||||
| // Allocate memory for the current node and release node memory of the same size in the workspace | // Allocate memory for the current node and release node memory of the same size in the workspace | ||||
| GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", | GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", | ||||
| ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_[stream_id]);) | |||||
| ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_);) | |||||
| for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) { | for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) { | ||||
| int64_t size = 0; | int64_t size = 0; | ||||
| auto output_op_desc = op_desc->GetOutputDescPtr(i); | auto output_op_desc = op_desc->GetOutputDescPtr(i); | ||||
| @@ -1044,7 +977,10 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
| /// @return Status result | /// @return Status result | ||||
| /// | /// | ||||
| void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | ||||
| (void)ge::GetContext().GetOption(OPTION_EXEC_DISABLE_REUSED_MEMORY, ge_disable_reuse_mem_env_); | |||||
| // Init reusable streams map | |||||
| InitReusableStreamMap(); | |||||
| (void)ge::GetContext().GetOption(kDisableReuseMemory, ge_disable_reuse_mem_env_); | |||||
| GEEVENT("Reuse memory %s", ge_disable_reuse_mem_env_ == "1" ? "close" : "open"); | GEEVENT("Reuse memory %s", ge_disable_reuse_mem_env_ == "1" ? "close" : "open"); | ||||
| string op_no_reuse_mem_str; | string op_no_reuse_mem_str; | ||||
| const char *op_no_reuse_mem = std::getenv(OP_NO_REUSE_MEM); | const char *op_no_reuse_mem = std::getenv(OP_NO_REUSE_MEM); | ||||
| @@ -1097,7 +1033,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mem_block == nullptr, continue, "failed to apply memory block."); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mem_block == nullptr, continue, "failed to apply memory block."); | ||||
| CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block); | CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block); | ||||
| } | } | ||||
| ReleaseInputNodeOutMemory(node_out_blocks_, reusable_blocks_[stream_id], n); | |||||
| ReleaseInputNodeOutMemory(node_out_blocks_, reusable_blocks_, n); | |||||
| } | } | ||||
| GELOGD("Assigned memory blocks:"); | GELOGD("Assigned memory blocks:"); | ||||
| @@ -1108,7 +1044,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||||
| bool merge_dynamic_batch = false; | bool merge_dynamic_batch = false; | ||||
| GE_IF_BOOL_EXEC(!(ge_disable_reuse_mem_env_ == "1"), merge_dynamic_batch = MergeDynamicBatchBlocks();) | GE_IF_BOOL_EXEC(!(ge_disable_reuse_mem_env_ == "1"), merge_dynamic_batch = MergeDynamicBatchBlocks();) | ||||
| GE_IF_BOOL_EXEC((!(ge_disable_reuse_mem_env_ == "1") && !merge_dynamic_batch), ReuseBlocksByLifeTime(ranges.size());) | |||||
| GE_IF_BOOL_EXEC(!merge_dynamic_batch, ReuseBlocksByLifeTime();) | |||||
| AssignContinuousBlocks(); | AssignContinuousBlocks(); | ||||
| ResizeMemoryBlocks(); | ResizeMemoryBlocks(); | ||||
| @@ -1285,11 +1221,7 @@ void BlockMemAssigner::AssignContinuousBlocks() { | |||||
| } | } | ||||
| } | } | ||||
| void BlockMemAssigner::ReuseBlocksByLifeTime(size_t range_size) { | |||||
| // 1 means block size is same so no need to do this | |||||
| if (range_size <= 1) { | |||||
| return; | |||||
| } | |||||
| void BlockMemAssigner::ReuseBlocksByLifeTime() { | |||||
| for (size_t i = 0; i < memory_blocks_.size(); ++i) { | for (size_t i = 0; i < memory_blocks_.size(); ++i) { | ||||
| auto parent = memory_blocks_[i]; | auto parent = memory_blocks_[i]; | ||||
| if (parent == nullptr || parent->deleted_block_) { | if (parent == nullptr || parent->deleted_block_) { | ||||
| @@ -1299,7 +1231,7 @@ void BlockMemAssigner::ReuseBlocksByLifeTime(size_t range_size) { | |||||
| parent->reuse_mem_ = false; | parent->reuse_mem_ = false; | ||||
| } | } | ||||
| for (size_t j = i + 1; j < memory_blocks_.size(); ++j) { | for (size_t j = i + 1; j < memory_blocks_.size(); ++j) { | ||||
| parent->AddLifeReuseBlock(memory_blocks_[j], total_node_depend_stream_life_); | |||||
| parent->AddLifeReuseBlock(memory_blocks_[j]); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -1386,10 +1318,10 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, siz | |||||
| } | } | ||||
| GELOGI( | GELOGI( | ||||
| "[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]" | "[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]" | ||||
| " noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d] isref[%d].", | |||||
| " noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d] isref[%d].", | |||||
| graph_name.c_str(), op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, | graph_name.c_str(), op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, | ||||
| op_desc->GetStreamId(), block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block, | op_desc->GetStreamId(), block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block, | ||||
| block->reuse_mem_, block->continuous_block_, block->deleted_block_, node_type.ref_input); | |||||
| node_type.ref_input); | |||||
| } | } | ||||
| void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) { | void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) { | ||||
| @@ -1448,6 +1380,139 @@ Status BlockMemAssigner::Assign() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| // Populates reusable_streams_map_: for each stream id, the set of stream ids whose blocks it may reuse. | |||||
| void BlockMemAssigner::InitReusableStreamMap() { | |||||
| // stream id -> (first node, last node) of that stream. | |||||
| map<int64_t, pair<NodePtr, NodePtr>> stream_head_tail_node_map; | |||||
| // stream id -> ids of its direct child streams. | |||||
| map<int64_t, unordered_set<int64_t>> stream_dependency_map; | |||||
| // stream id -> memory occupied by the stream. | |||||
| unordered_map<int64_t, int64_t> stream_mem_map; | |||||
| // Collect the first and last node of every stream. | |||||
| FindHeadAndTailNodesForStream(stream_head_tail_node_map, stream_mem_map); | |||||
| // Stream B depends on stream A when B's first node is an output of A's last node. | |||||
| FindDependentStream(stream_head_tail_node_map, stream_dependency_map); | |||||
| for (const auto &dependency : stream_dependency_map) { | |||||
| const int64_t parent_stream_id = dependency.first; | |||||
| const unordered_set<int64_t> &child_stream_ids = dependency.second; | |||||
| if (child_stream_ids.empty()) { | |||||
| continue; | |||||
| } | |||||
| // With several child streams, choose the one whose occupied memory is closest to the parent's. | |||||
| const int64_t parent_size = stream_mem_map[parent_stream_id]; | |||||
| int64_t smallest_gap = LONG_MAX; | |||||
| int64_t chosen_child_id = 0; | |||||
| for (auto child_id : child_stream_ids) { | |||||
| const int64_t gap = labs(stream_mem_map[child_id] - parent_size); | |||||
| if (gap < smallest_gap) { | |||||
| chosen_child_id = child_id; | |||||
| smallest_gap = gap; | |||||
| } | |||||
| } | |||||
| // The chosen child may reuse every stream that the parent itself may reuse. | |||||
| auto &child_reusable = reusable_streams_map_[chosen_child_id]; | |||||
| const auto &parent_reusable = reusable_streams_map_[parent_stream_id]; | |||||
| child_reusable.insert(parent_reusable.begin(), parent_reusable.end()); | |||||
| } | |||||
| } | |||||
| // Walks every node of compute_graph_ and records, per stream id: | |||||
| //   - stream_head_tail_node_map: the first and the last node seen on that stream; | |||||
| //   - stream_mem_map: the sum of the output sizes and workspace bytes of the stream's nodes. | |||||
| // Every newly seen stream is also registered in reusable_streams_map_ as able to reuse itself. | |||||
| // Nodes without an op desc are skipped with a warning. | |||||
| void BlockMemAssigner::FindHeadAndTailNodesForStream(map<int64_t, pair<NodePtr, NodePtr>> &stream_head_tail_node_map, | |||||
| unordered_map<int64_t, int64_t> &stream_mem_map) { | |||||
| for (const auto &n : compute_graph_->GetAllNodes()) { | |||||
| // Fetch the op desc once instead of re-querying it for every use below. | |||||
| const auto op_desc = n->GetOpDesc(); | |||||
| GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGW("Op desc is nullptr"); continue); | |||||
| const auto stream_id = op_desc->GetStreamId(); | |||||
| // First sighting of a stream sets both head and tail; later sightings only move the tail. | |||||
| auto head_tail_iter = stream_head_tail_node_map.find(stream_id); | |||||
| if (head_tail_iter == stream_head_tail_node_map.end()) { | |||||
| stream_head_tail_node_map[stream_id] = std::make_pair(n, n); | |||||
| reusable_streams_map_[stream_id].insert(stream_id);  // a node can reuse blocks from same stream. | |||||
| } else { | |||||
| head_tail_iter->second.second = n; | |||||
| } | |||||
| // Accumulate the output size of the node in the stream. | |||||
| for (size_t i = 0; i < op_desc->GetOutputsSize(); i++) { | |||||
| const auto output_desc = op_desc->GetOutputDescPtr(static_cast<uint32_t>(i)); | |||||
| // Guard before dereferencing: presumably the getter can hand back nullptr for an invalid desc (confirm). | |||||
| if (output_desc == nullptr) { | |||||
| GELOGW("Output desc is nullptr"); | |||||
| continue; | |||||
| } | |||||
| int64_t size = 0; | |||||
| if (ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS) { | |||||
| GELOGW("Get output size failed!"); | |||||
| continue; | |||||
| } | |||||
| stream_mem_map[stream_id] += size; | |||||
| } | |||||
| // Accumulate the workspace size of the node in the stream. | |||||
| for (auto size : op_desc->GetWorkspaceBytes()) { | |||||
| stream_mem_map[stream_id] += size; | |||||
| } | |||||
| } | |||||
| } | |||||
| // For every ordered pair of distinct streams (A, B), records B in stream_dependency_map[A] when the | |||||
| // first node of B is among the successors of the last node of A. Successors are the direct out nodes | |||||
| // of A's last node plus the nodes appended by FindDependentStreamBetweenGraphs. | |||||
| // Nodes are matched by op desc id; candidates without an op desc are ignored. | |||||
| void BlockMemAssigner::FindDependentStream(map<int64_t, pair<NodePtr, NodePtr>> &stream_head_tail_node_map, | |||||
| map<int64_t, unordered_set<int64_t>> &stream_dependency_map) { | |||||
| for (const auto &pre_stream : stream_head_tail_node_map) { | |||||
| const NodePtr &pre_node = pre_stream.second.second; | |||||
| if ((pre_node == nullptr) || (pre_node->GetOpDesc() == nullptr)) { | |||||
| continue; | |||||
| } | |||||
| // The successor list depends only on the tail node of the pre stream, so build it once per | |||||
| // pre stream instead of once per (pre, post) pair. | |||||
| std::vector<NodePtr> out_nodes; | |||||
| // Direct link out_node | |||||
| for (const auto &out_node : pre_node->GetOutNodes()) { | |||||
| out_nodes.emplace_back(out_node); | |||||
| } | |||||
| FindDependentStreamBetweenGraphs(pre_node, out_nodes); | |||||
| const auto pre_stream_id = pre_node->GetOpDesc()->GetStreamId(); | |||||
| for (const auto &post_stream : stream_head_tail_node_map) { | |||||
| // Map keys are unique, so comparing keys is enough to skip the stream itself. | |||||
| if (post_stream.first == pre_stream.first) { | |||||
| continue; | |||||
| } | |||||
| const NodePtr &post_node = post_stream.second.first; | |||||
| if ((post_node == nullptr) || (post_node->GetOpDesc() == nullptr)) { | |||||
| continue; | |||||
| } | |||||
| const auto post_node_id = post_node->GetOpDesc()->GetId(); | |||||
| for (const auto &out_node : out_nodes) { | |||||
| // Candidates appended by FindDependentStreamBetweenGraphs are not all op-desc checked there. | |||||
| if ((out_node == nullptr) || (out_node->GetOpDesc() == nullptr)) { | |||||
| continue; | |||||
| } | |||||
| if (out_node->GetOpDesc()->GetId() == post_node_id) { | |||||
| stream_dependency_map[pre_stream_id].insert(post_node->GetOpDesc()->GetStreamId()); | |||||
| // The dependency is stored in a set; further matches for this pair would add nothing. | |||||
| break; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| /// | |||||
| /// @ingroup GE | |||||
| /// @brief Find dependent link between parent_graph and sub_graph. | |||||
| ///        Appends to out_nodes (1) every direct node of each subgraph instance of pre_node that | |||||
| ///        carries ATTR_NAME_PARENT_NODE_INDEX, and (2) when pre_node is a subgraph output, every | |||||
| ///        out node of the parent node of pre_node's owner graph. | |||||
| /// @param [in] pre_node node whose cross-graph successors are collected; ignored if null or without op desc | |||||
| /// @param [in,out] out_nodes list the successors are appended to; existing entries are kept | |||||
| /// @return void | |||||
| /// | |||||
| void BlockMemAssigner::FindDependentStreamBetweenGraphs(const NodePtr &pre_node, std::vector<NodePtr> &out_nodes) { | |||||
| if ((pre_node == nullptr) || (pre_node->GetOpDesc() == nullptr)) { | |||||
| return; | |||||
| } | |||||
| // FunctionOp & subgraph input | |||||
| const auto &subgraph_names = pre_node->GetOpDesc()->GetSubgraphInstanceNames(); | |||||
| for (const auto &subgraph_name : subgraph_names) { | |||||
| ComputeGraphPtr subgraph = compute_graph_->GetSubgraph(subgraph_name); | |||||
| if (subgraph == nullptr) { | |||||
| continue; | |||||
| } | |||||
| for (const auto &node : subgraph->GetDirectNode()) { | |||||
| OpDescPtr op_desc = node->GetOpDesc(); | |||||
| if (op_desc == nullptr) { | |||||
| continue; | |||||
| } | |||||
| if (op_desc->HasAttr(ATTR_NAME_PARENT_NODE_INDEX)) { | |||||
| out_nodes.emplace_back(node); | |||||
| } | |||||
| } | |||||
| } | |||||
| // subgraph output & parent_node output | |||||
| if (NodeUtils::IsSubgraphOutput(pre_node)) { | |||||
| // Both the owner graph and its parent node are checked before use so a missing link cannot crash here. | |||||
| const auto owner_graph = pre_node->GetOwnerComputeGraph(); | |||||
| if (owner_graph == nullptr) { | |||||
| return; | |||||
| } | |||||
| NodePtr parent_node = owner_graph->GetParentNode(); | |||||
| if (parent_node == nullptr) { | |||||
| return; | |||||
| } | |||||
| for (const auto &out_node : parent_node->GetOutNodes()) { | |||||
| out_nodes.emplace_back(out_node); | |||||
| } | |||||
| } | |||||
| } | |||||
| bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { | bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { | ||||
| return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || | return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || | ||||
| (node_type == HCOMBROADCAST) || (node_type == HCOMALLREDUCE) || (node_type == CONSTANTOP) || | (node_type == HCOMBROADCAST) || (node_type == HCOMALLREDUCE) || (node_type == CONSTANTOP) || | ||||
| @@ -34,8 +34,6 @@ | |||||
| namespace ge { | namespace ge { | ||||
| const size_t kMaxLifeTime = 0xffffffff; | const size_t kMaxLifeTime = 0xffffffff; | ||||
| using DependStreamLife = std::map<int64_t, std::map<int64_t, size_t>>; | |||||
| enum MemoryType { kOutput, kWorkspace }; | enum MemoryType { kOutput, kWorkspace }; | ||||
| struct NodeTypeIndex { | struct NodeTypeIndex { | ||||
| @@ -118,7 +116,7 @@ class MemoryBlock { | |||||
| bool IsSameLabel(std::string &first_batch_label); | bool IsSameLabel(std::string &first_batch_label); | ||||
| void AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &node_depend_stream_life); | |||||
| void AddLifeReuseBlock(MemoryBlock *block); | |||||
| void SetLifeTimeEnd(size_t time); | void SetLifeTimeEnd(size_t time); | ||||
| @@ -126,10 +124,6 @@ class MemoryBlock { | |||||
| size_t GetLifeEnd(); | size_t GetLifeEnd(); | ||||
| void AddDependLifeBegin(DependStreamLife &node_depend_stream_life); | |||||
| size_t GetDependLifeBegin(int64_t stream_id, DependStreamLife &node_depend_stream_life); | |||||
| int ref_count_; | int ref_count_; | ||||
| int64_t stream_id_; | int64_t stream_id_; | ||||
| bool deleted_block_; | bool deleted_block_; | ||||
| @@ -200,6 +194,47 @@ class BlockMemAssigner : public MemAssigner { | |||||
| void GetNodeWorkSpaceSize(const ge::NodePtr &node, std::vector<int64_t> &workspace_memory); | void GetNodeWorkSpaceSize(const ge::NodePtr &node, std::vector<int64_t> &workspace_memory); | ||||
| /// | |||||
| /// @ingroup GE | |||||
| /// @brief Traverse the compute_graph_ to find the reuse relationship between streams. | |||||
| /// @note Takes no parameters; the result is stored in the member reusable_streams_map_. | |||||
| /// @return void | |||||
| /// @author | |||||
| /// | |||||
| void InitReusableStreamMap(); | |||||
| /// | |||||
| /// @ingroup GE | |||||
| /// @brief Traverse the compute_graph_ to find the first and last node of each stream. | |||||
| /// @param [out] stream_head_tail_node_map filled with stream_id -> (first node, last node) of that stream. | |||||
| /// @param [out] stream_mem_map filled with stream_id -> summed output and workspace size of its nodes. | |||||
| /// @return void | |||||
| /// @author | |||||
| /// | |||||
| void FindHeadAndTailNodesForStream(std::map<int64_t, std::pair<NodePtr, NodePtr>> &stream_head_tail_node_map, | |||||
| std::unordered_map<int64_t, int64_t> &stream_mem_map); | |||||
| /// | |||||
| /// @ingroup GE | |||||
| /// @brief Find which streams depend on which, using the first and last node of every stream. | |||||
| /// @param [in] stream_head_tail_node_map stream_id -> (first node, last node) of that stream. | |||||
| /// @param [out] stream_dependency_map filled with stream_id -> ids of the streams that depend on it. | |||||
| /// @return void | |||||
| /// @author | |||||
| /// | |||||
| void FindDependentStream(std::map<int64_t, std::pair<NodePtr, NodePtr>> &stream_head_tail_node_map, | |||||
| std::map<int64_t, std::unordered_set<int64_t>> &stream_dependency_map); | |||||
| /// | |||||
| /// @ingroup GE | |||||
| /// @brief Find dependent link between parent_graph and sub_graph | |||||
| /// @param [in] pre_node node whose links across the parent graph / subgraph boundary are collected | |||||
| /// @param [in,out] out_nodes nodes found across the boundary are appended; existing entries are kept | |||||
| /// @return void | |||||
| /// @author | |||||
| /// | |||||
| void FindDependentStreamBetweenGraphs(const NodePtr &pre_node, std::vector<NodePtr> &out_nodes); | |||||
| /// | /// | ||||
| /// @ingroup GE | /// @ingroup GE | ||||
| /// @brief Determine whether it is the type of zero memory node. | /// @brief Determine whether it is the type of zero memory node. | ||||
| @@ -360,9 +395,9 @@ class BlockMemAssigner : public MemAssigner { | |||||
| /// @return void | /// @return void | ||||
| /// @author | /// @author | ||||
| /// | /// | ||||
| void ReuseBlocksByLifeTime(size_t range_size); | |||||
| void ReuseBlocksByLifeTime(); | |||||
| std::unordered_map<int64_t, std::vector<MemoryBlock *>> reusable_blocks_; | |||||
| std::vector<MemoryBlock *> reusable_blocks_; | |||||
| std::map<std::string, uint64_t> reusable_block_counts_; | std::map<std::string, uint64_t> reusable_block_counts_; | ||||
| @@ -376,6 +411,9 @@ class BlockMemAssigner : public MemAssigner { | |||||
| std::unordered_map<std::string, uint32_t> node_continuous_input_counts_; | std::unordered_map<std::string, uint32_t> node_continuous_input_counts_; | ||||
| // save stream_id and reusable stream_ids | |||||
| std::unordered_map<int64_t, std::unordered_set<int64_t>> reusable_streams_map_; | |||||
| // reuse memory | // reuse memory | ||||
| vector<string> op_no_reuse_mem_vec_; | vector<string> op_no_reuse_mem_vec_; | ||||
| @@ -388,8 +426,6 @@ class BlockMemAssigner : public MemAssigner { | |||||
| size_t life_time_; | size_t life_time_; | ||||
| int64_t atomic_addr_clean_id_ = 0; | int64_t atomic_addr_clean_id_ = 0; | ||||
| DependStreamLife total_node_depend_stream_life_; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_GRAPH_BUILD_MEMORY_BLOCK_MEM_ASSIGNER_H_ | #endif // GE_GRAPH_BUILD_MEMORY_BLOCK_MEM_ASSIGNER_H_ | ||||
| @@ -222,10 +222,9 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, size_t &mem_offse | |||||
| mem_offset = memory_offset_[0].mem_offset_; | mem_offset = memory_offset_[0].mem_offset_; | ||||
| auto session_id = compute_graph_->GetSessionID(); | |||||
| if (mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) { | |||||
| if (mem_offset > VarManager::Instance(0)->GetGraphMemoryMaxSize()) { | |||||
| GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", mem_offset, | GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", mem_offset, | ||||
| VarManager::Instance(session_id)->GetGraphMemoryMaxSize()); | |||||
| VarManager::Instance(0)->GetGraphMemoryMaxSize()); | |||||
| return ge::FAILED; | return ge::FAILED; | ||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -1223,16 +1222,10 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< | |||||
| peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(), | peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(), | ||||
| input_list.back()); | input_list.back()); | ||||
| } else { | } else { | ||||
| int64_t output_offset = output_list.at(peer_out_anchor->GetIdx()); | |||||
| if (peer_out_anchor->GetOwnerNode()->GetType() == CONSTANT) { | |||||
| GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(input_index); | |||||
| GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, output_offset)); | |||||
| } | |||||
| GELOGI("node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]", tmp_op_desc->GetName().c_str(), | GELOGI("node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]", tmp_op_desc->GetName().c_str(), | ||||
| input_index, peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(), | input_index, peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(), | ||||
| output_offset); | |||||
| input_list.emplace_back(output_offset); | |||||
| output_list.at(peer_out_anchor->GetIdx())); | |||||
| input_list.emplace_back(output_list.at(peer_out_anchor->GetIdx())); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -299,33 +299,21 @@ Status VarMemAssignUtil::SetOutTransNodeToAssign(const ge::NodePtr &node, const | |||||
| Status VarMemAssignUtil::AssignMemory2HasRefAttrNode(ge::ComputeGraphPtr &compute_graph) { | Status VarMemAssignUtil::AssignMemory2HasRefAttrNode(ge::ComputeGraphPtr &compute_graph) { | ||||
| for (const ge::NodePtr &n : compute_graph->GetAllNodes()) { | for (const ge::NodePtr &n : compute_graph->GetAllNodes()) { | ||||
| string ref_var_src_var_name; | string ref_var_src_var_name; | ||||
| auto op_desc = n->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| for (uint32_t idx = 0; idx < op_desc->GetOutputsSize(); idx += 1) { | |||||
| const auto out_desc = op_desc->MutableOutputDesc(idx); | |||||
| if (ge::AttrUtils::GetStr(out_desc, REF_VAR_SRC_VAR_NAME, ref_var_src_var_name)) { | |||||
| GE_CHK_STATUS_RET(AssignData2VarRef(n, ref_var_src_var_name, compute_graph->GetSessionID(), idx)); | |||||
| } | |||||
| } | |||||
| GE_CHECK_NOTNULL(n->GetOpDesc()); | |||||
| bool is_ref = ge::AttrUtils::GetStr(n->GetOpDesc(), REF_VAR_SRC_VAR_NAME, ref_var_src_var_name); | |||||
| GE_IF_BOOL_EXEC(is_ref, | |||||
| GE_CHK_STATUS_RET(AssignData2VarRef(n, ref_var_src_var_name, compute_graph->GetSessionID()))); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status VarMemAssignUtil::AssignData2VarRef(const ge::NodePtr &has_ref_attr_node, const string &src_var_name, | Status VarMemAssignUtil::AssignData2VarRef(const ge::NodePtr &has_ref_attr_node, const string &src_var_name, | ||||
| uint64_t session_id, uint32_t out_index) { | |||||
| // Get ref_var_src_var address | |||||
| auto root_graph = GraphUtils::FindRootGraph(has_ref_attr_node->GetOwnerComputeGraph()); | |||||
| GE_CHECK_NOTNULL(root_graph); | |||||
| ge::NodePtr var_ref_src_var = root_graph->FindNode(src_var_name); | |||||
| if (var_ref_src_var == nullptr) { | |||||
| for (auto sub_graph : root_graph->GetAllSubgraphs()) { | |||||
| auto node_ptr = sub_graph->FindNode(src_var_name); | |||||
| if (node_ptr != nullptr) { | |||||
| var_ref_src_var = node_ptr; | |||||
| break; | |||||
| } | |||||
| } | |||||
| uint64_t session_id) { | |||||
| if (!TransOpUtil::IsTransOp(has_ref_attr_node)) { | |||||
| return SUCCESS; | |||||
| } | } | ||||
| // Get ref_var_src_var address | |||||
| ge::NodePtr var_ref_src_var = has_ref_attr_node->GetOwnerComputeGraph()->FindNode(src_var_name); | |||||
| GE_IF_BOOL_EXEC(var_ref_src_var == nullptr || var_ref_src_var->GetOpDesc() == nullptr, return FAILED); | GE_IF_BOOL_EXEC(var_ref_src_var == nullptr || var_ref_src_var->GetOpDesc() == nullptr, return FAILED); | ||||
| GeTensorDesc src_tensor_desc = var_ref_src_var->GetOpDesc()->GetOutputDesc(0); | GeTensorDesc src_tensor_desc = var_ref_src_var->GetOpDesc()->GetOutputDesc(0); | ||||
| uint8_t *dev_ptr = nullptr; | uint8_t *dev_ptr = nullptr; | ||||
| @@ -334,8 +322,14 @@ Status VarMemAssignUtil::AssignData2VarRef(const ge::NodePtr &has_ref_attr_node, | |||||
| vector<int64_t> ref_attr_node_output_list = has_ref_attr_node->GetOpDesc()->GetOutputOffset(); | vector<int64_t> ref_attr_node_output_list = has_ref_attr_node->GetOpDesc()->GetOutputOffset(); | ||||
| GE_CHECK_SIZE(ref_attr_node_output_list.size()); | GE_CHECK_SIZE(ref_attr_node_output_list.size()); | ||||
| GE_CHK_BOOL_RET_STATUS(out_index < ref_attr_node_output_list.size(), FAILED, | |||||
| "out_index %u >= ref_attr_node_output_list.size() %zu", out_index, | |||||
| int out_index = 0; | |||||
| bool is_get = ge::AttrUtils::GetInt(var_ref_src_var->GetOpDesc(), REF_VAR_PRE_PEER_OUT_INDEX, out_index); | |||||
| if (!is_get) { | |||||
| GELOGI("%s failed to get attr [REF_VAR_PRE_PEER_OUT_INDEX]", var_ref_src_var->GetName().c_str()); | |||||
| } | |||||
| GE_CHK_BOOL_RET_STATUS(static_cast<size_t>(out_index) < ref_attr_node_output_list.size(), FAILED, | |||||
| "out_index %d >= ref_attr_node_output_list.size() %zu", out_index, | |||||
| ref_attr_node_output_list.size()); | ref_attr_node_output_list.size()); | ||||
| ref_attr_node_output_list[out_index] = static_cast<int64_t>(reinterpret_cast<uintptr_t>(dev_ptr)); | ref_attr_node_output_list[out_index] = static_cast<int64_t>(reinterpret_cast<uintptr_t>(dev_ptr)); | ||||
| @@ -46,8 +46,8 @@ class VarMemAssignUtil { | |||||
| static Status DealTransNode(const ge::NodePtr &final_trans_node); | static Status DealTransNode(const ge::NodePtr &final_trans_node); | ||||
| static Status DealExportTransNode(const ge::NodePtr &node, const ge::NodePtr &final_trans_node); | static Status DealExportTransNode(const ge::NodePtr &node, const ge::NodePtr &final_trans_node); | ||||
| static Status AssignData2VarRef(const ge::NodePtr &variable_ref, const std::string &src_var_name, uint64_t session_id, | |||||
| uint32_t out_index); | |||||
| static Status AssignData2VarRef(const ge::NodePtr &variable_ref, const std::string &src_var_name, | |||||
| uint64_t session_id); | |||||
| static Status SetOutTransNodeToAssign(const ge::NodePtr &node, const ge::NodePtr &final_trans_node, size_t index); | static Status SetOutTransNodeToAssign(const ge::NodePtr &node, const ge::NodePtr &final_trans_node, size_t index); | ||||
| }; | }; | ||||
| @@ -15,10 +15,10 @@ | |||||
| */ | */ | ||||
| #include "graph/build/model_builder.h" | #include "graph/build/model_builder.h" | ||||
| #include <securectype.h> | |||||
| #include <iostream> | #include <iostream> | ||||
| #include <set> | #include <set> | ||||
| #include <unordered_map> | #include <unordered_map> | ||||
| #include <securectype.h> | |||||
| #include "common/ge/ge_util.h" | #include "common/ge/ge_util.h" | ||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "graph/anchor.h" | #include "graph/anchor.h" | ||||
| @@ -27,7 +27,6 @@ | |||||
| #include "graph/build/label_allocator.h" | #include "graph/build/label_allocator.h" | ||||
| #include "graph/build/stream_allocator.h" | #include "graph/build/stream_allocator.h" | ||||
| #include "graph/common/omg_util.h" | #include "graph/common/omg_util.h" | ||||
| #include "graph/common/ge_call_wrapper.h" | |||||
| #include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
| #include "graph/ge_attr_value.h" | #include "graph/ge_attr_value.h" | ||||
| #include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
| @@ -86,11 +85,9 @@ bool IsGeLocalOp(const ge::ConstOpDescPtr &op_desc) { | |||||
| } // namespace | } // namespace | ||||
| namespace ge { | namespace ge { | ||||
| ModelBuilder::ModelBuilder(uint64_t session_id, ge::ComputeGraphPtr compute_graph, | |||||
| const Graph2SubGraphInfoList &subgraphs, const map<string, int> &stream_max_parallel_num, | |||||
| bool hcom_parallel, int mode) | |||||
| : session_id_(session_id), | |||||
| mem_offset_(0), | |||||
| ModelBuilder::ModelBuilder(ge::ComputeGraphPtr compute_graph, const Graph2SubGraphInfoList &subgraphs, | |||||
| const map<string, int> &stream_max_parallel_num, bool hcom_parallel, int mode) | |||||
| : mem_offset_(0), | |||||
| weight_offset_(kWeightsStartOffset), | weight_offset_(kWeightsStartOffset), | ||||
| compute_graph_(std::move(compute_graph)), | compute_graph_(std::move(compute_graph)), | ||||
| subgraphs_(subgraphs), | subgraphs_(subgraphs), | ||||
| @@ -245,7 +242,7 @@ Status ModelBuilder::SetInputOutputDesc() { | |||||
| Status ret; | Status ret; | ||||
| GELOGI("Start to SetInputOutputDesc."); | GELOGI("Start to SetInputOutputDesc."); | ||||
| for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||||
| for (const ge::NodePtr &n : compute_graph_->GetAllNodes()) { | |||||
| auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
| GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | ||||
| @@ -294,7 +291,7 @@ Status ModelBuilder::SetInputOutputDesc() { | |||||
| } | } | ||||
| void ModelBuilder::AddNodeInputProperty() { | void ModelBuilder::AddNodeInputProperty() { | ||||
| for (const ge::NodePtr &node : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||||
| for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { | |||||
| auto node_op_desc = node->GetOpDesc(); | auto node_op_desc = node->GetOpDesc(); | ||||
| GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return ); | GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return ); | ||||
| vector<string> src_name_list; | vector<string> src_name_list; | ||||
| @@ -321,7 +318,7 @@ void ModelBuilder::AddNodeInputProperty() { | |||||
| node_op_desc->SetSrcIndex(src_index_list); | node_op_desc->SetSrcIndex(src_index_list); | ||||
| } | } | ||||
| for (const ge::NodePtr &node : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||||
| for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { | |||||
| auto node_op_desc = node->GetOpDesc(); | auto node_op_desc = node->GetOpDesc(); | ||||
| GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return ); | GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return ); | ||||
| GE_IF_BOOL_EXEC(node_op_desc->GetType() == NETOUTPUT, continue); | GE_IF_BOOL_EXEC(node_op_desc->GetType() == NETOUTPUT, continue); | ||||
| @@ -359,7 +356,7 @@ void ModelBuilder::AddNodeInputProperty() { | |||||
| Status ModelBuilder::AdjustInputTensorFlag() { | Status ModelBuilder::AdjustInputTensorFlag() { | ||||
| GELOGI("Start to AdjustInputTensorFlag."); | GELOGI("Start to AdjustInputTensorFlag."); | ||||
| for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||||
| for (const ge::NodePtr &n : compute_graph_->GetAllNodes()) { | |||||
| if ((n->GetType() == DATA_TYPE) || (n->GetType() == AIPP_DATA_TYPE)) { | if ((n->GetType() == DATA_TYPE) || (n->GetType() == AIPP_DATA_TYPE)) { | ||||
| GELOGD("Data node: %s.", n->GetName().c_str()); | GELOGD("Data node: %s.", n->GetName().c_str()); | ||||
| for (const auto &anchor : n->GetAllOutDataAnchors()) { | for (const auto &anchor : n->GetAllOutDataAnchors()) { | ||||
| @@ -435,21 +432,6 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { | |||||
| GE_CHK_BOOL_EXEC(ge::AttrUtils::SetBool(&model, ATTR_NAME_SWITCH_FOR_L1_FUSION, is_l1_fusion_enable_), | GE_CHK_BOOL_EXEC(ge::AttrUtils::SetBool(&model, ATTR_NAME_SWITCH_FOR_L1_FUSION, is_l1_fusion_enable_), | ||||
| GELOGE(FAILED, "SetBool of ATTR_NAME_SWITCH_FOR_L1_FUSION failed."); | GELOGE(FAILED, "SetBool of ATTR_NAME_SWITCH_FOR_L1_FUSION failed."); | ||||
| return FAILED); | return FAILED); | ||||
| const DumpProperties &dump_properties = PropertiesManager::Instance().GetDumpProperties(session_id_); | |||||
| bool is_op_debug = dump_properties.IsOpDebugOpen(); | |||||
| GELOGI("Get op debug:%d", is_op_debug); | |||||
| if (is_op_debug) { | |||||
| if (!ge::AttrUtils::SetBool(&model, ATTR_OP_DEBUG_FLAG, is_op_debug)) { | |||||
| GELOGE(FAILED, "SetBool of ATTR_OP_DEBUG_FLAG failed."); | |||||
| return FAILED; | |||||
| } | |||||
| uint32_t op_debug_mode = dump_properties.GetOpDebugMode(); | |||||
| GELOGI("Get op debug mode:%d", op_debug_mode); | |||||
| if (!ge::AttrUtils::SetInt(&model, ATTR_OP_DEBUG_MODE, op_debug_mode)) { | |||||
| GELOGE(FAILED, "SetBool of ATTR_OP_DEBUG_MODE failed."); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| model.SetName(compute_graph_->GetName()); | model.SetName(compute_graph_->GetName()); | ||||
| model.SetGraph(ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph_)); | model.SetGraph(ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph_)); | ||||
| @@ -466,7 +448,7 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { | |||||
| } | } | ||||
| void ModelBuilder::ClearOriginalFormat() { | void ModelBuilder::ClearOriginalFormat() { | ||||
| for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||||
| for (const ge::NodePtr &n : compute_graph_->GetAllNodes()) { | |||||
| auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
| if (node_op_desc != nullptr) { | if (node_op_desc != nullptr) { | ||||
| if (node_op_desc->HasAttr(ATTR_NAME_FORMAT)) { | if (node_op_desc->HasAttr(ATTR_NAME_FORMAT)) { | ||||
| @@ -505,7 +487,7 @@ Status ModelBuilder::MergeWeights() { | |||||
| weight_buffer_ = buffer; | weight_buffer_ = buffer; | ||||
| auto base_addr = weight_buffer_.GetData(); | auto base_addr = weight_buffer_.GetData(); | ||||
| for (const ge::NodePtr &node : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||||
| for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { | |||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| GE_IF_BOOL_EXEC(op_desc == nullptr, continue); | GE_IF_BOOL_EXEC(op_desc == nullptr, continue); | ||||
| if (node->GetType() != CONSTANT) { | if (node->GetType() != CONSTANT) { | ||||
| @@ -545,8 +527,8 @@ Status ModelBuilder::MergeWeights() { | |||||
| weight_data.size()); | weight_data.size()); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| uintptr_t dst_ptr = reinterpret_cast<uintptr_t>(base_addr) + offset; | |||||
| uintptr_t src_ptr = reinterpret_cast<uintptr_t>(weight_data.data()); | |||||
| uintptr_t dst_ptr = (uintptr_t)base_addr + offset; | |||||
| uintptr_t src_ptr = (uintptr_t)weight_data.data(); | |||||
| size_t left_size = weight_data.size(); | size_t left_size = weight_data.size(); | ||||
| while (left_size > SECUREC_MEM_MAX_LEN) { | while (left_size > SECUREC_MEM_MAX_LEN) { | ||||
| auto err = memcpy_s(reinterpret_cast<void *>(dst_ptr), SECUREC_MEM_MAX_LEN, reinterpret_cast<void *>(src_ptr), | auto err = memcpy_s(reinterpret_cast<void *>(dst_ptr), SECUREC_MEM_MAX_LEN, reinterpret_cast<void *>(src_ptr), | ||||
| @@ -583,7 +565,7 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { | |||||
| // Add TBE Kernels | // Add TBE Kernels | ||||
| std::set<std::string> name_set; | std::set<std::string> name_set; | ||||
| for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||||
| for (const ge::NodePtr &n : compute_graph_->GetAllNodes()) { | |||||
| auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
| GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | ||||
| TBEKernelPtr tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); | TBEKernelPtr tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); | ||||
| @@ -677,7 +659,7 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) { | |||||
| // Compile single op in graph build stage | // Compile single op in graph build stage | ||||
| GE_TIMESTAMP_START(CompileSingleOp); | GE_TIMESTAMP_START(CompileSingleOp); | ||||
| GE_CHK_STATUS_RET(CompileSingleOp(), "ATC builder CompileSingleOp() return fail."); | GE_CHK_STATUS_RET(CompileSingleOp(), "ATC builder CompileSingleOp() return fail."); | ||||
| GE_TIMESTAMP_EVENT_END(CompileSingleOp, "GraphBuilder::CompileSingleOp"); | |||||
| GE_TIMESTAMP_END(CompileSingleOp, "GraphBuilder::CompileSingleOp"); | |||||
| // Refresh real streams and insert event nodes. | // Refresh real streams and insert event nodes. | ||||
| GE_TIMESTAMP_START(RefreshRealStream); | GE_TIMESTAMP_START(RefreshRealStream); | ||||
| @@ -718,7 +700,7 @@ Status ModelBuilder::CompileSingleOp() { | |||||
| GE_TIMESTAMP_CALLNUM_START(BatchCompileOp); | GE_TIMESTAMP_CALLNUM_START(BatchCompileOp); | ||||
| std::unordered_map<string, vector<ge::NodePtr>> node_vector_map; | std::unordered_map<string, vector<ge::NodePtr>> node_vector_map; | ||||
| for (auto &node : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||||
| for (auto &node : compute_graph_->GetAllNodes()) { | |||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
| continue; | continue; | ||||
| @@ -755,7 +737,7 @@ Status ModelBuilder::CompileSingleOp() { | |||||
| GE_CHECK_NOTNULL(kernel_info); | GE_CHECK_NOTNULL(kernel_info); | ||||
| GE_TIMESTAMP_RESTART(BatchCompileOp); | GE_TIMESTAMP_RESTART(BatchCompileOp); | ||||
| auto ret = kernel_info->CompileOp(node_vector); | auto ret = kernel_info->CompileOp(node_vector); | ||||
| GELOGI("[GEPERFTRACE] The node size of compile op of %s is %zu", kernel_lib_name.c_str(), node_vector.size()); | |||||
| GEEVENT("[GEPERFTRACE] The node size of compile op of %s is %zu", kernel_lib_name.c_str(), node_vector.size()); | |||||
| GE_TIMESTAMP_ADD(BatchCompileOp); | GE_TIMESTAMP_ADD(BatchCompileOp); | ||||
| if (ret != ge::SUCCESS) { | if (ret != ge::SUCCESS) { | ||||
| GELOGE(ret, "Compile op failed, kernel lib name is %s", kernel_lib_name.c_str()); | GELOGE(ret, "Compile op failed, kernel lib name is %s", kernel_lib_name.c_str()); | ||||
| @@ -37,7 +37,7 @@ | |||||
| namespace ge { | namespace ge { | ||||
| class ModelBuilder { | class ModelBuilder { | ||||
| public: | public: | ||||
| ModelBuilder(uint64_t session_id, ge::ComputeGraphPtr whole_graph, const Graph2SubGraphInfoList &subgraphs, | |||||
| ModelBuilder(ge::ComputeGraphPtr whole_graph, const Graph2SubGraphInfoList &subgraphs, | |||||
| const std::map<std::string, int> &stream_max_parallel_num, bool hcom_parallel, | const std::map<std::string, int> &stream_max_parallel_num, bool hcom_parallel, | ||||
| int mode = static_cast<int>(domi::BuildMode::GEN_TASK_WITHOUT_FUSION)); | int mode = static_cast<int>(domi::BuildMode::GEN_TASK_WITHOUT_FUSION)); | ||||
| @@ -82,8 +82,6 @@ class ModelBuilder { | |||||
| Status CompileSingleOp(); | Status CompileSingleOp(); | ||||
| uint64_t session_id_; | |||||
| size_t mem_offset_; | size_t mem_offset_; | ||||
| size_t weight_offset_; | size_t weight_offset_; | ||||
| @@ -173,4 +173,5 @@ Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &gra | |||||
| } | } | ||||
| RunContext &RunContextUtil::GetRunContext() { return run_context_; } | RunContext &RunContextUtil::GetRunContext() { return run_context_; } | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -146,6 +146,12 @@ Status StreamAllocator::RefreshRealStream(int64_t &stream_num, int64_t &event_nu | |||||
| return status; | return status; | ||||
| } | } | ||||
| status = AddActiveEntryStream(); | |||||
| if (status != SUCCESS) { | |||||
| GELOGE(status, "AddActiveEntryStream failed!"); | |||||
| return status; | |||||
| } | |||||
| status = RefreshContinuousEvents(); | status = RefreshContinuousEvents(); | ||||
| if (status != SUCCESS) { | if (status != SUCCESS) { | ||||
| GELOGE(status, "RefreshContinuousEvents failed!"); | GELOGE(status, "RefreshContinuousEvents failed!"); | ||||
| @@ -161,7 +167,7 @@ Status StreamAllocator::RefreshRealStream(int64_t &stream_num, int64_t &event_nu | |||||
| DumpEvents(); | DumpEvents(); | ||||
| GE_DUMP(whole_graph_, "AfterRefreshRealStream"); | GE_DUMP(whole_graph_, "AfterRefreshRealStream"); | ||||
| for (const NodePtr &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
| for (const NodePtr &node : whole_graph_->GetAllNodes()) { | |||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
| auto stream_id = node->GetOpDesc()->GetStreamId(); | auto stream_id = node->GetOpDesc()->GetStreamId(); | ||||
| if (stream_id == kInvalidStream) { | if (stream_id == kInvalidStream) { | ||||
| @@ -193,7 +199,7 @@ Status StreamAllocator::AssignSingleStream() { | |||||
| } | } | ||||
| int64_t task_count = 0; | int64_t task_count = 0; | ||||
| for (const NodePtr &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
| for (const NodePtr &node : whole_graph_->GetAllNodes()) { | |||||
| string op_type = node->GetType(); | string op_type = node->GetType(); | ||||
| if (IsHcclOp(op_type)) { | if (IsHcclOp(op_type)) { | ||||
| task_count += kTaskNumPerHcclNode; | task_count += kTaskNumPerHcclNode; | ||||
| @@ -230,7 +236,7 @@ Status StreamAllocator::AssignSingleStream() { | |||||
| } | } | ||||
| Status StreamAllocator::SetActiveStreamsByLabel() { | Status StreamAllocator::SetActiveStreamsByLabel() { | ||||
| for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
| for (const auto &node : whole_graph_->GetAllNodes()) { | |||||
| OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| string stream_label; | string stream_label; | ||||
| @@ -242,7 +248,7 @@ Status StreamAllocator::SetActiveStreamsByLabel() { | |||||
| } | } | ||||
| } | } | ||||
| for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
| for (const auto &node : whole_graph_->GetAllNodes()) { | |||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
| vector<string> activated_label_list; | vector<string> activated_label_list; | ||||
| if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_ACTIVE_LABEL_LIST, activated_label_list) || | if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_ACTIVE_LABEL_LIST, activated_label_list) || | ||||
| @@ -320,7 +326,7 @@ Status StreamAllocator::SetActiveStreamsForSubgraphs() { | |||||
| // Insert the send/recv event id to the graph | // Insert the send/recv event id to the graph | ||||
| Status StreamAllocator::InsertSyncEvents() { | Status StreamAllocator::InsertSyncEvents() { | ||||
| for (const auto &cur_node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
| for (const auto &cur_node : whole_graph_->GetAllNodes()) { | |||||
| // Take the adjacent points, then judge whether need to insert the event | // Take the adjacent points, then judge whether need to insert the event | ||||
| for (const OutDataAnchorPtr &anchor : cur_node->GetAllOutDataAnchors()) { | for (const OutDataAnchorPtr &anchor : cur_node->GetAllOutDataAnchors()) { | ||||
| for (const InDataAnchorPtr &peer_in_anchor : anchor->GetPeerInDataAnchors()) { | for (const InDataAnchorPtr &peer_in_anchor : anchor->GetPeerInDataAnchors()) { | ||||
| @@ -374,11 +380,6 @@ Status StreamAllocator::InsertOneEventInTwoNodes(const NodePtr &cur_node, const | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| if ((cur_node->GetType() == ENTER) || (cur_node->GetType() == REFENTER)) { | |||||
| GELOGD("No need to insert event after enter_node %s.", cur_node->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| if (next_stream_id == kInvalidStream) { | if (next_stream_id == kInvalidStream) { | ||||
| GELOGE(FAILED, "Stream id of next_node %s should not be %ld", next_node->GetName().c_str(), kInvalidStream); | GELOGE(FAILED, "Stream id of next_node %s should not be %ld", next_node->GetName().c_str(), kInvalidStream); | ||||
| return FAILED; | return FAILED; | ||||
| @@ -445,7 +446,7 @@ Status StreamAllocator::InsertEventsForSubgraph() { | |||||
| Status StreamAllocator::OptimizeSyncEvents() { | Status StreamAllocator::OptimizeSyncEvents() { | ||||
| map<int64_t, vector<NodePtr>> stream_nodes; | map<int64_t, vector<NodePtr>> stream_nodes; | ||||
| for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
| for (const auto &node : whole_graph_->GetAllNodes()) { | |||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
| int64_t stream_id = node->GetOpDesc()->GetStreamId(); | int64_t stream_id = node->GetOpDesc()->GetStreamId(); | ||||
| stream_nodes[stream_id].emplace_back(node); | stream_nodes[stream_id].emplace_back(node); | ||||
| @@ -670,7 +671,7 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) { | |||||
| GE_CHK_STATUS_RET(GetMaxStreamAndTask(false, max_stream_count, max_task_count), | GE_CHK_STATUS_RET(GetMaxStreamAndTask(false, max_stream_count, max_task_count), | ||||
| "Get max stream and task count failed."); | "Get max stream and task count failed."); | ||||
| for (const auto &cur_node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
| for (const auto &cur_node : whole_graph_->GetAllNodes()) { | |||||
| GE_CHECK_NOTNULL(cur_node); | GE_CHECK_NOTNULL(cur_node); | ||||
| auto op_desc = cur_node->GetOpDesc(); | auto op_desc = cur_node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| @@ -773,23 +774,42 @@ bool StreamAllocator::NeedSpiltNewStream(int64_t stream_node_num, int64_t max_no | |||||
| Status StreamAllocator::UpdateActiveStreams(const vector<set<int64_t>> &split_streams) { | Status StreamAllocator::UpdateActiveStreams(const vector<set<int64_t>> &split_streams) { | ||||
| UpdateLabelStreams(split_streams); | UpdateLabelStreams(split_streams); | ||||
| for (auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
| for (auto &node : whole_graph_->GetAllNodes()) { | |||||
| if ((node->GetType() == STREAMSWITCH) || (node->GetType() == STREAMSWITCHN)) { | if ((node->GetType() == STREAMSWITCH) || (node->GetType() == STREAMSWITCHN)) { | ||||
| if (UpdateActiveStreamsForSwitchNode(node) != SUCCESS) { | |||||
| GELOGE(FAILED, "Update active streams for switch node: %s failed.", node->GetName().c_str()); | |||||
| if (InsertActiveNodesAfterSwitch(node) != SUCCESS) { | |||||
| GELOGE(FAILED, "Insert active nodes after switch node failed."); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| } else { | } else { | ||||
| if (UpdateActiveStreamsForActiveNode(split_streams, node) != SUCCESS) { | |||||
| GELOGE(FAILED, "Update active streams for active node: %s failed.", node->GetName().c_str()); | |||||
| return FAILED; | |||||
| vector<uint32_t> active_streams; | |||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | |||||
| if (AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) { | |||||
| vector<uint32_t> new_active_streams = active_streams; | |||||
| for (const uint32_t logical_stream : active_streams) { | |||||
| if (static_cast<size_t>(logical_stream) >= split_streams.size()) { | |||||
| GELOGE(FAILED, "logical stream is out of range."); | |||||
| return FAILED; | |||||
| } | |||||
| const set<int64_t> &new_split_streams = split_streams[logical_stream]; | |||||
| if (!new_split_streams.empty()) { | |||||
| for (int64_t split_stream : new_split_streams) { | |||||
| new_active_streams.emplace_back(static_cast<uint32_t>(split_stream)); | |||||
| GELOGI("Add stream %ld to active_stream_list of node %s of graph %s", split_stream, | |||||
| node->GetName().c_str(), node->GetOwnerComputeGraph()->GetName().c_str()); | |||||
| } | |||||
| } | |||||
| } | |||||
| if (!AttrUtils::SetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, new_active_streams)) { | |||||
| GELOGE(FAILED, "Set active streams for node %s failed.", node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| Status status = UpdateActiveStreamsForSubgraphs(); | Status status = UpdateActiveStreamsForSubgraphs(); | ||||
| if (status != SUCCESS) { | if (status != SUCCESS) { | ||||
| GELOGE(status, "Update active streams for subgraphs failed!"); | |||||
| GELOGE(status, "SetActiveStreamsForSubgraph failed!"); | |||||
| return status; | return status; | ||||
| } | } | ||||
| @@ -820,7 +840,7 @@ void StreamAllocator::UpdateLabelStreams(const vector<set<int64_t>> &split_strea | |||||
| } | } | ||||
| } | } | ||||
| Status StreamAllocator::UpdateActiveStreamsForSwitchNode(NodePtr &switch_node) { | |||||
| Status StreamAllocator::InsertActiveNodesAfterSwitch(NodePtr &switch_node) { | |||||
| vector<NodePtr> active_nodes; | vector<NodePtr> active_nodes; | ||||
| if (InsertActiveNodesAfterSwitch(switch_node, active_nodes) != SUCCESS) { | if (InsertActiveNodesAfterSwitch(switch_node, active_nodes) != SUCCESS) { | ||||
| GELOGE(FAILED, "Insert active nodes after node %s failed.", switch_node->GetName().c_str()); | GELOGE(FAILED, "Insert active nodes after node %s failed.", switch_node->GetName().c_str()); | ||||
| @@ -886,38 +906,6 @@ Status StreamAllocator::InsertActiveNodesAfterSwitch(NodePtr &switch_node, vecto | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status StreamAllocator::UpdateActiveStreamsForActiveNode(const vector<set<int64_t>> &split_streams, NodePtr &node) { | |||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | |||||
| vector<uint32_t> active_streams; | |||||
| if (AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) { | |||||
| vector<uint32_t> new_active_streams = active_streams; | |||||
| for (uint32_t logical_stream : active_streams) { | |||||
| if (static_cast<size_t>(logical_stream) >= split_streams.size()) { | |||||
| GELOGE(FAILED, "logical stream is out of range."); | |||||
| return FAILED; | |||||
| } | |||||
| const set<int64_t> &new_split_streams = split_streams[logical_stream]; | |||||
| for (int64_t split_stream : new_split_streams) { | |||||
| for (const auto &node_stream : node_split_stream_map_) { | |||||
| if (split_stream == node_stream.second) { | |||||
| if (node_stream.first->GetOwnerComputeGraph() == node->GetOwnerComputeGraph()) { | |||||
| new_active_streams.emplace_back(static_cast<uint32_t>(split_stream)); | |||||
| GELOGI("Add stream %ld to active_stream_list of node %s of graph %s", split_stream, | |||||
| node->GetName().c_str(), node->GetOwnerComputeGraph()->GetName().c_str()); | |||||
| } | |||||
| break; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| if (!AttrUtils::SetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, new_active_streams)) { | |||||
| GELOGE(FAILED, "Set active streams for node %s failed.", node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status StreamAllocator::UpdateActiveStreamsForSubgraphs() const { | Status StreamAllocator::UpdateActiveStreamsForSubgraphs() const { | ||||
| // Update active stream list for active nodes | // Update active stream list for active nodes | ||||
| for (auto &node_stream_pair : node_split_stream_map_) { | for (auto &node_stream_pair : node_split_stream_map_) { | ||||
| @@ -938,19 +926,14 @@ Status StreamAllocator::UpdateActiveStreamsForSubgraphs() const { | |||||
| } | } | ||||
| const auto &active_node = it->second; | const auto &active_node = it->second; | ||||
| GE_CHECK_NOTNULL(active_node); | GE_CHECK_NOTNULL(active_node); | ||||
| auto active_op = active_node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(active_op); | |||||
| auto op_desc = active_node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| vector<uint32_t> active_streams; | vector<uint32_t> active_streams; | ||||
| (void)AttrUtils::GetListInt(active_op, ATTR_NAME_ACTIVE_STREAM_LIST, active_streams); | |||||
| (void)AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_streams); | |||||
| set<uint32_t> new_active_streams(active_streams.begin(), active_streams.end()); | set<uint32_t> new_active_streams(active_streams.begin(), active_streams.end()); | ||||
| // specific_activated_streams_ has already contained new split activated stream | |||||
| int64_t new_split_stream = node_stream_pair.second; | |||||
| if (IsActivated(new_split_stream)) { | |||||
| continue; | |||||
| } | |||||
| new_active_streams.emplace(static_cast<uint32_t>(new_split_stream)); | |||||
| new_active_streams.emplace(static_cast<uint32_t>(node_stream_pair.second)); | |||||
| active_streams.assign(new_active_streams.begin(), new_active_streams.end()); | active_streams.assign(new_active_streams.begin(), new_active_streams.end()); | ||||
| if (!AttrUtils::SetListInt(active_op, ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) { | |||||
| if (!AttrUtils::SetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) { | |||||
| GELOGE(FAILED, "Set active streams for node %s failed.", active_node->GetName().c_str()); | GELOGE(FAILED, "Set active streams for node %s failed.", active_node->GetName().c_str()); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -959,20 +942,6 @@ Status StreamAllocator::UpdateActiveStreamsForSubgraphs() const { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| bool StreamAllocator::IsActivated(int64_t stream_id) const { | |||||
| for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| vector<uint32_t> active_streams; | |||||
| if (op_desc == nullptr || !AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) { | |||||
| continue; | |||||
| } | |||||
| if (std::find(active_streams.begin(), active_streams.end(), stream_id) != active_streams.end()) { | |||||
| return true; | |||||
| } | |||||
| } | |||||
| return false; | |||||
| } | |||||
| Status StreamAllocator::SetActiveStreamsForLoop() { | Status StreamAllocator::SetActiveStreamsForLoop() { | ||||
| vector<uint32_t> loop_active_streams; | vector<uint32_t> loop_active_streams; | ||||
| for (int64_t stream_id = 0; stream_id < stream_num_; stream_id++) { | for (int64_t stream_id = 0; stream_id < stream_num_; stream_id++) { | ||||
| @@ -981,7 +950,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { | |||||
| } | } | ||||
| } | } | ||||
| // Set the stream that needs to be activated | // Set the stream that needs to be activated | ||||
| for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
| for (const auto &node : whole_graph_->GetAllNodes()) { | |||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
| bool is_loop_active = false; | bool is_loop_active = false; | ||||
| if (AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, is_loop_active) && is_loop_active) { | if (AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, is_loop_active) && is_loop_active) { | ||||
| @@ -1004,7 +973,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { | |||||
| } | } | ||||
| Status StreamAllocator::CheckStreamActived() const { | Status StreamAllocator::CheckStreamActived() const { | ||||
| for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
| for (const auto &node : whole_graph_->GetAllNodes()) { | |||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
| vector<uint32_t> active_streams; | vector<uint32_t> active_streams; | ||||
| if (AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) { | if (AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) { | ||||
| @@ -1020,6 +989,108 @@ Status StreamAllocator::CheckStreamActived() const { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| // Add active entry stream for special env. | |||||
| Status StreamAllocator::AddActiveEntryStream() { | |||||
| auto gelib = GELib::GetInstance(); | |||||
| bool head_stream = (gelib == nullptr) ? false : gelib->HeadStream(); | |||||
| GELOGI("Configured head stream: %u", head_stream); | |||||
| if (!head_stream) { | |||||
| return SUCCESS; | |||||
| } | |||||
| // Collect streams active by StreamSwitch/StreamActive node. | |||||
| std::set<uint32_t> deactive_stream; | |||||
| for (ge::NodePtr &node : whole_graph_->GetAllNodes()) { | |||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | |||||
| Status ret = CollectDeactiveStream(node->GetOpDesc(), deactive_stream); | |||||
| if (ret != SUCCESS) { | |||||
| return ret; | |||||
| } | |||||
| } | |||||
| // Collect default active stream, Add to active entry stream. | |||||
| std::vector<uint32_t> active_stream_list; | |||||
| for (int64_t stream_id = 0; stream_id < stream_num_; ++stream_id) { | |||||
| if (deactive_stream.count(stream_id) == 0) { | |||||
| active_stream_list.push_back(stream_id); | |||||
| } | |||||
| } | |||||
| int64_t new_stream_id = stream_num_; | |||||
| stream_num_++; | |||||
| return InsertActiveEntryStream(active_stream_list, new_stream_id); | |||||
| } | |||||
| // Collect deactive stream from flowctrl op. | |||||
| Status StreamAllocator::CollectDeactiveStream(const OpDescPtr &op_desc, std::set<uint32_t> &deactive_streams) const { | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| std::string op_type = op_desc->GetType(); | |||||
| if (op_type == STREAMSWITCH) { | |||||
| std::vector<uint32_t> active_stream_list; | |||||
| // If GetListInt fail, active_stream_list is empty. | |||||
| (void)ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_stream_list); | |||||
| if (active_stream_list.size() != kMaxSwitchStreamNum) { | |||||
| GELOGE(INTERNAL_ERROR, "Stream num of switch true branch must be %u.", kMaxSwitchStreamNum); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| deactive_streams.insert(active_stream_list[0]); | |||||
| GELOGI("Flowctrl_op node:%s, flowctrl stream id:%u.", op_desc->GetName().c_str(), active_stream_list[0]); | |||||
| } else if (op_type == STREAMACTIVE) { | |||||
| if (op_desc->HasAttr(ATTR_NAME_SWITCH_BRANCH_NODE_LABEL)) { | |||||
| std::vector<uint32_t> active_stream_list; | |||||
| if (!AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_stream_list)) { | |||||
| GELOGE(INTERNAL_ERROR, "StreamActiveOp get attr ACTIVE_STREAM fail."); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| for (uint32_t deactive_stream : active_stream_list) { | |||||
| deactive_streams.insert(deactive_stream); | |||||
| GELOGI("Flowctrl_op node:%s, flowctrl stream id:%u.", op_desc->GetName().c_str(), deactive_stream); | |||||
| } | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| // Insert StreamActive Op for Entry Stream. | |||||
| Status StreamAllocator::InsertActiveEntryStream(const std::vector<uint32_t> &active_streams, int64_t stream_id) { | |||||
| string node_name = whole_graph_->GetName() + "_ActiveEntryStream_" + string(STREAMACTIVE); | |||||
| OpDescPtr op_desc = ge::MakeShared<OpDesc>(node_name, STREAMACTIVE); | |||||
| if (op_desc == nullptr) { | |||||
| GELOGE(FAILED, "Failed to new opdesc."); | |||||
| return FAILED; | |||||
| } | |||||
| GELOGI("Create StreamActive op:%s.", op_desc->GetName().c_str()); | |||||
| GE_CHK_BOOL_EXEC( | |||||
| AttrUtils::SetListStr(op_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, std::move(std::vector<std::string>())), | |||||
| GELOGE(FAILED, "SetListStr failed."); | |||||
| return FAILED); | |||||
| NodePtr active_node = whole_graph_->AddNodeFront(op_desc); | |||||
| GE_IF_BOOL_EXEC(active_node == nullptr, | |||||
| GELOGE(FAILED, "Create StreamActive op: %s failed.", op_desc->GetName().c_str()); | |||||
| return INTERNAL_ERROR); | |||||
| GE_CHECK_NOTNULL(active_node->GetOpDesc()); | |||||
| // Add one stream for ActiveEntryStream Task. | |||||
| active_node->GetOpDesc()->SetStreamId(stream_id); | |||||
| GE_CHK_BOOL_EXEC(AttrUtils::SetBool(op_desc, "is_aicpu_stream", true), GELOGE(FAILED, "SetBool failed."); | |||||
| return FAILED); | |||||
| GE_CHK_BOOL_EXEC(AttrUtils::SetListInt(active_node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, active_streams), | |||||
| GELOGE(FAILED, "SetListInt failed."); | |||||
| return FAILED); | |||||
| std::vector<std::string> group_names; | |||||
| GE_CHK_BOOL_EXEC(AttrUtils::SetListStr(active_node->GetOpDesc(), ATTR_NAME_SWITCH_BRANCH_NODE_LABEL, group_names), | |||||
| GELOGE(FAILED, "SetLisStr failed."); | |||||
| return FAILED); | |||||
| return SUCCESS; | |||||
| } | |||||
| // Refresh events to continuous events | // Refresh events to continuous events | ||||
| Status StreamAllocator::RefreshContinuousEvents() { | Status StreamAllocator::RefreshContinuousEvents() { | ||||
| // Establish a mapping relationship from old to new event id | // Establish a mapping relationship from old to new event id | ||||
| @@ -1065,7 +1136,7 @@ Status StreamAllocator::RefreshContinuousEvents() { | |||||
| // Insert the real send/recv node in the graph | // Insert the real send/recv node in the graph | ||||
| Status StreamAllocator::InsertSyncEventNodes() { | Status StreamAllocator::InsertSyncEventNodes() { | ||||
| for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
| for (const auto &node : whole_graph_->GetAllNodes()) { | |||||
| // Add the node corresponding to the recv event | // Add the node corresponding to the recv event | ||||
| vector<uint32_t> recv_event_id_list; | vector<uint32_t> recv_event_id_list; | ||||
| GetRecvEventIdList(node, recv_event_id_list); | GetRecvEventIdList(node, recv_event_id_list); | ||||
| @@ -1152,7 +1223,7 @@ Status StreamAllocator::ReorderEventNodes() const { | |||||
| void StreamAllocator::DumpEvents() { | void StreamAllocator::DumpEvents() { | ||||
| map<int64_t, vector<NodePtr>> after_refresh_stream_nodes; | map<int64_t, vector<NodePtr>> after_refresh_stream_nodes; | ||||
| for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
| for (const auto &node : whole_graph_->GetAllNodes()) { | |||||
| GE_IF_BOOL_EXEC(node->GetOpDesc() == nullptr, continue); | GE_IF_BOOL_EXEC(node->GetOpDesc() == nullptr, continue); | ||||
| int64_t stream_id = node->GetOpDesc()->GetStreamId(); | int64_t stream_id = node->GetOpDesc()->GetStreamId(); | ||||
| after_refresh_stream_nodes[stream_id].emplace_back(node); | after_refresh_stream_nodes[stream_id].emplace_back(node); | ||||
| @@ -59,16 +59,18 @@ class StreamAllocator { | |||||
| Status SplitStreams(std::vector<std::set<int64_t>> &split_streams); | Status SplitStreams(std::vector<std::set<int64_t>> &split_streams); | ||||
| bool NeedSpiltNewStream(int64_t stream_node_num, int64_t max_node_num_one_stream, const OpDescPtr &op_desc) const; | bool NeedSpiltNewStream(int64_t stream_node_num, int64_t max_node_num_one_stream, const OpDescPtr &op_desc) const; | ||||
| Status UpdateActiveStreams(const std::vector<std::set<int64_t>> &split_streams); | |||||
| Status UpdateActiveStreams(const std::vector<std::set<int64_t>> &splited_streams); | |||||
| void UpdateLabelStreams(const std::vector<std::set<int64_t>> &split_streams); | void UpdateLabelStreams(const std::vector<std::set<int64_t>> &split_streams); | ||||
| Status UpdateActiveStreamsForSwitchNode(NodePtr &switch_node); | |||||
| Status InsertActiveNodesAfterSwitch(NodePtr &switch_node); | |||||
| Status InsertActiveNodesAfterSwitch(NodePtr &switch_nodes, std::vector<NodePtr> &switch_active_nodes); | Status InsertActiveNodesAfterSwitch(NodePtr &switch_nodes, std::vector<NodePtr> &switch_active_nodes); | ||||
| Status UpdateActiveStreamsForActiveNode(const std::vector<std::set<int64_t>> &split_streams, NodePtr &node); | |||||
| Status UpdateActiveStreamsForSubgraphs() const; | Status UpdateActiveStreamsForSubgraphs() const; | ||||
| bool IsActivated(int64_t stream_id) const; | |||||
| Status SetActiveStreamsForLoop(); | Status SetActiveStreamsForLoop(); | ||||
| Status CheckStreamActived() const; | Status CheckStreamActived() const; | ||||
| Status AddActiveEntryStream(); | |||||
| Status CollectDeactiveStream(const OpDescPtr &op_desc, std::set<uint32_t> &deactive_streams) const; | |||||
| Status InsertActiveEntryStream(const std::vector<uint32_t> &active_streams, int64_t stream_id); | |||||
| Status RefreshContinuousEvents(); | Status RefreshContinuousEvents(); | ||||
| Status InsertSyncEventNodes(); | Status InsertSyncEventNodes(); | ||||
| @@ -29,7 +29,6 @@ | |||||
| #include "graph/utils/node_utils.h" | #include "graph/utils/node_utils.h" | ||||
| #include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" | ||||
| #include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
| #include "graph/common/ge_call_wrapper.h" | |||||
| #include "init/gelib.h" | #include "init/gelib.h" | ||||
| using domi::LogTimeStampDef; | using domi::LogTimeStampDef; | ||||
| @@ -48,6 +47,7 @@ const char *const kIsOutputVar = "OUTPUT_IS_VAR"; | |||||
| const char *const kProfilingMode = "PROFILING_MODE"; | const char *const kProfilingMode = "PROFILING_MODE"; | ||||
| const char *const kProfilingFpPoint = "FP_POINT"; | const char *const kProfilingFpPoint = "FP_POINT"; | ||||
| const char *const kProfilingBpPoint = "BP_POINT"; | const char *const kProfilingBpPoint = "BP_POINT"; | ||||
| const char *const kOffOptimize = "off_optimize"; | |||||
| const uint32_t kProfilingArStep = 2; | const uint32_t kProfilingArStep = 2; | ||||
| const uint64_t kProfilingFpStartLogid = 1; | const uint64_t kProfilingFpStartLogid = 1; | ||||
| const uint64_t kProfilingBpEndLogid = 2; | const uint64_t kProfilingBpEndLogid = 2; | ||||
| @@ -75,7 +75,21 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t | |||||
| std::vector<TaskDef> task_def_list; | std::vector<TaskDef> task_def_list; | ||||
| std::map<uint32_t, string> op_name_map; | std::map<uint32_t, string> op_name_map; | ||||
| GE_DUMP(graph, "GenerateTaskBefore"); | GE_DUMP(graph, "GenerateTaskBefore"); | ||||
| Status ret = GenerateTask(run_context, graph, task_def_list, op_name_map); | |||||
| bool is_unknown_shape = false; | |||||
| NodePtr parent_node = graph->GetParentNode(); | |||||
| if (parent_node != nullptr) { | |||||
| auto op_desc = parent_node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| (void)AttrUtils::GetBool(op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape); | |||||
| } | |||||
| Status ret = SUCCESS; | |||||
| if (is_unknown_shape) { | |||||
| GELOGI("Beign to generate unknown shape task. Graph name is %s.", graph->GetName().c_str()); | |||||
| ret = GenerateUnknownShapeTask(run_context, graph, task_def_list, op_name_map); | |||||
| } else { | |||||
| GELOGI("Beign to generate known shape task. Graph name is %s.", graph->GetName().c_str()); | |||||
| ret = GenerateTask(run_context, graph, task_def_list, op_name_map); | |||||
| } | |||||
| GE_DUMP(graph, "GenerateTaskAfter"); | GE_DUMP(graph, "GenerateTaskAfter"); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| @@ -95,7 +109,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t | |||||
| GELOGE(FAILED, "SetListStr failed."); | GELOGE(FAILED, "SetListStr failed."); | ||||
| return FAILED); | return FAILED); | ||||
| GELOGI("Call GenerateTask Success, task_def_list.size:%zu, op_name_map.size:%zu", task_def_list.size(), | |||||
| GELOGI("Generate task success, task_def_list.size:%zu, op_name_map.size:%zu", task_def_list.size(), | |||||
| op_name_map.size()); | op_name_map.size()); | ||||
| // Init and serialize model_task_def | // Init and serialize model_task_def | ||||
| @@ -117,7 +131,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| GELOGI("Get TaskInfo success. session_id=%lu", session_id); | |||||
| GELOGI("Get TaskInfo success. session id is %lu", session_id); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -184,7 +198,7 @@ Status TaskGenerator::UpdateOpIsVarAttr(const OpDescPtr &op_desc, uint64_t sessi | |||||
| Status TaskGenerator::SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion_nodes, ComputeGraphPtr &graph) { | Status TaskGenerator::SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion_nodes, ComputeGraphPtr &graph) { | ||||
| std::map<NodePtr, int64_t> nodes_with_group_attr; | std::map<NodePtr, int64_t> nodes_with_group_attr; | ||||
| for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
| for (auto &node : graph->GetAllNodes()) { | |||||
| OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| int64_t group_id = kInvalidGroupId; | int64_t group_id = kInvalidGroupId; | ||||
| @@ -235,13 +249,12 @@ Status TaskGenerator::SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion | |||||
| Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &graph, | Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &graph, | ||||
| vector<domi::TaskDef> &task_def_list, map<uint32_t, string> &op_name_map) { | vector<domi::TaskDef> &task_def_list, map<uint32_t, string> &op_name_map) { | ||||
| GELOGD("Beign to generate task, graph name is %s.", graph->GetName().c_str()); | |||||
| std::shared_ptr<GELib> ge_lib = GELib::GetInstance(); | std::shared_ptr<GELib> ge_lib = GELib::GetInstance(); | ||||
| if ((ge_lib == nullptr) || !ge_lib->InitFlag()) { | if ((ge_lib == nullptr) || !ge_lib->InitFlag()) { | ||||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GenerateTask failed."); | GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GenerateTask failed."); | ||||
| return GE_CLI_GE_NOT_INITIALIZED; | return GE_CLI_GE_NOT_INITIALIZED; | ||||
| } | } | ||||
| GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "MarkNodeAndSetIndex failed."); | |||||
| GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "Mark node and set index failed."); | |||||
| ProfilingPoint profiling_point; | ProfilingPoint profiling_point; | ||||
| vector<uint32_t> all_reduce_nodes; | vector<uint32_t> all_reduce_nodes; | ||||
| GE_CHK_STATUS_RET(FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes)); | GE_CHK_STATUS_RET(FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes)); | ||||
| @@ -251,21 +264,15 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
| GE_TIMESTAMP_CALLNUM_START(GenerateTask); | GE_TIMESTAMP_CALLNUM_START(GenerateTask); | ||||
| // map store fusion nodes | // map store fusion nodes | ||||
| map<int64_t, std::vector<NodePtr>> fusion_nodes; | map<int64_t, std::vector<NodePtr>> fusion_nodes; | ||||
| string buffer_optimize = "off_optimize"; | |||||
| string buffer_optimize = kOffOptimize; | |||||
| (void)ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize); | (void)ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize); | ||||
| if (buffer_optimize != "off_optimize") { | |||||
| if (buffer_optimize != kOffOptimize) { | |||||
| GE_CHK_STATUS_RET(SaveFusionNodes(fusion_nodes, graph)); | GE_CHK_STATUS_RET(SaveFusionNodes(fusion_nodes, graph)); | ||||
| } | } | ||||
| std::unordered_set<Node *> fusion_nodes_seen; | std::unordered_set<Node *> fusion_nodes_seen; | ||||
| int64_t group_key; | int64_t group_key; | ||||
| uint32_t node_index = 0; | uint32_t node_index = 0; | ||||
| rtStream_t stream = nullptr; | |||||
| bool is_unknown_shape = graph->GetGraphUnknownFlag(); | |||||
| if (is_unknown_shape) { | |||||
| GE_CHK_STATUS_RET(SetUnknownShapeStream(run_context, stream), "Set unknown shape stream failed."); | |||||
| } | |||||
| for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
| for (auto &node : graph->GetAllNodes()) { | |||||
| OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| node_index++; | node_index++; | ||||
| @@ -295,6 +302,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
| GELOGI("Node[name:%s, type:%s] does not need to generate task.", name.c_str(), type.c_str()); | GELOGI("Node[name:%s, type:%s] does not need to generate task.", name.c_str(), type.c_str()); | ||||
| continue; | continue; | ||||
| } | } | ||||
| OpsKernelInfoStorePtr kernel_info_store = ops_kernel_manager.GetOpsKernelInfoStore(op_kernel_lib_name); | OpsKernelInfoStorePtr kernel_info_store = ops_kernel_manager.GetOpsKernelInfoStore(op_kernel_lib_name); | ||||
| if (kernel_info_store == nullptr) { | if (kernel_info_store == nullptr) { | ||||
| GELOGE(INTERNAL_ERROR, "No ops kernel store found. node:%s(%s), op_kernel_lib_name=%s.", name.c_str(), | GELOGE(INTERNAL_ERROR, "No ops kernel store found. node:%s(%s), op_kernel_lib_name=%s.", name.c_str(), | ||||
| @@ -303,17 +311,18 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
| } | } | ||||
| GE_CHK_STATUS_RET(UpdateAnchorStatus(node), "Call UpdateAnchorStatus node:%s(%s) failed", name.c_str(), | GE_CHK_STATUS_RET(UpdateAnchorStatus(node), "Call UpdateAnchorStatus node:%s(%s) failed", name.c_str(), | ||||
| type.c_str()); | type.c_str()); | ||||
| int64_t op_id = op_desc->GetId(); | |||||
| int64_t stream_id = op_desc->GetStreamId(); | |||||
| if (stream_id < 0 || stream_id >= static_cast<int64_t>(run_context.graphStreamList.size())) { | |||||
| GELOGE(INTERNAL_ERROR, "node[name:%s(%s), id:%ld] stream id is invalid, stream list size=%zu", name.c_str(), | |||||
| type.c_str(), op_id, run_context.graphStreamList.size()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| // Profiling task | // Profiling task | ||||
| size_t task_list_size_before = task_def_list.size(); | size_t task_list_size_before = task_def_list.size(); | ||||
| GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); | GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); | ||||
| int64_t op_id = op_desc->GetId(); | |||||
| // Compatible with dynamic shape scenes, the default is 0 | |||||
| int64_t stream_id = 0; | |||||
| if (!is_unknown_shape) { | |||||
| stream_id = op_desc->GetStreamId(); | |||||
| GE_CHK_STATUS_RET(SetKnownShapeStream(run_context, stream_id), "node[name:%s(%s), id:%ld] stream id is invalid.", | |||||
| name.c_str(), type.c_str(), op_id); | |||||
| } | |||||
| run_context.stream = run_context.graphStreamList[stream_id]; | |||||
| GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), | GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), | ||||
| name.c_str(), type.c_str(), op_id, stream_id); | name.c_str(), type.c_str(), op_id, stream_id); | ||||
| GE_TIMESTAMP_RESTART(GenerateTask); | GE_TIMESTAMP_RESTART(GenerateTask); | ||||
| @@ -346,14 +355,131 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
| GE_CHECK_NOTNULL(task_def_ptr); | GE_CHECK_NOTNULL(task_def_ptr); | ||||
| task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast<uintptr_t>(ops_kernel_info_store_ptr)); | task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast<uintptr_t>(ops_kernel_info_store_ptr)); | ||||
| } | } | ||||
| GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task finished, generate %zu task(s).", | GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task finished, generate %zu task(s).", | ||||
| op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id, | op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id, | ||||
| task_list_size_after - task_list_size_before); | task_list_size_after - task_list_size_before); | ||||
| } | } | ||||
| if (is_unknown_shape) { | |||||
| GE_CHK_STATUS_RET(DestroyUnknownShapeStream(run_context, stream), "Destory unknown shape stream failed."); | |||||
| GE_TIMESTAMP_CALLNUM_END(GenerateTask, "GraphBuild::GenerateTask"); | |||||
| return SUCCESS; | |||||
| } | |||||
// Generates the task definitions for every node of `graph` on one temporary runtime stream.
//
// The stream is created here, stored in run_context.stream, bound to run_context.model, and
// unbound and destroyed again after the node loop. New entries are appended to `task_def_list`,
// and `op_name_map` maps each new task index to the name of the node that produced it.
// Returns SUCCESS, GE_CLI_GE_NOT_INITIALIZED when GELib is unavailable, or the first failure
// status reported by a helper or by the ops kernel store.
| Status TaskGenerator::GenerateUnknownShapeTask(RunContext &run_context, ComputeGraphPtr &graph, | |||||
| vector<domi::TaskDef> &task_def_list, | |||||
| map<uint32_t, string> &op_name_map) { | |||||
| std::shared_ptr<GELib> ge_lib = GELib::GetInstance(); | |||||
| if ((ge_lib == nullptr) || !ge_lib->InitFlag()) { | |||||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GenerateTask failed."); | |||||
| return GE_CLI_GE_NOT_INITIALIZED; | |||||
| } | |||||
| GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "Mark node and set index failed."); | |||||
| ProfilingPoint profiling_point; | |||||
| vector<uint32_t> all_reduce_nodes; | |||||
| GE_CHK_STATUS_RET(FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes)); | |||||
| const OpsKernelManager &ops_kernel_manager = ge_lib->OpsKernelManagerObj(); | |||||
| GE_TIMESTAMP_CALLNUM_START(GenerateTask); | |||||
| // map store fusion nodes | |||||
| map<int64_t, std::vector<NodePtr>> fusion_nodes; | |||||
| string buffer_optimize = kOffOptimize; | |||||
| (void)ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize); | |||||
| if (buffer_optimize != kOffOptimize) { | |||||
| GE_CHK_STATUS_RET(SaveFusionNodes(fusion_nodes, graph)); | |||||
| } | |||||
| std::unordered_set<Node *> fusion_nodes_seen; | |||||
| int64_t group_key; | |||||
| uint32_t node_index = 0; | |||||
// One temporary stream serves every node of the graph; it is released after the loop.
// NOTE(review): the early returns inside the loop below (explicit `return` statements and,
// presumably, the GE_CHK_STATUS_RET / GE_CHECK_NOTNULL macros) skip the unbind/destroy at the
// end of the function, so the stream looks leaked on failure. Confirm and consider a scope guard.
| rtStream_t stream = nullptr; | |||||
| GE_CHK_RT_RET(rtStreamCreate(&stream, 0)); | |||||
| run_context.stream = stream; | |||||
| if (rtModelBindStream(run_context.model, stream, 0) != RT_ERROR_NONE) { | |||||
| GELOGE(FAILED, "Call rt api failed."); | |||||
| GE_CHK_RT(rtStreamDestroy(stream)); | |||||
| return FAILED; | |||||
| } | } | ||||
// NOTE(review): the GE_TIMESTAMP_CALLNUM_EVENT_END row below looks like a removed line from the
// previous revision's GenerateTask tail rather than part of this function. Confirm against the file.
| GE_TIMESTAMP_CALLNUM_EVENT_END(GenerateTask, "GraphBuild::GenerateTask"); | |||||
| for (auto &node : graph->GetAllNodes()) { | |||||
| OpDescPtr op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| node_index++; | |||||
| string name = node->GetName(); | |||||
| string type = node->GetType(); | |||||
| bool attr_notask = false; | |||||
| bool get_attr_notask_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOTASK, attr_notask); | |||||
| GE_IF_BOOL_EXEC(get_attr_notask_flag && attr_notask, | |||||
| GELOGI("Node[name:%s, type:%s] does not need to generate task.", name.c_str(), type.c_str()); | |||||
| continue); | |||||
| GE_CHK_STATUS_RET(UpdateOpIsVarAttr(op_desc, graph->GetSessionID())); | |||||
| string op_kernel_lib_name = op_desc->GetOpKernelLibName(); | |||||
| // For fusion ddb pass, task def must be continuous. | |||||
| // Part2: Call | |||||
| auto fusion_task_info = | |||||
| FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib, | |||||
| ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes}; | |||||
| GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen), | |||||
| "Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str()); | |||||
| // continue directly | |||||
| if (ge::AttrUtils::GetInt(op_desc, ATTR_NAME_FUSION_GROUP_KEY, group_key)) { | |||||
| GELOGI("Fusion node[name:%s, type:%s] do not need generate task again.", name.c_str(), type.c_str()); | |||||
| continue; | |||||
| } | |||||
| if (op_kernel_lib_name.empty()) { | |||||
| GELOGI("Node[name:%s, type:%s] does not need to generate task.", name.c_str(), type.c_str()); | |||||
| continue; | |||||
| } | |||||
| OpsKernelInfoStorePtr kernel_info_store = ops_kernel_manager.GetOpsKernelInfoStore(op_kernel_lib_name); | |||||
| if (kernel_info_store == nullptr) { | |||||
| GELOGE(INTERNAL_ERROR, "No ops kernel store found. node:%s(%s), op_kernel_lib_name=%s.", name.c_str(), | |||||
| type.c_str(), op_kernel_lib_name.c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| GE_CHK_STATUS_RET(UpdateAnchorStatus(node), "Call UpdateAnchorStatus node:%s(%s) failed", name.c_str(), | |||||
| type.c_str()); | |||||
// In this function op_id and stream_id are only used in log messages; the task itself runs on
// run_context.stream, which was set to the temporary stream above.
| int64_t op_id = op_desc->GetId(); | |||||
| int64_t stream_id = op_desc->GetStreamId(); | |||||
| // Profiling task | |||||
| size_t task_list_size_before = task_def_list.size(); | |||||
| GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); | |||||
| GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), | |||||
| name.c_str(), type.c_str(), op_id, stream_id); | |||||
| GE_TIMESTAMP_RESTART(GenerateTask); | |||||
| auto ret = kernel_info_store->GenerateTask(*node, run_context, task_def_list); | |||||
| GE_TIMESTAMP_ADD(GenerateTask); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task failed.", | |||||
| op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id); | |||||
| return ret; | |||||
| } | |||||
| // Profiling task | |||||
| GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); | |||||
| size_t task_list_size_after = task_def_list.size(); | |||||
| // If tasks is reduced | |||||
| if (task_list_size_after < task_list_size_before) { | |||||
| GELOGE(FAILED, "Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task. but task num from %zu to %zu.", | |||||
| op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id, task_list_size_before, | |||||
| task_list_size_after); | |||||
| return FAILED; | |||||
| } | |||||
| // Reset stream id to ge stream id, as graph load must use ge stream to reassign stream | |||||
| void *ops_kernel_info_store_ptr = kernel_info_store.get(); | |||||
// Tag every task appended for this node with the node name and the kernel store that produced it.
| for (size_t idx = task_list_size_before; idx < task_list_size_after; ++idx) { | |||||
| op_name_map[idx] = name; | |||||
| // Set opsKernelInfoStorePtr and op_index, the two fields be use in DistributeTask and InitTaskInfo | |||||
| TaskDef *task_def_ptr = &task_def_list[idx]; | |||||
| GE_CHECK_NOTNULL(task_def_ptr); | |||||
| task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast<uintptr_t>(ops_kernel_info_store_ptr)); | |||||
| } | |||||
| GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task finished, generate %zu task(s).", | |||||
| op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id, | |||||
| task_list_size_after - task_list_size_before); | |||||
| } | |||||
// Release the temporary stream: unbind it from the model, then destroy it.
| GE_CHK_RT(rtModelUnbindStream(run_context.model, stream)); | |||||
| GE_CHK_RT(rtStreamDestroy(stream)); | |||||
| GE_TIMESTAMP_CALLNUM_END(GenerateTask, "GraphBuild::GenerateTask"); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -502,11 +628,7 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { | |||||
| return GE_CLI_GE_NOT_INITIALIZED; | return GE_CLI_GE_NOT_INITIALIZED; | ||||
| } | } | ||||
| const auto all_nodes = graph->GetNodes(graph->GetGraphUnknownFlag()); | |||||
| if (all_nodes.empty()) { | |||||
| GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "Graph's node is empty"); | |||||
| return GE_GRAPH_GRAPH_NODE_NULL; | |||||
| } | |||||
| const auto all_nodes = graph->GetAllNodes(); | |||||
| int64_t node_index = 0; | int64_t node_index = 0; | ||||
| for (auto &node : all_nodes) { | for (auto &node : all_nodes) { | ||||
| @@ -593,7 +715,7 @@ Status TaskGenerator::AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
| OpDescPtr fp_op_desc = nullptr; | OpDescPtr fp_op_desc = nullptr; | ||||
| uint32_t current_idx = 0; | uint32_t current_idx = 0; | ||||
| uint32_t first_fp = 0; | uint32_t first_fp = 0; | ||||
| for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
| for (auto &node : graph->GetAllNodes()) { | |||||
| OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| string op_kernel_lib_name = op_desc->GetOpKernelLibName(); | string op_kernel_lib_name = op_desc->GetOpKernelLibName(); | ||||
| @@ -620,7 +742,7 @@ Status TaskGenerator::AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| GELOGI("Find fp_op_desc is %s, id is %ld", fp_op_desc->GetName().c_str(), fp_op_desc->GetId()); | GELOGI("Find fp_op_desc is %s, id is %ld", fp_op_desc->GetName().c_str(), fp_op_desc->GetId()); | ||||
| for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
| for (auto &node : graph->GetAllNodes()) { | |||||
| OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| current_idx++; | current_idx++; | ||||
| @@ -641,7 +763,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
| uint32_t last_bp = 0; | uint32_t last_bp = 0; | ||||
| uint32_t iter_end = 0; | uint32_t iter_end = 0; | ||||
| uint32_t current_idx = 0; | uint32_t current_idx = 0; | ||||
| for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
| for (auto &node : graph->GetAllNodes()) { | |||||
| OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| current_idx++; | current_idx++; | ||||
| @@ -685,7 +807,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
| GE_CHECK_NOTNULL(bp_op_desc); | GE_CHECK_NOTNULL(bp_op_desc); | ||||
| current_idx = 0; | current_idx = 0; | ||||
| for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
| for (auto &node : graph->GetAllNodes()) { | |||||
| OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| current_idx++; | current_idx++; | ||||
| @@ -704,7 +826,7 @@ Status TaskGenerator::FindFpOfEnv(const ComputeGraphPtr &graph, const std::strin | |||||
| GELOGI("Start FindFpOfEnv"); | GELOGI("Start FindFpOfEnv"); | ||||
| uint32_t current_idx = 0; | uint32_t current_idx = 0; | ||||
| uint32_t first_fp = 0; | uint32_t first_fp = 0; | ||||
| for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
| for (auto &node : graph->GetAllNodes()) { | |||||
| OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
| current_idx++; | current_idx++; | ||||
| @@ -729,7 +851,7 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin | |||||
| uint32_t current_idx = 0; | uint32_t current_idx = 0; | ||||
| uint32_t iter_end = 0; | uint32_t iter_end = 0; | ||||
| uint32_t last_bp = 0; | uint32_t last_bp = 0; | ||||
| for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
| for (auto &node : graph->GetAllNodes()) { | |||||
| OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
| current_idx++; | current_idx++; | ||||
| @@ -805,10 +927,10 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi | |||||
| bool train_graph = graph->GetNeedIteration(); | bool train_graph = graph->GetNeedIteration(); | ||||
| if (profiling_point.fp_index == 0 && train_graph) { | if (profiling_point.fp_index == 0 && train_graph) { | ||||
| GELOGW("First forward op name can't be found in graph for training trace."); | |||||
| GELOGE(FAILED, "First forward op name can't be found in graph for training trace."); | |||||
| } | } | ||||
| if (profiling_point.bp_index == 0 && train_graph) { | if (profiling_point.bp_index == 0 && train_graph) { | ||||
| GELOGW("Last backward op name can't be found in graph for training trace."); | |||||
| GELOGE(FAILED, "Last backward op name can't be found in graph for training trace."); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -946,31 +1068,4 @@ bool TaskGenerator::IsProfPoint(const OpDescPtr &op, const std::string &name) { | |||||
| return false; | return false; | ||||
| } | } | ||||
| Status TaskGenerator::SetUnknownShapeStream(RunContext &run_context, rtStream_t &stream) { | |||||
| GE_CHK_RT_RET(rtStreamCreate(&stream, 0)); | |||||
| run_context.stream = stream; | |||||
| rtError_t rt_ret = rtModelBindStream(run_context.model, stream, 0); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||||
| GE_CHK_RT_RET(rtStreamDestroy(stream)); | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status TaskGenerator::DestroyUnknownShapeStream(RunContext &run_context, rtStream_t &stream) { | |||||
| GE_CHK_RT(rtModelUnbindStream(run_context.model, stream)); | |||||
| GE_CHK_RT_RET(rtStreamDestroy(stream)); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status TaskGenerator::SetKnownShapeStream(RunContext &run_context, int64_t stream_id) { | |||||
| if (stream_id < 0 || stream_id >= static_cast<int64_t>(run_context.graphStreamList.size())) { | |||||
| GELOGE(INTERNAL_ERROR, "Stream id[%ld] is invalid, stream list size=%zu", stream_id, | |||||
| run_context.graphStreamList.size()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| run_context.stream = run_context.graphStreamList[stream_id]; | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -93,6 +93,18 @@ class TaskGenerator { | |||||
| Status GenerateTask(RunContext &run_context, ComputeGraphPtr &graph, std::vector<domi::TaskDef> &task_def_list, | Status GenerateTask(RunContext &run_context, ComputeGraphPtr &graph, std::vector<domi::TaskDef> &task_def_list, | ||||
| std::map<uint32_t, string> &op_name_map); | std::map<uint32_t, string> &op_name_map); | ||||
| /// | |||||
| /// call engine to generate unknown shape task. | |||||
| /// @param run_context run context | |||||
| /// @param graph compute graph | |||||
| /// @param task_def_list task def list generate by engine | |||||
| /// @param op_name_map relation of task index and op | |||||
| /// @return SUCCESS:seccess | |||||
| /// Other: failed | |||||
| /// | |||||
| Status GenerateUnknownShapeTask(RunContext &run_context, ComputeGraphPtr &graph, | |||||
| std::vector<domi::TaskDef> &task_def_list, std::map<uint32_t, string> &op_name_map); | |||||
| /// | /// | ||||
| /// AddModelTaskToModel | /// AddModelTaskToModel | ||||
| /// @param model_task_def model task | /// @param model_task_def model task | ||||
| @@ -142,12 +154,6 @@ class TaskGenerator { | |||||
| Status SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion_nodes, ComputeGraphPtr &graph); | Status SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion_nodes, ComputeGraphPtr &graph); | ||||
| Status SetUnknownShapeStream(RunContext &run_context, rtStream_t &stream); | |||||
| Status DestroyUnknownShapeStream(RunContext &run_context, rtStream_t &stream); | |||||
| Status SetKnownShapeStream(RunContext &run_context, int64_t stream_id); | |||||
| uint8_t *var_mem_base_ = nullptr; | uint8_t *var_mem_base_ = nullptr; | ||||
| uint64_t var_mem_size_ = 0; | uint64_t var_mem_size_ = 0; | ||||
| }; | }; | ||||