From 6ccf18aced863438c1b964d2746f7660533b8238 Mon Sep 17 00:00:00 2001
From: zhengyuanhua
Date: Thu, 21 Jan 2021 13:51:33 +0800
Subject: [PATCH] profiling

---
 ge/graph/build/graph_builder.cc               |   7 +
 ge/graph/build/task_generator.cc              | 145 +++++++++++-------
 ge/graph/build/task_generator.h               |  12 +-
 ge/hybrid/executor/worker/execution_engine.cc |  32 ++++
 ge/hybrid/model/hybrid_model_builder.cc       |  59 ++++---
 ge/hybrid/node_executor/task_context.cc       |  27 ----
 6 files changed, 180 insertions(+), 102 deletions(-)

diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc
index 7b09cbc6..68c0e00c 100644
--- a/ge/graph/build/graph_builder.cc
+++ b/ge/graph/build/graph_builder.cc
@@ -37,6 +37,8 @@ using domi::BuildMode;
 
 namespace {
 const int32_t kInvalidPerfLevel = -1;
+const int64_t kProfilingArStep = 2;
+const int64_t kProfilingArStartLogid = 3;
 enum NodeType { kSubgraphData, kSubgraphNode, kOthers };
 }  // namespace
 namespace ge {
@@ -457,6 +459,11 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) {
       if (all_reduce_node_index[i] == node_index) {
         GELOGI("The all reduce node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index);
         (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true);
+        GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep),
+                        GELOGE(FAILED, "Multiply result is out of range.");
+                        return FAILED);
+        int64_t log_id = i * kProfilingArStep + kProfilingArStartLogid;
+        (void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
         continue;
       }
     }
diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc
index bb72fa8a..7a39052b 100755
--- a/ge/graph/build/task_generator.cc
+++ b/ge/graph/build/task_generator.cc
@@ -234,6 +234,19 @@ Status TaskGenerator::SaveFusionNodes(map<int64_t, vector<NodePtr>> &fusion
   return SUCCESS;
 }
 
+bool TaskGenerator::IsSubGraphOfDynamicGraph(const ComputeGraphPtr &graph) const {
+  auto parent_graph_ptr = graph->GetParentGraph();
+  if (parent_graph_ptr == nullptr) {
+    return false;
+  }
+  auto root_graph_ptr = GraphUtils::FindRootGraph(parent_graph_ptr);
+  if (root_graph_ptr == nullptr) {
+    return false;
+  }
+
+  return root_graph_ptr->GetGraphUnknownFlag();
+}
+
 Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &graph,
                                    vector<domi::TaskDef> &task_def_list, map<uint32_t, string> &op_name_map) {
   GELOGD("Beign to generate task, graph name is %s.", graph->GetName().c_str());
@@ -274,7 +287,6 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
   };
   GE_MAKE_GUARD(release, callback);
 
-  uint64_t all_reduce_node_idx = 0;
   for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) {
     OpDescPtr op_desc = node->GetOpDesc();
     GE_CHECK_NOTNULL(op_desc);
@@ -293,7 +305,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
       // Part2: Call
       auto fusion_task_info =
           FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib,
-                         ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes, all_reduce_node_idx};
+                         ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes};
       GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen),
                         "Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str());
       // continue directly
@@ -317,8 +329,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
                       type.c_str());
     // Profiling task
    size_t task_list_size_before = task_def_list.size();
-    GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes,
-                                                node_index, task_def_list, all_reduce_node_idx));
+    GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list));
     int64_t op_id = op_desc->GetId();
     // Compatible with dynamic shape scenes, the default is 0
     int64_t stream_id = 0;
@@ -338,8 +349,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
       return ret;
     }
     // Profiling task
-    GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes,
-                                               node_index, task_def_list, all_reduce_node_idx));
+    GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list));
     size_t task_list_size_after = task_def_list.size();
     // If tasks is reduced
     if (task_list_size_after < task_list_size_before) {
@@ -382,7 +392,6 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info
   auto &op_name_map = fusion_task_info.op_name_map;
   auto &profiling_point = fusion_task_info.profiling_point;
   auto &all_reduce_nodes = fusion_task_info.all_reduce_nodes;
-  auto &all_reduce_idx = fusion_task_info.all_reduce_node_idx;
   // If op_desc have this attr, call nodes with same group key in a stream together
   if (ge::AttrUtils::GetInt(fusion_op_desc, ATTR_NAME_FUSION_GROUP_KEY, group_key) &&
       (fusion_nodes_seen.count(node.get()) == 0)) {
@@ -429,8 +438,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info
        return INTERNAL_ERROR;
      }
      // profiling task
-      (void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes,
-                                      node_index, task_def_list, all_reduce_idx);
+      (void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list);
      run_context.stream = run_context.graphStreamList[stream_id];
      GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld] task.",
             op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id);
@@ -443,8 +451,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info
        return ret;
      }
      // profiling task
-      (void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes,
-                                     node_index, task_def_list, all_reduce_idx);
+      (void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list);
      size_t task_list_size_after = task_def_list.size();
      // if tasks is reduced
      if (task_list_size_after < task_list_size_before) {
@@ -850,6 +857,13 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi
     GELOGD("Profiling is not open.");
     return SUCCESS;
   }
+
+  // The subgraph of a dynamic graph does not need to find the index; it has been found in the parent graph.
+  if (IsSubGraphOfDynamicGraph(graph)) {
+    GELOGI("Graph[%s] is subgraph of dynamic graph, no need to find index.", graph->GetName().c_str());
+    return SUCCESS;
+  }
+
   GELOGI("Start get FP/BP index.");
   std::string fp_point_str;
   std::string bp_point_str;
@@ -887,9 +901,47 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi
   return SUCCESS;
 }
 
+Status TaskGenerator::InsertProfilingArTaskBefore(const OpDescPtr &op_desc, std::vector<uint32_t> &all_reduce_nodes,
+                                                  uint32_t node_index, std::vector<domi::TaskDef> &task_def_list,
+                                                  bool is_insert_bp_profiling_task) {
+  bool is_insert_all_reduce_task = false;
+  int64_t ar_log_id = 0xFFFF;
+  if (is_insert_bp_profiling_task) {
+    (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, ar_log_id);
+    is_insert_all_reduce_task = true;
+  }
+  if (!is_insert_all_reduce_task) {
+    for (size_t i = 0; i < all_reduce_nodes.size(); i++) {
+      if (all_reduce_nodes[i] == node_index) {
+        GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep),
+                        GELOGE(FAILED, "Multiply result is out of range.");
+                        return FAILED);
+        ar_log_id = i * kProfilingArStep + kProfilingArStartLogid;
+        is_insert_all_reduce_task = true;
+        break;
+      }
+    }
+  }
+
+  if (is_insert_all_reduce_task) {
+    GELOGI("The start allreduce operator is %s, idx %u, log_id %ld", op_desc->GetName().c_str(), node_index, ar_log_id);
+    TaskDef ar_task_def;
+    ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
+    ar_task_def.set_stream_id(op_desc->GetStreamId());
+    LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp();
+    if (ar_log_def != nullptr) {
+      ar_log_def->set_logid(ar_log_id);
+      ar_log_def->set_notify(false);
+    }
+    task_def_list.push_back(ar_task_def);
+  }
+
+  return SUCCESS;
+}
+
 Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
                                                 vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
-                                                vector<domi::TaskDef> &task_def_list, uint64_t &all_reduce_node_idx) {
+                                                vector<domi::TaskDef> &task_def_list) {
   const char *profiling_mode = std::getenv(kProfilingMode);
   bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() ||
                       ProfilingManager::Instance().ProfilingTrainingTraceOn();
@@ -932,19 +984,31 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const
   }
   bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
-  uint64_t all_reduce_task_idx = 0;
+  if (is_all_reduce) {
+    (void)InsertProfilingArTaskBefore(op_desc, all_reduce_nodes, node_index,
+                                      task_def_list, is_insert_bp_profiling_task);
+  }
+
+  return SUCCESS;
+}
+
+Status TaskGenerator::InsertProfilingArTaskAfter(const OpDescPtr &op_desc, std::vector<uint32_t> &all_reduce_nodes,
+                                                 uint32_t node_index, std::vector<domi::TaskDef> &task_def_list,
+                                                 bool is_insert_bp_profiling_task) {
   bool is_insert_all_reduce_task = false;
-  if (is_all_reduce && is_insert_bp_profiling_task) {
-    all_reduce_task_idx = all_reduce_node_idx;
+  int64_t ar_log_id = 0xFFFF;
+  if (is_insert_bp_profiling_task) {
+    (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, ar_log_id);
+    ar_log_id += 1;
     is_insert_all_reduce_task = true;
   }
-  if (is_all_reduce) {
-    all_reduce_node_idx++;
-  }
   if (!is_insert_all_reduce_task) {
     for (size_t i = 0; i < all_reduce_nodes.size(); i++) {
       if (all_reduce_nodes[i] == node_index) {
-        all_reduce_task_idx = i;
+        GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep),
+                        GELOGE(FAILED, "Multiply result is out of range.");
+                        return FAILED);
+        ar_log_id = i * kProfilingArStep + kProfilingArEndLogid;
         is_insert_all_reduce_task = true;
         break;
       }
     }
@@ -952,28 +1016,24 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const
   }
 
   if (is_insert_all_reduce_task) {
-    GELOGI("The start allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
+    GELOGI("The end allreduce operator is %s, idx %u, log_id %ld", op_desc->GetName().c_str(), node_index, ar_log_id);
     TaskDef ar_task_def;
     ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
     ar_task_def.set_stream_id(op_desc->GetStreamId());
     LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp();
     if (ar_log_def != nullptr) {
-      GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep),
-                      GELOGE(FAILED, "Multiply result is out of range.");
-                      return FAILED);
-      auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArStartLogid;
-      ar_log_def->set_logid(log_id);
+      ar_log_def->set_logid(ar_log_id);
       ar_log_def->set_notify(false);
-      (void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
     }
     task_def_list.push_back(ar_task_def);
   }
+
   return SUCCESS;
 }
 
 Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
                                                vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
-                                               vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx) {
+                                               vector<domi::TaskDef> &task_def_list) {
   GE_CHECK_NOTNULL(op_desc);
   const char *profiling_mode = std::getenv(kProfilingMode);
   bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() ||
@@ -1018,36 +1078,11 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P
     task_def_list.emplace_back(end_task_def);
   }
 
-  uint32_t all_reduce_task_idx = 0;
-  bool is_insert_all_reduce_task = false;
-  if (is_all_reduce && is_insert_bp_profiling_task) {
-    all_reduce_task_idx = all_reduce_node_idx;
-    is_insert_all_reduce_task = true;
-  }
-
-  for (size_t i = 0; i < all_reduce_nodes.size(); i++) {
-    if (all_reduce_nodes[i] == node_index) {
-      all_reduce_task_idx = i;
-      is_insert_all_reduce_task = true;
-      break;
-    }
+  if (is_all_reduce) {
+    (void)InsertProfilingArTaskAfter(op_desc, all_reduce_nodes, node_index,
+                                     task_def_list, is_insert_bp_profiling_task);
   }
 
-  if (is_insert_all_reduce_task) {
-    GELOGI("The end allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
-    TaskDef ar_task_def;
-    ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
-    ar_task_def.set_stream_id(op_desc->GetStreamId());
-    LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp();
-    GE_CHECK_NOTNULL(ar_log_def);
-    GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep),
-                    GELOGE(FAILED, "Multiply result is out of range.");
-                    return FAILED);
-    auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArEndLogid;
-    ar_log_def->set_logid(log_id);
-    ar_log_def->set_notify(false);
-    task_def_list.emplace_back(ar_task_def);
-  }
   return SUCCESS;
 }
diff --git a/ge/graph/build/task_generator.h b/ge/graph/build/task_generator.h
index 5970954c..9f12d568 100755
--- a/ge/graph/build/task_generator.h
+++ b/ge/graph/build/task_generator.h
@@ -129,10 +129,16 @@ class TaskGenerator {
                                 std::vector<uint32_t> &all_reduce_nodes) const;
   Status InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
                                    std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
-                                   std::vector<domi::TaskDef> &task_def_list, uint64_t &all_reduce_node_idx);
+                                   std::vector<domi::TaskDef> &task_def_list);
+  Status InsertProfilingArTaskBefore(const OpDescPtr &op_desc, std::vector<uint32_t> &all_reduce_nodes,
+                                     uint32_t node_index, std::vector<domi::TaskDef> &task_def_list,
+                                     bool is_insert_bp_profiling_task);
   Status InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
                                   std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
-                                  std::vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx);
+                                  std::vector<domi::TaskDef> &task_def_list);
+  Status InsertProfilingArTaskAfter(const OpDescPtr &op_desc, std::vector<uint32_t> &all_reduce_nodes,
+                                    uint32_t node_index, std::vector<domi::TaskDef> &task_def_list,
+                                    bool is_insert_bp_profiling_task);
 
   static bool IsProfPoint(const OpDescPtr &op, const std::string &name);
 
@@ -155,6 +161,8 @@ class TaskGenerator {
   Status SetKnownShapeStream(RunContext &run_context, int64_t stream_id);
 
+  bool IsSubGraphOfDynamicGraph(const ComputeGraphPtr &graph) const;
+
   uint8_t *var_mem_base_ = nullptr;
   uint64_t var_mem_size_ = 0;
 };
diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc
index 44f7d87f..a6386b27 100755
--- a/ge/hybrid/executor/worker/execution_engine.cc
+++ b/ge/hybrid/executor/worker/execution_engine.cc
@@ -174,6 +174,38 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel
   compute_graph_info = context_->GetProfilingGraphDescInfo();
   context_->ClearProfilingGraphDescInfo();
 
+  auto op_desc = node->GetOpDesc();
+  GE_CHECK_NOTNULL(op_desc);
+  for (auto &tmp_compute_graph_info : compute_graph_info) {
+    // default
+    if (op_desc->GetAllInputsSize() == 0) {
+      tmp_compute_graph_info.input_format = { FORMAT_NULL };
+      tmp_compute_graph_info.input_shape = { {0} };
+      tmp_compute_graph_info.input_data_type = { DT_UNDEFINED };
+    }
+    for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
+      GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i);
+      if (input_desc == nullptr) {
+        continue;
+      }
+      tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat());
+      tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims());
+      tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType());
+    }
+
+    if (op_desc->GetOutputsSize() == 0) {
+      tmp_compute_graph_info.output_format = { FORMAT_NULL };
+      tmp_compute_graph_info.output_shape = { {0} };
+      tmp_compute_graph_info.output_data_type = { DT_UNDEFINED };
+    }
+    for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) {
+      GeTensorDesc output_desc = op_desc->GetOutputDesc(j);
+      tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat());
+      tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
+      tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
+    }
+  }
+
   return SUCCESS;
 }
diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index 861cd30a..20cecc99 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -1608,16 +1608,19 @@ Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, cons
   GE_CHECK_NOTNULL(compute_graph);
 
   NodePtr node_ptr = nullptr;
-  vector<domi::TaskDef> task_def_list;
+  map<NodePtr, vector<domi::TaskDef>> node_task_map;
   // create fp node
   bool is_insert_fp_profiling_task = false;
   (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task);
   if (is_insert_fp_profiling_task) {
+    vector<domi::TaskDef> task_def_list;
     (void)GenerateFpProfilingTask(op_desc, task_def_list);
     auto fp_desc = MakeShared<OpDesc>(kProfilingFpNode, PROFILINGTRAININGTRACE);
     GE_CHECK_NOTNULL(fp_desc);
     fp_desc->SetOpKernelLibName(kEngineNameRts);
     node_ptr = compute_graph->AddNode(fp_desc);
+    GE_CHECK_NOTNULL(node_ptr);
+    node_task_map[node_ptr] = task_def_list;
     GELOGD("Create fp profiling node success before.");
   }
   // creat all reduce start node
@@ -1625,6 +1628,7 @@ Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, cons
   (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
   bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
   if (is_all_reduce && is_insert_bp_profiling_task) {
+    vector<domi::TaskDef> task_def_list;
     int64_t log_id = 0;
     (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
     GELOGD("All reduce node profiling task log id: %ld before", log_id);
@@ -1634,18 +1638,24 @@ Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, cons
     GE_CHECK_NOTNULL(ar_desc_start);
     ar_desc_start->SetOpKernelLibName(kEngineNameRts);
     node_ptr = compute_graph->AddNode(ar_desc_start);
+    GE_CHECK_NOTNULL(node_ptr);
+    node_task_map[node_ptr] = task_def_list;
     GELOGD("Create all reduce start profiling node success before.");
   }
 
-  if (node_ptr != nullptr) {
-    for (const auto &task_def : task_def_list) {
-      hybrid_model_.task_defs_[node_ptr].emplace_back(task_def);
+  if (!node_task_map.empty()) {
+    for (const auto &node_task : node_task_map) {
+      NodePtr profiling_node = node_task.first;
+      vector<domi::TaskDef> task_def_lists = node_task.second;
+      for (const auto &task_def : task_def_lists) {
+        hybrid_model_.task_defs_[profiling_node].emplace_back(task_def);
+      }
+      NodeItem *node_item = nullptr;
+      GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(profiling_node, &node_item));
+      node_item->input_start = 0;
+      node_item->output_start = 0;
+      graph_item.node_items_.emplace_back(node_item);
     }
-    NodeItem *node_item = nullptr;
-    GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item));
-    node_item->input_start = 0;
-    node_item->output_start = 0;
-    graph_item.node_items_.emplace_back(node_item);
   } else {
     GELOGD("No need to create profiling node before.");
   }
@@ -1661,12 +1671,13 @@ Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const
   GE_CHECK_NOTNULL(compute_graph);
 
   NodePtr node_ptr = nullptr;
-  vector<domi::TaskDef> task_def_list;
+  map<NodePtr, vector<domi::TaskDef>> node_task_map;
   // Create all reduce end node
   bool is_insert_bp_profiling_task = false;
   (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
   bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
   if (is_all_reduce && is_insert_bp_profiling_task) {
+    vector<domi::TaskDef> task_def_list;
     int64_t log_id = 0;
     (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
     GELOGD("All reduce node profiling task log id: %ld after", log_id);
@@ -1676,38 +1687,50 @@ Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const
     GE_CHECK_NOTNULL(ar_desc_end);
     ar_desc_end->SetOpKernelLibName(kEngineNameRts);
     node_ptr = compute_graph->AddNode(ar_desc_end);
+    GE_CHECK_NOTNULL(node_ptr);
+    node_task_map[node_ptr] = task_def_list;
     GELOGD("Create all reduce end profiling node success after.");
   }
   // create bp node
   if (!is_all_reduce && is_insert_bp_profiling_task) {
+    vector<domi::TaskDef> task_def_list;
     (void) GenerateBpProfilingTask(op_desc, task_def_list);
     auto bp_op_desc = MakeShared<OpDesc>(kProfilingBpNode, PROFILINGTRAININGTRACE);
     GE_CHECK_NOTNULL(bp_op_desc);
     bp_op_desc->SetOpKernelLibName(kEngineNameRts);
     node_ptr = compute_graph->AddNode(bp_op_desc);
+    GE_CHECK_NOTNULL(node_ptr);
+    node_task_map[node_ptr] = task_def_list;
     GELOGD("Create bp profiling node success after.");
   }
   // create end node
   bool is_insert_end_profiling_task = false;
   (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, is_insert_end_profiling_task);
   if (is_insert_end_profiling_task) {
+    vector<domi::TaskDef> task_def_list;
     (void)GenerateEndProfilingTask(op_desc, task_def_list);
     auto end_desc = MakeShared<OpDesc>(kProfilingEndNode, PROFILINGTRAININGTRACE);
     GE_CHECK_NOTNULL(end_desc);
     end_desc->SetOpKernelLibName(kEngineNameRts);
     node_ptr = compute_graph->AddNode(end_desc);
+    GE_CHECK_NOTNULL(node_ptr);
+    node_task_map[node_ptr] = task_def_list;
     GELOGD("Create end profiling node success after.");
   }
 
-  if (node_ptr != nullptr) {
-    for (const auto &task_def : task_def_list) {
-      hybrid_model_.task_defs_[node_ptr].emplace_back(task_def);
+  if (!node_task_map.empty()) {
+    for (const auto &node_task : node_task_map) {
+      NodePtr profiling_node = node_task.first;
+      vector<domi::TaskDef> task_def_lists = node_task.second;
+      for (const auto &task_def : task_def_lists) {
+        hybrid_model_.task_defs_[profiling_node].emplace_back(task_def);
+      }
+      NodeItem *node_item = nullptr;
+      GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(profiling_node, &node_item));
+      node_item->input_start = 0;
+      node_item->output_start = 0;
+      graph_item.node_items_.emplace_back(node_item);
     }
-    NodeItem *node_item = nullptr;
-    GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item));
-    node_item->input_start = 0;
-    node_item->output_start = 0;
-    graph_item.node_items_.emplace_back(node_item);
   } else {
     GELOGD("No need to create profiling node after.");
   }
diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc
index de583ef1..bc318124 100644
--- a/ge/hybrid/node_executor/task_context.cc
+++ b/ge/hybrid/node_executor/task_context.cc
@@ -554,33 +554,6 @@ Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream
   tmp_compute_graph_info.model_name = dynamic_model_name;
   tmp_compute_graph_info.op_name = op_desc->GetName();
   tmp_compute_graph_info.op_type = op_desc->GetType();
-  // default
-  if (op_desc->GetAllInputsSize() == 0) {
-    tmp_compute_graph_info.input_format = { FORMAT_NULL };
-    tmp_compute_graph_info.input_shape = { {0} };
-    tmp_compute_graph_info.input_data_type = { DT_UNDEFINED };
-  }
-  for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
-    GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i);
-    if (input_desc == nullptr) {
-      continue;
-    }
-    tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat());
-    tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims());
-    tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType());
-  }
-
-  if (op_desc->GetOutputsSize() == 0) {
-    tmp_compute_graph_info.output_format = { FORMAT_NULL };
-    tmp_compute_graph_info.output_shape = { {0} };
-    tmp_compute_graph_info.output_data_type = { DT_UNDEFINED };
-  }
-  for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) {
-    GeTensorDesc output_desc = op_desc->GetOutputDesc(j);
-    tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat());
-    tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
-    tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
-  }
   tmp_compute_graph_info.task_id = task_id;
   tmp_compute_graph_info.stream_id = stream_id;
   compute_graph_info.emplace_back(tmp_compute_graph_info);
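
Note (not part of the patch): a minimal standalone sketch of the all-reduce trace log-id scheme this change relies on. Only kProfilingArStep = 2 and kProfilingArStartLogid = 3 appear in the patch itself; the value of kProfilingArEndLogid and the main() driver below are assumptions for illustration.

// Illustrative only: assumed log-id layout for all-reduce profiler trace tasks.
// Assumption: kProfilingArEndLogid == kProfilingArStartLogid + 1.
#include <cstdint>
#include <cstdio>

constexpr int64_t kProfilingArStep = 2;
constexpr int64_t kProfilingArStartLogid = 3;
constexpr int64_t kProfilingArEndLogid = kProfilingArStartLogid + 1;  // assumed

int main() {
  // i is the position of the all-reduce node within all_reduce_nodes.
  for (int64_t i = 0; i < 3; ++i) {
    int64_t start_log_id = i * kProfilingArStep + kProfilingArStartLogid;
    int64_t end_log_id = i * kProfilingArStep + kProfilingArEndLogid;
    std::printf("all-reduce #%lld: start log_id %lld, end log_id %lld\n",
                static_cast<long long>(i), static_cast<long long>(start_log_id),
                static_cast<long long>(end_log_id));
  }
  return 0;
}

With a step of 2, the start/end ids of successive all-reduce nodes stay disjoint (3/4, 5/6, 7/8, ...), which is consistent with graph_builder.cc storing only the start id in ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID and InsertProfilingArTaskAfter deriving the end id by adding 1 to it.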