Browse Source

profiling

tags/v1.2.0
zhengyuanhua 3 years ago
parent
commit
6ccf18aced
6 changed files with 180 additions and 102 deletions
  1. +7
    -0
      ge/graph/build/graph_builder.cc
  2. +90
    -55
      ge/graph/build/task_generator.cc
  3. +10
    -2
      ge/graph/build/task_generator.h
  4. +32
    -0
      ge/hybrid/executor/worker/execution_engine.cc
  5. +41
    -18
      ge/hybrid/model/hybrid_model_builder.cc
  6. +0
    -27
      ge/hybrid/node_executor/task_context.cc

+ 7
- 0
ge/graph/build/graph_builder.cc View File

@@ -37,6 +37,8 @@ using domi::BuildMode;

namespace {
const int32_t kInvalidPerfLevel = -1;
const int64_t kProfilingArStep = 2;
const int64_t kProfilingArStartLogid = 3;
enum NodeType { kSubgraphData, kSubgraphNode, kOthers };
} // namespace
namespace ge {
@@ -457,6 +459,11 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) {
if (all_reduce_node_index[i] == node_index) {
GELOGI("The all reduce node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index);
(void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true);
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep),
GELOGE(FAILED, "Multiply result is out of range.");
return FAILED);
int64_t log_id = i * kProfilingArStep + kProfilingArStartLogid;
(void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
continue;
}
}


+ 90
- 55
ge/graph/build/task_generator.cc View File

@@ -234,6 +234,19 @@ Status TaskGenerator::SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion
return SUCCESS;
}

bool TaskGenerator::IsSubGraphOfDynamicGraph(const ComputeGraphPtr &graph) const {
auto parent_graph_ptr = graph->GetParentGraph();
if (parent_graph_ptr == nullptr) {
return false;
}
auto root_graph_ptr = GraphUtils::FindRootGraph(parent_graph_ptr);
if (root_graph_ptr == nullptr) {
return false;
}

return root_graph_ptr->GetGraphUnknownFlag();
}

Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &graph,
vector<domi::TaskDef> &task_def_list, map<uint32_t, string> &op_name_map) {
GELOGD("Beign to generate task, graph name is %s.", graph->GetName().c_str());
@@ -274,7 +287,6 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
};
GE_MAKE_GUARD(release, callback);

uint64_t all_reduce_node_idx = 0;
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) {
OpDescPtr op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
@@ -293,7 +305,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
// Part2: Call
auto fusion_task_info =
FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib,
ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes, all_reduce_node_idx};
ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes};
GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen),
"Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str());
// continue directly
@@ -317,8 +329,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
type.c_str());
// Profiling task
size_t task_list_size_before = task_def_list.size();
GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes,
node_index, task_def_list, all_reduce_node_idx));
GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list));
int64_t op_id = op_desc->GetId();
// Compatible with dynamic shape scenes, the default is 0
int64_t stream_id = 0;
@@ -338,8 +349,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
return ret;
}
// Profiling task
GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes,
node_index, task_def_list, all_reduce_node_idx));
GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list));
size_t task_list_size_after = task_def_list.size();
// If tasks is reduced
if (task_list_size_after < task_list_size_before) {
@@ -382,7 +392,6 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info
auto &op_name_map = fusion_task_info.op_name_map;
auto &profiling_point = fusion_task_info.profiling_point;
auto &all_reduce_nodes = fusion_task_info.all_reduce_nodes;
auto &all_reduce_idx = fusion_task_info.all_reduce_node_idx;
// If op_desc have this attr, call nodes with same group key in a stream together
if (ge::AttrUtils::GetInt(fusion_op_desc, ATTR_NAME_FUSION_GROUP_KEY, group_key) &&
(fusion_nodes_seen.count(node.get()) == 0)) {
@@ -429,8 +438,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info
return INTERNAL_ERROR;
}
// profiling task
(void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes,
node_index, task_def_list, all_reduce_idx);
(void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list);
run_context.stream = run_context.graphStreamList[stream_id];
GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld] task.",
op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id);
@@ -443,8 +451,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info
return ret;
}
// profiling task
(void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes,
node_index, task_def_list, all_reduce_idx);
(void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list);
size_t task_list_size_after = task_def_list.size();
// if tasks is reduced
if (task_list_size_after < task_list_size_before) {
@@ -850,6 +857,13 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi
GELOGD("Profiling is not open.");
return SUCCESS;
}

// subgraph of dynamic graph no need to find index, has been found in parent graph
if (IsSubGraphOfDynamicGraph(graph)) {
GELOGI("Graph[%s] is subgraph of dynamic graph, no nned to find index.", graph->GetName().c_str());
return SUCCESS;
}

GELOGI("Start get FP/BP index.");
std::string fp_point_str;
std::string bp_point_str;
@@ -887,9 +901,47 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi
return SUCCESS;
}

Status TaskGenerator::InsertProfilingArTaskBefore(const OpDescPtr &op_desc, std::vector<uint32_t> &all_reduce_nodes,
uint32_t node_index, std::vector<domi::TaskDef> &task_def_list,
bool is_insert_bp_profiling_task) {
bool is_insert_all_reduce_task = false;
int64_t ar_log_id = 0xFFFF;
if (is_insert_bp_profiling_task) {
(void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, ar_log_id);
is_insert_all_reduce_task = true;
}
if (!is_insert_all_reduce_task) {
for (size_t i = 0; i < all_reduce_nodes.size(); i++) {
if (all_reduce_nodes[i] == node_index) {
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep),
GELOGE(FAILED, "Multiply result is out of range.");
return FAILED);
ar_log_id = i * kProfilingArStep + kProfilingArStartLogid;
is_insert_all_reduce_task = true;
break;
}
}
}

if (is_insert_all_reduce_task) {
GELOGI("The start allreduce operator is %s, idx %u, log_id %ld", op_desc->GetName().c_str(), node_index, ar_log_id);
TaskDef ar_task_def;
ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
ar_task_def.set_stream_id(op_desc->GetStreamId());
LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp();
if (ar_log_def != nullptr) {
ar_log_def->set_logid(ar_log_id);
ar_log_def->set_notify(false);
}
task_def_list.push_back(ar_task_def);
}

return SUCCESS;
}

Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
vector<domi::TaskDef> &task_def_list, uint64_t &all_reduce_node_idx) {
vector<domi::TaskDef> &task_def_list) {
const char *profiling_mode = std::getenv(kProfilingMode);
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() ||
ProfilingManager::Instance().ProfilingTrainingTraceOn();
@@ -932,19 +984,31 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const
}

bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
uint64_t all_reduce_task_idx = 0;
if (is_all_reduce) {
(void)InsertProfilingArTaskBefore(op_desc, all_reduce_nodes, node_index,
task_def_list, is_insert_bp_profiling_task);
}

return SUCCESS;
}

Status TaskGenerator::InsertProfilingArTaskAfter(const OpDescPtr &op_desc, std::vector<uint32_t> &all_reduce_nodes,
uint32_t node_index, std::vector<domi::TaskDef> &task_def_list,
bool is_insert_bp_profiling_task) {
bool is_insert_all_reduce_task = false;
if (is_all_reduce && is_insert_bp_profiling_task) {
all_reduce_task_idx = all_reduce_node_idx;
int64_t ar_log_id = 0xFFFF;
if (is_insert_bp_profiling_task) {
(void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, ar_log_id);
ar_log_id += 1;
is_insert_all_reduce_task = true;
}
if (is_all_reduce) {
all_reduce_node_idx++;
}
if (!is_insert_all_reduce_task) {
for (size_t i = 0; i < all_reduce_nodes.size(); i++) {
if (all_reduce_nodes[i] == node_index) {
all_reduce_task_idx = i;
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep),
GELOGE(FAILED, "Multiply result is out of range.");
return FAILED);
ar_log_id = i * kProfilingArStep + kProfilingArEndLogid;
is_insert_all_reduce_task = true;
break;
}
@@ -952,28 +1016,24 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const
}

if (is_insert_all_reduce_task) {
GELOGI("The start allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
GELOGI("The start allreduce operator is %s, idx %u, log_id %ld", op_desc->GetName().c_str(), node_index, ar_log_id);
TaskDef ar_task_def;
ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
ar_task_def.set_stream_id(op_desc->GetStreamId());
LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp();
if (ar_log_def != nullptr) {
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep),
GELOGE(FAILED, "Multiply result is out of range.");
return FAILED);
auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArStartLogid;
ar_log_def->set_logid(log_id);
ar_log_def->set_logid(ar_log_id);
ar_log_def->set_notify(false);
(void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
}
task_def_list.push_back(ar_task_def);
}

return SUCCESS;
}

Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx) {
vector<domi::TaskDef> &task_def_list) {
GE_CHECK_NOTNULL(op_desc);
const char *profiling_mode = std::getenv(kProfilingMode);
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() ||
@@ -1018,36 +1078,11 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P
task_def_list.emplace_back(end_task_def);
}

uint32_t all_reduce_task_idx = 0;
bool is_insert_all_reduce_task = false;
if (is_all_reduce && is_insert_bp_profiling_task) {
all_reduce_task_idx = all_reduce_node_idx;
is_insert_all_reduce_task = true;
}

for (size_t i = 0; i < all_reduce_nodes.size(); i++) {
if (all_reduce_nodes[i] == node_index) {
all_reduce_task_idx = i;
is_insert_all_reduce_task = true;
break;
}
if (is_all_reduce) {
(void)InsertProfilingArTaskAfter(op_desc, all_reduce_nodes, node_index,
task_def_list, is_insert_bp_profiling_task);
}

if (is_insert_all_reduce_task) {
GELOGI("The end allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
TaskDef ar_task_def;
ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
ar_task_def.set_stream_id(op_desc->GetStreamId());
LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp();
GE_CHECK_NOTNULL(ar_log_def);
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep),
GELOGE(FAILED, "Multiply result is out of range.");
return FAILED);
auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArEndLogid;
ar_log_def->set_logid(log_id);
ar_log_def->set_notify(false);
task_def_list.emplace_back(ar_task_def);
}
return SUCCESS;
}



+ 10
- 2
ge/graph/build/task_generator.h View File

@@ -129,10 +129,16 @@ class TaskGenerator {
std::vector<uint32_t> &all_reduce_nodes) const;
Status InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
std::vector<domi::TaskDef> &task_def_list, uint64_t &all_reduce_node_idx);
std::vector<domi::TaskDef> &task_def_list);
Status InsertProfilingArTaskBefore(const OpDescPtr &op_desc, std::vector<uint32_t> &all_reduce_nodes,
uint32_t node_index, std::vector<domi::TaskDef> &task_def_listy,
bool is_insert_bp_profiling_task);
Status InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
std::vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx);
std::vector<domi::TaskDef> &task_def_list);
Status InsertProfilingArTaskAfter(const OpDescPtr &op_desc, std::vector<uint32_t> &all_reduce_nodes,
uint32_t node_index, std::vector<domi::TaskDef> &task_def_list,
bool is_insert_bp_profiling_task);

static bool IsProfPoint(const OpDescPtr &op, const std::string &name);

@@ -155,6 +161,8 @@ class TaskGenerator {

Status SetKnownShapeStream(RunContext &run_context, int64_t stream_id);

bool IsSubGraphOfDynamicGraph(const ComputeGraphPtr &graph) const;

uint8_t *var_mem_base_ = nullptr;
uint64_t var_mem_size_ = 0;
};


+ 32
- 0
ge/hybrid/executor/worker/execution_engine.cc View File

@@ -174,6 +174,38 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel
compute_graph_info = context_->GetProfilingGraphDescInfo();
context_->ClearProfilingGraphDescInfo();

auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
for (auto &tmp_compute_graph_info : compute_graph_info) {
// default
if (op_desc->GetAllInputsSize() == 0) {
tmp_compute_graph_info.input_format = { FORMAT_NULL };
tmp_compute_graph_info.input_shape = { {0} };
tmp_compute_graph_info.input_data_type = { DT_UNDEFINED };
}
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i);
if (input_desc == nullptr) {
continue;
}
tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat());
tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims());
tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType());
}

if (op_desc->GetOutputsSize() == 0) {
tmp_compute_graph_info.output_format = { FORMAT_NULL };
tmp_compute_graph_info.output_shape = { {0} };
tmp_compute_graph_info.output_data_type = { DT_UNDEFINED };
}
for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) {
GeTensorDesc output_desc = op_desc->GetOutputDesc(j);
tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat());
tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
}
}

return SUCCESS;
}



+ 41
- 18
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -1608,16 +1608,19 @@ Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, cons
GE_CHECK_NOTNULL(compute_graph);

NodePtr node_ptr = nullptr;
vector<domi::TaskDef> task_def_list;
map<NodePtr, vector<domi::TaskDef>> node_task_map;
// create fp node
bool is_insert_fp_profiling_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task);
if (is_insert_fp_profiling_task) {
vector<domi::TaskDef> task_def_list;
(void)GenerateFpProfilingTask(op_desc, task_def_list);
auto fp_desc = MakeShared<OpDesc>(kProfilingFpNode, PROFILINGTRAININGTRACE);
GE_CHECK_NOTNULL(fp_desc);
fp_desc->SetOpKernelLibName(kEngineNameRts);
node_ptr = compute_graph->AddNode(fp_desc);
GE_CHECK_NOTNULL(node_ptr);
node_task_map[node_ptr] = task_def_list;
GELOGD("Create fp profiling node success before.");
}
// creat all reduce start node
@@ -1625,6 +1628,7 @@ Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, cons
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
if (is_all_reduce && is_insert_bp_profiling_task) {
vector<domi::TaskDef> task_def_list;
int64_t log_id = 0;
(void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
GELOGD("All reduce node profiling task log id: %ld before", log_id);
@@ -1634,18 +1638,24 @@ Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, cons
GE_CHECK_NOTNULL(ar_desc_start);
ar_desc_start->SetOpKernelLibName(kEngineNameRts);
node_ptr = compute_graph->AddNode(ar_desc_start);
GE_CHECK_NOTNULL(node_ptr);
node_task_map[node_ptr] = task_def_list;
GELOGD("Create all reduce start profiling node success before.");
}

if (node_ptr != nullptr) {
for (const auto &task_def : task_def_list) {
hybrid_model_.task_defs_[node_ptr].emplace_back(task_def);
if (!node_task_map.empty()) {
for (const auto &node_task : node_task_map) {
NodePtr profiling_node = node_task.first;
vector<domi::TaskDef> task_def_lists = node_task.second;
for (const auto &task_def : task_def_lists) {
hybrid_model_.task_defs_[profiling_node].emplace_back(task_def);
}
NodeItem *node_item = nullptr;
GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(profiling_node, &node_item));
node_item->input_start = 0;
node_item->output_start = 0;
graph_item.node_items_.emplace_back(node_item);
}
NodeItem *node_item = nullptr;
GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item));
node_item->input_start = 0;
node_item->output_start = 0;
graph_item.node_items_.emplace_back(node_item);
} else {
GELOGD("No need to create profiling node before.");
}
@@ -1661,12 +1671,13 @@ Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const
GE_CHECK_NOTNULL(compute_graph);

NodePtr node_ptr = nullptr;
vector<domi::TaskDef> task_def_list;
map<NodePtr, vector<domi::TaskDef>> node_task_map;
// Create all reduce end node
bool is_insert_bp_profiling_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
if (is_all_reduce && is_insert_bp_profiling_task) {
vector<domi::TaskDef> task_def_list;
int64_t log_id = 0;
(void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
GELOGD("All reduce node profiling task log id: %ld after", log_id);
@@ -1676,38 +1687,50 @@ Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const
GE_CHECK_NOTNULL(ar_desc_end);
ar_desc_end->SetOpKernelLibName(kEngineNameRts);
node_ptr = compute_graph->AddNode(ar_desc_end);
GE_CHECK_NOTNULL(node_ptr);
node_task_map[node_ptr] = task_def_list;
GELOGD("Create all reduce end profiling node success after.");
}
// create bp node
if (!is_all_reduce && is_insert_bp_profiling_task) {
vector<domi::TaskDef> task_def_list;
(void) GenerateBpProfilingTask(op_desc, task_def_list);
auto bp_op_desc = MakeShared<OpDesc>(kProfilingBpNode, PROFILINGTRAININGTRACE);
GE_CHECK_NOTNULL(bp_op_desc);
bp_op_desc->SetOpKernelLibName(kEngineNameRts);
node_ptr = compute_graph->AddNode(bp_op_desc);
GE_CHECK_NOTNULL(node_ptr);
node_task_map[node_ptr] = task_def_list;
GELOGD("Create bp profiling node success after.");
}
// create end node
bool is_insert_end_profiling_task = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, is_insert_end_profiling_task);
if (is_insert_end_profiling_task) {
vector<domi::TaskDef> task_def_list;
(void)GenerateEndProfilingTask(op_desc, task_def_list);
auto end_desc = MakeShared<OpDesc>(kProfilingEndNode, PROFILINGTRAININGTRACE);
GE_CHECK_NOTNULL(end_desc);
end_desc->SetOpKernelLibName(kEngineNameRts);
node_ptr = compute_graph->AddNode(end_desc);
GE_CHECK_NOTNULL(node_ptr);
node_task_map[node_ptr] = task_def_list;
GELOGD("Create end profiling node success after.");
}

if (node_ptr != nullptr) {
for (const auto &task_def : task_def_list) {
hybrid_model_.task_defs_[node_ptr].emplace_back(task_def);
if (!node_task_map.empty()) {
for (const auto &node_task : node_task_map) {
NodePtr profiling_node = node_task.first;
vector<domi::TaskDef> task_def_lists = node_task.second;
for (const auto &task_def : task_def_lists) {
hybrid_model_.task_defs_[profiling_node].emplace_back(task_def);
}
NodeItem *node_item = nullptr;
GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(profiling_node, &node_item));
node_item->input_start = 0;
node_item->output_start = 0;
graph_item.node_items_.emplace_back(node_item);
}
NodeItem *node_item = nullptr;
GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item));
node_item->input_start = 0;
node_item->output_start = 0;
graph_item.node_items_.emplace_back(node_item);
} else {
GELOGD("No need to create profiling node after.");
}


+ 0
- 27
ge/hybrid/node_executor/task_context.cc View File

@@ -554,33 +554,6 @@ Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream
tmp_compute_graph_info.model_name = dynamic_model_name;
tmp_compute_graph_info.op_name = op_desc->GetName();
tmp_compute_graph_info.op_type = op_desc->GetType();
// default
if (op_desc->GetAllInputsSize() == 0) {
tmp_compute_graph_info.input_format = { FORMAT_NULL };
tmp_compute_graph_info.input_shape = { {0} };
tmp_compute_graph_info.input_data_type = { DT_UNDEFINED };
}
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i);
if (input_desc == nullptr) {
continue;
}
tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat());
tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims());
tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType());
}

if (op_desc->GetOutputsSize() == 0) {
tmp_compute_graph_info.output_format = { FORMAT_NULL };
tmp_compute_graph_info.output_shape = { {0} };
tmp_compute_graph_info.output_data_type = { DT_UNDEFINED };
}
for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) {
GeTensorDesc output_desc = op_desc->GetOutputDesc(j);
tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat());
tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
}
tmp_compute_graph_info.task_id = task_id;
tmp_compute_graph_info.stream_id = stream_id;
compute_graph_info.emplace_back(tmp_compute_graph_info);


Loading…
Cancel
Save