diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h index af9fce06..12293fc5 100755 --- a/ge/common/profiling/profiling_manager.h +++ b/ge/common/profiling/profiling_manager.h @@ -81,7 +81,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { Status ProfModelUnsubscribe(void *model); void StopProfiling(); bool ProfilingTrainingTraceOn() const { return is_training_trace_; } + // report model load profiling data flag, data contain task desc info, step info, model load fusion op info bool ProfilingModelLoadOn() const { return is_load_profiling_; } + // report model execute profiling data flag, data contain model execute time info bool ProfilingModelExecuteOn() const; // is_execute_profiling_ only used by ge option and env bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index 678e5c63..32758f61 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -300,7 +300,7 @@ Status NodeDoneCallback::OnNodeDone() { GE_CHK_STATUS_RET(SaveDumpOpInfo(), "[Save][DumpOpInfo] Failed to dump op info."); } - if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { + if (ProfilingManager::Instance().ProfilingModelLoadOn()) { GE_CHK_STATUS_RET(ProfilingReport(), "[Report][Profiling] of node[%s] failed.", node_item.NodeName().c_str()); } diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index 0468930a..59250d8c 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -563,7 +563,7 @@ const DumpProperties &TaskContext::GetDumpProperties() const { bool TaskContext::NeedCallback() { return node_item_->has_observer || IsDumpEnabled() || GraphExecutionContext::profiling_level > 0 || - !execution_context_->model->IsSingleOp(); + !execution_context_->model->IsSingleOp() || ProfilingManager::Instance().ProfilingModelLoadOn(); } Status TaskContext::Synchronize() { @@ -572,7 +572,7 @@ Status TaskContext::Synchronize() { Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type, uint32_t block_dim) { - if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { + if (ProfilingManager::Instance().ProfilingModelLoadOn()) { const NodeItem &node_item = GetNodeItem(); auto op_desc = node_item.GetOpDesc(); GE_CHECK_NOTNULL(op_desc); diff --git a/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc b/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc index 92315448..07022230 100644 --- a/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc +++ b/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc @@ -120,6 +120,11 @@ TEST_F(UtestExecutionEngine, ExecuteAsync_without_callback_and_kernel_task) { NodeState node_state(*node_item, &subgraph_context); auto task_context = TaskContext::Create(&node_state, &execution_context, &subgraph_context); + uint32_t task_id = 0; + uint32_t stream_id = 1; + std::string task_type = "rts"; + uint32_t block_dim = 0; + task_context->SaveProfilingTaskDescInfo(task_id, stream_id, task_type, block_dim); auto shared_task_context = std::shared_ptr(task_context.release()); node_state.SetTaskContext(shared_task_context);