Profile step info whenever profiling is turned on.

ProfilingManager::ProfileStepInfo now returns SUCCESS early when
is_load_profiling_ is false and subscribe_count_ is 0, so
DavinciModel::NnExecute and HybridModelExecutor::ExecuteGraphInternal drop
their own ProfilingModelLoadOn() guards and call it unconditionally. The
ONLY_COMPILE_OPEN_SRC guard around the rtProfilerTraceEx call is removed,
rtProfilerTraceEx is declared in runtime/base.h, and a unit test that drives
ExecuteGraphInternal is added.

diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc
index f1c3c87b..d615187f 100644
--- a/ge/common/profiling/profiling_manager.cc
+++ b/ge/common/profiling/profiling_manager.cc
@@ -323,11 +323,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfileStepInfo(
   uint64_t index_id, uint64_t model_id, uint16_t tag_id, rtStream_t stream, int32_t device_id) {
 #ifdef DAVINCI_SUPPORT_PROFILING
-  rtError_t rt_ret = RT_ERROR_NONE;
-#ifndef ONLY_COMPILE_OPEN_SRC
+  if (!is_load_profiling_ && subscribe_count_ == 0) {
+    GELOGD("Profiling is not turned on, no need to profile step info.");
+    return SUCCESS;
+  }
+
   GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u",
          index_id, model_id, tag_id);
-  rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream);
+  rtError_t rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream);
   if (rt_ret != RT_ERROR_NONE) {
     GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx]Failed, ret 0x%X", rt_ret);
     REPORT_CALL_ERROR("E19999", "Call rtProfilerTraceEx failed, ret 0x%X", rt_ret);
@@ -335,7 +338,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::Profil
   }
   GELOGD("Profiling Step Info TraceTask execute async success, index_id = %lu, model_id = %lu, tag_id = %u",
          index_id, model_id, tag_id);
-#endif
 
   mmTimespec timespec = mmGetTickCount();
   // 1000 ^ 3 converts second to nanosecond
diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc
index 05c50a58..f8b61216 100755
--- a/ge/graph/load/model_manager/davinci_model.cc
+++ b/ge/graph/load/model_manager/davinci_model.cc
@@ -3979,7 +3979,6 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
   is_dynamic_ = input_data.is_dynamic_batch;
 
   bool profiling_model_execute_on = ProfilingManager::Instance().ProfilingModelExecuteOn();
-  bool profiling_model_load_on = ProfilingManager::Instance().ProfilingModelLoadOn();
   GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_PRE_PROC_START));
   Status ret = CopyModelData(input_data, output_data, is_dynamic_);
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret,
@@ -3993,10 +3992,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
     uint64_t model_id = static_cast<uint64_t>(model_id_);
     int32_t device_id = static_cast<int32_t>(device_id_);
     // tag_id 0 means step begin, 1 meas step end.
-    if (profiling_model_load_on) {
-      GE_CHK_STATUS_RET_NOLOG(
-        ProfilingManager::Instance().ProfileStepInfo(index_id, model_id, 0, rt_model_stream_, device_id));
-    }
+    GE_CHK_STATUS_RET_NOLOG(
+      ProfilingManager::Instance().ProfileStepInfo(index_id, model_id, 0, rt_model_stream_, device_id));
 
     GELOGD("rtModelExecute do");
     GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_START));
@@ -4005,10 +4002,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
     GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_END));
     GELOGD("rtModelExecute end");
 
-    if (profiling_model_load_on) {
-      GE_CHK_STATUS_RET_NOLOG(
-        ProfilingManager::Instance().ProfileStepInfo(index_id, model_id, 1, rt_model_stream_, device_id));
-    }
+    GE_CHK_STATUS_RET_NOLOG(
+      ProfilingManager::Instance().ProfileStepInfo(index_id, model_id, 1, rt_model_stream_, device_id));
     iterator_count_++;
   }
 
diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc
index d4d97840..d8939175 100755
--- a/ge/hybrid/executor/hybrid_model_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_executor.cc
@@ -90,7 +90,7 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
   int32_t device_id = static_cast<int32_t>(device_id_);
   auto &prof_mgr = ProfilingManager::Instance();
   // tag_id 0 means step begin, 1 meas step end.
-  if (!model_->IsSingleOp() && prof_mgr.ProfilingModelLoadOn()) {
+  if (!model_->IsSingleOp()) {
     GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 0, stream_, device_id));
   }
 
@@ -98,7 +98,7 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
                         "Failed to execute partitioned call.");
   RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End");
 
-  if (!model_->IsSingleOp() && prof_mgr.ProfilingModelLoadOn()) {
+  if (!model_->IsSingleOp()) {
     GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 1, stream_, device_id));
   }
 
diff --git a/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc b/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc
index b4091a50..d2679439 100644
--- a/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc
+++ b/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc
@@ -86,4 +86,22 @@ TEST_F(UtestHybridModelAsyncExecutor, BuildDeviceTensor) {
   auto size = tensor.GetSize();
   ASSERT_EQ(size, 100);
 }
+
+TEST_F(UtestHybridModelAsyncExecutor, Test_execute_internal) {
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test");
+  GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph);
+  ge_root_model->SetModelName("test_name");
+  HybridModel hybrid_model(ge_root_model);
+
+  HybridModelExecutor executor(&hybrid_model, 0, nullptr);
+  ASSERT_EQ(executor.Init(), SUCCESS);
+  auto &context = executor.context_;
+  GraphItem graph_item;
+  SubgraphExecutor subgraph_executor(&graph_item, &context);
+  HybridModelExecutor::ExecuteArgs args;
+  std::pair<rtEvent_t, std::pair<rtCallback_t, void *>> eof_entry;
+  eof_entry.first = nullptr;
+  context.callback_manager->callback_queue_.Push(eof_entry);
+  ASSERT_EQ(executor.ExecuteGraphInternal(subgraph_executor, args), SUCCESS);
+}
 } // namespace ge
\ No newline at end of file
diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h
index 5b246eed..40bc91f7 100644
--- a/third_party/fwkacllib/inc/runtime/base.h
+++ b/third_party/fwkacllib/inc/runtime/base.h
@@ -154,6 +154,12 @@ RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t
  */
 RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream);
 
+/**
+ * @ingroup profiling_base
+ * @brief ts send keypoint for step info.
+ */
+RTS_API rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagId, rtStream_t stream);
+
 /**
  * @ingroup profiling_base
  * @brief ts set profiling reporter callback.