From: @zhao_zhixuan Reviewed-by: @xchu42,@ji_chen Signed-off-by: @ji_chen tags/v1.3.0
@@ -323,11 +323,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfileStepInfo( | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfileStepInfo( | ||||
uint64_t index_id, uint64_t model_id, uint16_t tag_id, rtStream_t stream, int32_t device_id) { | uint64_t index_id, uint64_t model_id, uint16_t tag_id, rtStream_t stream, int32_t device_id) { | ||||
#ifdef DAVINCI_SUPPORT_PROFILING | #ifdef DAVINCI_SUPPORT_PROFILING | ||||
rtError_t rt_ret = RT_ERROR_NONE; | |||||
#ifndef ONLY_COMPILE_OPEN_SRC | |||||
if (!is_load_profiling_ && subscribe_count_ == 0) { | |||||
GELOGD("Profiling is not turned on, no need to profile step info."); | |||||
return SUCCESS; | |||||
} | |||||
GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u", | GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u", | ||||
index_id, model_id, tag_id); | index_id, model_id, tag_id); | ||||
rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream); | |||||
rtError_t rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream); | |||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx]Failed, ret 0x%X", rt_ret); | GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx]Failed, ret 0x%X", rt_ret); | ||||
REPORT_CALL_ERROR("E19999", "Call rtProfilerTraceEx failed, ret 0x%X", rt_ret); | REPORT_CALL_ERROR("E19999", "Call rtProfilerTraceEx failed, ret 0x%X", rt_ret); | ||||
@@ -335,7 +338,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::Profil | |||||
} | } | ||||
GELOGD("Profiling Step Info TraceTask execute async success, index_id = %lu, model_id = %lu, tag_id = %u", | GELOGD("Profiling Step Info TraceTask execute async success, index_id = %lu, model_id = %lu, tag_id = %u", | ||||
index_id, model_id, tag_id); | index_id, model_id, tag_id); | ||||
#endif | |||||
mmTimespec timespec = mmGetTickCount(); | mmTimespec timespec = mmGetTickCount(); | ||||
// 1000 ^ 3 converts second to nanosecond | // 1000 ^ 3 converts second to nanosecond | ||||
@@ -3979,7 +3979,6 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||||
is_dynamic_ = input_data.is_dynamic_batch; | is_dynamic_ = input_data.is_dynamic_batch; | ||||
bool profiling_model_execute_on = ProfilingManager::Instance().ProfilingModelExecuteOn(); | bool profiling_model_execute_on = ProfilingManager::Instance().ProfilingModelExecuteOn(); | ||||
bool profiling_model_load_on = ProfilingManager::Instance().ProfilingModelLoadOn(); | |||||
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_PRE_PROC_START)); | GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_PRE_PROC_START)); | ||||
Status ret = CopyModelData(input_data, output_data, is_dynamic_); | Status ret = CopyModelData(input_data, output_data, is_dynamic_); | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, | ||||
@@ -3993,10 +3992,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||||
uint64_t model_id = static_cast<uint64_t>(model_id_); | uint64_t model_id = static_cast<uint64_t>(model_id_); | ||||
int32_t device_id = static_cast<int32_t>(device_id_); | int32_t device_id = static_cast<int32_t>(device_id_); | ||||
// tag_id 0 means step begin, 1 means step end. | // tag_id 0 means step begin, 1 means step end. | ||||
if (profiling_model_load_on) { | |||||
GE_CHK_STATUS_RET_NOLOG( | |||||
ProfilingManager::Instance().ProfileStepInfo(index_id, model_id, 0, rt_model_stream_, device_id)); | |||||
} | |||||
GE_CHK_STATUS_RET_NOLOG( | |||||
ProfilingManager::Instance().ProfileStepInfo(index_id, model_id, 0, rt_model_stream_, device_id)); | |||||
GELOGD("rtModelExecute do"); | GELOGD("rtModelExecute do"); | ||||
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_START)); | GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_START)); | ||||
@@ -4005,10 +4002,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||||
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_END)); | GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_END)); | ||||
GELOGD("rtModelExecute end"); | GELOGD("rtModelExecute end"); | ||||
if (profiling_model_load_on) { | |||||
GE_CHK_STATUS_RET_NOLOG( | |||||
ProfilingManager::Instance().ProfileStepInfo(index_id, model_id, 1, rt_model_stream_, device_id)); | |||||
} | |||||
GE_CHK_STATUS_RET_NOLOG( | |||||
ProfilingManager::Instance().ProfileStepInfo(index_id, model_id, 1, rt_model_stream_, device_id)); | |||||
iterator_count_++; | iterator_count_++; | ||||
} | } | ||||
@@ -90,7 +90,7 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, | |||||
int32_t device_id = static_cast<int32_t>(device_id_); | int32_t device_id = static_cast<int32_t>(device_id_); | ||||
auto &prof_mgr = ProfilingManager::Instance(); | auto &prof_mgr = ProfilingManager::Instance(); | ||||
// tag_id 0 means step begin, 1 means step end. | // tag_id 0 means step begin, 1 means step end. | ||||
if (!model_->IsSingleOp() && prof_mgr.ProfilingModelLoadOn()) { | |||||
if (!model_->IsSingleOp()) { | |||||
GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 0, stream_, device_id)); | GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 0, stream_, device_id)); | ||||
} | } | ||||
@@ -98,7 +98,7 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, | |||||
"Failed to execute partitioned call."); | "Failed to execute partitioned call."); | ||||
RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); | RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); | ||||
if (!model_->IsSingleOp() && prof_mgr.ProfilingModelLoadOn()) { | |||||
if (!model_->IsSingleOp()) { | |||||
GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 1, stream_, device_id)); | GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 1, stream_, device_id)); | ||||
} | } | ||||
@@ -86,4 +86,22 @@ TEST_F(UtestHybridModelAsyncExecutor, BuildDeviceTensor) { | |||||
auto size = tensor.GetSize(); | auto size = tensor.GetSize(); | ||||
ASSERT_EQ(size, 100); | ASSERT_EQ(size, 100); | ||||
} | } | ||||
// Smoke test for HybridModelExecutor::ExecuteGraphInternal: after the executor is
// initialised, running a default-constructed GraphItem is expected to return SUCCESS.
TEST_F(UtestHybridModelAsyncExecutor, Test_execute_internal) { | |||||
// Wrap a freshly created ComputeGraph in a root model, then in a HybridModel.
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test"); | |||||
GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph); | |||||
ge_root_model->SetModelName("test_name"); | |||||
HybridModel hybrid_model(ge_root_model); | |||||
// NOTE(review): the 0 and nullptr arguments are presumably the device id and the
// stream - confirm against the HybridModelExecutor constructor.
HybridModelExecutor executor(&hybrid_model, 0, nullptr); | |||||
ASSERT_EQ(executor.Init(), SUCCESS); | |||||
// The subgraph executor shares the execution context owned by the model executor.
auto &context = executor.context_; | |||||
GraphItem graph_item; | |||||
SubgraphExecutor subgraph_executor(&graph_item, &context); | |||||
HybridModelExecutor::ExecuteArgs args; | |||||
// Queue an entry whose event handle is null before executing. Judging by its name it
// serves as an end-of-queue marker for the callback manager - TODO confirm.
std::pair<rtEvent_t, std::pair<rtCallback_t, void *>> eof_entry; | |||||
eof_entry.first = nullptr; | |||||
context.callback_manager->callback_queue_.Push(eof_entry); | |||||
ASSERT_EQ(executor.ExecuteGraphInternal(subgraph_executor, args), SUCCESS); | |||||
} | |||||
} // namespace ge | } // namespace ge |
@@ -154,6 +154,12 @@ RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t | |||||
*/ | */ | ||||
RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream); | RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream); | ||||
/** | |||||
* @ingroup profiling_base | |||||
* @brief ts send keypoint for step info. | |||||
* @param [in] id       step index; the caller shown in this change passes its index_id
* @param [in] modelId  id of the model the step belongs to
* @param [in] tagId    callers pass 0 at step begin and 1 at step end
* @param [in] stream   stream handle forwarded by the caller
* @return RT_ERROR_NONE on success; callers treat any other value as a failure
*/ | |||||
RTS_API rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagId, rtStream_t stream); | |||||
/** | /** | ||||
* @ingroup profiling_base | * @ingroup profiling_base | ||||
* @brief ts set profiling reporter callback. | * @brief ts set profiling reporter callback. | ||||