From: @zhao_zhixuan Reviewed-by: @xchu42,@ji_chen Signed-off-by: @ji_chen tags/v1.3.0
| @@ -708,6 +708,7 @@ target_compile_definitions(ge_runner PRIVATE | |||||
| DAVINCI_CLOUD | DAVINCI_CLOUD | ||||
| google=ascend_private | google=ascend_private | ||||
| FUNC_VISIBILITY | FUNC_VISIBILITY | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_compile_options(ge_runner PRIVATE | target_compile_options(ge_runner PRIVATE | ||||
| @@ -783,6 +784,7 @@ target_compile_definitions(ge_compiler PRIVATE | |||||
| COMPILE_OMG_PACKAGE | COMPILE_OMG_PACKAGE | ||||
| google=ascend_private | google=ascend_private | ||||
| FUNC_VISIBILITY | FUNC_VISIBILITY | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_compile_options(ge_compiler PRIVATE | target_compile_options(ge_compiler PRIVATE | ||||
| @@ -24,6 +24,7 @@ | |||||
| #include "graph/types.h" | #include "graph/types.h" | ||||
| #include "runtime/base.h" | #include "runtime/base.h" | ||||
| #include "graph/load/model_manager/davinci_model.h" | #include "graph/load/model_manager/davinci_model.h" | ||||
| #include "mmpa/mmpa_api.h" | |||||
| namespace { | namespace { | ||||
| const char *const kTrainingTrace = "training_trace"; | const char *const kTrainingTrace = "training_trace"; | ||||
| @@ -46,6 +47,10 @@ const std::string kOptype = "op_type"; | |||||
| const std::string kBlockDim = "block_dims"; | const std::string kBlockDim = "block_dims"; | ||||
| const std::string kTaskId = "task_id"; | const std::string kTaskId = "task_id"; | ||||
| const std::string kStreamId = "stream_id"; | const std::string kStreamId = "stream_id"; | ||||
| const std::string kThreadId = "thread_id"; | |||||
| const std::string kIndexId = "index_id"; | |||||
| const std::string kTimeStamp = "time_stamp"; | |||||
| const std::string kTagId = "tag_id"; | |||||
| const std::string kShapeType = "shape_type"; | const std::string kShapeType = "shape_type"; | ||||
| const std::string kCurIterNum = "cur_iter_num"; | const std::string kCurIterNum = "cur_iter_num"; | ||||
| const std::string kTaskType = "task_type"; | const std::string kTaskType = "task_type"; | ||||
| @@ -286,6 +291,58 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin | |||||
| #endif | #endif | ||||
| } | } | ||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfileStepInfo( | |||||
| uint64_t index_id, uint64_t model_id, uint16_t tag_id, rtStream_t stream, int32_t device_id) { | |||||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||||
| rtError_t rt_ret = RT_ERROR_NONE; | |||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u", | |||||
| index_id, model_id, tag_id); | |||||
| rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx] failed, ret: 0x%X", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
| GELOGD("Profiling Step Info TraceTask execute async success, index_id = %lu, model_id = %lu, tag_id = %u", | |||||
| index_id, model_id, tag_id); | |||||
| #endif | |||||
| mmTimespec timespec = mmGetTickCount(); | |||||
| // 1000 ^ 3 converts second to nanosecond | |||||
| int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec; | |||||
| uint32_t task_id = 0; | |||||
| uint32_t stream_id = 0; | |||||
| rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "[Get][RtsInfo] task_id and stream_id failed, ret: 0x%X.", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
| GELOGD("Get profiling args, task_id[%u], stream_id[%u]", task_id, stream_id); | |||||
| Json step_info; | |||||
| step_info[kIndexId] = index_id; | |||||
| step_info[kModelId] = model_id; | |||||
| step_info[kTimeStamp] = time; | |||||
| step_info[kTagId] = tag_id; | |||||
| step_info[kTaskId] = task_id; | |||||
| step_info[kStreamId] = stream_id; | |||||
| step_info[kThreadId] = mmGetTid(); | |||||
| std::string reported_data; | |||||
| try { | |||||
| reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); | |||||
| } catch (std::exception &e) { | |||||
| GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); | |||||
| } catch (...) { | |||||
| GELOGE(FAILED, "Failed to convert JSON to string."); | |||||
| } | |||||
| reported_data.append(",") | |||||
| .append("\n"); | |||||
| ReportData(device_id, reported_data, "step_info"); | |||||
| #endif | |||||
| return SUCCESS; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportData( | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportData( | ||||
| const int32_t &device_id, const string &data, const string &tag_name) { | const int32_t &device_id, const string &data, const string &tag_name) { | ||||
| #ifdef DAVINCI_SUPPORT_PROFILING | #ifdef DAVINCI_SUPPORT_PROFILING | ||||
| @@ -27,6 +27,7 @@ | |||||
| #include "framework/common/ge_types.h" | #include "framework/common/ge_types.h" | ||||
| #include "external/register/register_types.h" | #include "external/register/register_types.h" | ||||
| #include "toolchain/prof_callback.h" | #include "toolchain/prof_callback.h" | ||||
| #include "runtime/stream.h" | |||||
| using std::map; | using std::map; | ||||
| using std::string; | using std::string; | ||||
| @@ -97,6 +98,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||||
| void GetFpBpPoint(std::string &fp_point, std::string &bp_point); | void GetFpBpPoint(std::string &fp_point, std::string &bp_point); | ||||
| void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const; | void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const; | ||||
| void ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name); | void ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name); | ||||
| Status ProfileStepInfo(uint64_t index_id, uint64_t model_id, uint16_t tag_id, rtStream_t stream, int32_t device_id); | |||||
| private: | private: | ||||
| Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf); | Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf); | ||||
| Status ParseOptions(const std::string &options); | Status ParseOptions(const std::string &options); | ||||
| @@ -179,6 +179,7 @@ target_compile_definitions(ge_executor PRIVATE | |||||
| google=ascend_private | google=ascend_private | ||||
| $<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0> | $<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0> | ||||
| $<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> | $<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| LOG_CPP | LOG_CPP | ||||
| ) | ) | ||||
| @@ -225,6 +226,7 @@ target_compile_definitions(ge_executor_shared PRIVATE | |||||
| DAVINCI_SUPPORT_PROFILING | DAVINCI_SUPPORT_PROFILING | ||||
| google=ascend_private | google=ascend_private | ||||
| FUNC_VISIBILITY | FUNC_VISIBILITY | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(ge_executor_shared PRIVATE | target_include_directories(ge_executor_shared PRIVATE | ||||
| @@ -3873,12 +3873,27 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||||
| GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_PRE_PROC_END)); | GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_PRE_PROC_END)); | ||||
| if (!task_list_.empty()) { | if (!task_list_.empty()) { | ||||
| uint64_t index_id = iterator_count_ + 1; | |||||
| uint64_t model_id = static_cast<uint64_t>(model_id_); | |||||
| int32_t device_id = static_cast<int32_t>(device_id_); | |||||
| // tag_id 0 means step begin, 1 means step end. | |||||
| if (profiling_model_execute_on) { | |||||
| GE_CHK_STATUS_RET_NOLOG( | |||||
| ProfilingManager::Instance().ProfileStepInfo(index_id, model_id, 0, rt_model_stream_, device_id)); | |||||
| } | |||||
| GELOGD("rtModelExecute do"); | GELOGD("rtModelExecute do"); | ||||
| GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_START)); | GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_START)); | ||||
| rtError_t rt_ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0); | rtError_t rt_ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0); | ||||
| GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret)); | GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret)); | ||||
| GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_END)); | GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_END)); | ||||
| GELOGD("rtModelExecute end"); | GELOGD("rtModelExecute end"); | ||||
| if (profiling_model_execute_on) { | |||||
| GE_CHK_STATUS_RET_NOLOG( | |||||
| ProfilingManager::Instance().ProfileStepInfo(index_id, model_id, 1, rt_model_stream_, device_id)); | |||||
| } | |||||
| iterator_count_++; | |||||
| } | } | ||||
| if (!is_async_mode_) { | if (!is_async_mode_) { | ||||
| @@ -18,6 +18,7 @@ | |||||
| #include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
| #include "graph/runtime_inference_context.h" | #include "graph/runtime_inference_context.h" | ||||
| #include "common/dump/dump_manager.h" | #include "common/dump/dump_manager.h" | ||||
| #include "common/profiling/profiling_manager.h" | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| @@ -77,10 +78,23 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, | |||||
| GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_)); | GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_)); | ||||
| RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End"); | RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End"); | ||||
| uint64_t index_id = context_.iteration + 1; | |||||
| uint64_t model_id = static_cast<uint64_t>(model_->GetModelId()); | |||||
| int32_t device_id = static_cast<int32_t>(device_id_); | |||||
| auto &prof_mgr = ProfilingManager::Instance(); | |||||
| // tag_id 0 means step begin, 1 means step end. | |||||
| if (prof_mgr.ProfilingModelExecuteOn()) { | |||||
| GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 0, stream_, device_id)); | |||||
| } | |||||
| HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc, args.outputs), | HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc, args.outputs), | ||||
| "Failed to execute partitioned call."); | "Failed to execute partitioned call."); | ||||
| RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); | RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); | ||||
| if (prof_mgr.ProfilingModelExecuteOn()) { | |||||
| GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 1, stream_, device_id)); | |||||
| } | |||||
| HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); | HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); | ||||
| RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); | RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); | ||||
| @@ -313,6 +313,8 @@ rtError_t rtFlushCache(uint64_t base, uint32_t len) { return RT_ERROR_NONE; } | |||||
| rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream_) { return RT_ERROR_NONE; } | rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream_) { return RT_ERROR_NONE; } | ||||
| rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagId, rtStream_t stream) { return RT_ERROR_NONE; } | |||||
| rtError_t rtMemSetRC(const void *dev_ptr, uint64_t size, uint32_t read_count) { return RT_ERROR_NONE; } | rtError_t rtMemSetRC(const void *dev_ptr, uint64_t size, uint32_t read_count) { return RT_ERROR_NONE; } | ||||
| rtError_t rtStreamSwitch(void *ptr, rtCondition_t condition, int64_t value, rtStream_t true_stream, rtStream_t stream) { | rtError_t rtStreamSwitch(void *ptr, rtCondition_t condition, int64_t value, rtStream_t true_stream, rtStream_t stream) { | ||||
| @@ -840,6 +840,7 @@ add_library(ge_ut_common STATIC ${COMMON_SRC_FILES} ${PROTO_HDRS}) | |||||
| target_compile_definitions(ge_ut_common PRIVATE | target_compile_definitions(ge_ut_common PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_compile_options(ge_ut_common PRIVATE | target_compile_options(ge_ut_common PRIVATE | ||||
| @@ -853,6 +854,7 @@ target_link_libraries(ge_ut_common PRIVATE | |||||
| ascend_protobuf | ascend_protobuf | ||||
| json | json | ||||
| ge_ut_graph | ge_ut_graph | ||||
| runtime_stub | |||||
| ) | ) | ||||
| # build common format | # build common format | ||||
| @@ -860,6 +862,7 @@ add_library(ge_ut_common_format STATIC ${COMMON_SRC_FILES} ${COMMON_FORMAT_SRC_F | |||||
| target_compile_definitions(ge_ut_common_format PRIVATE | target_compile_definitions(ge_ut_common_format PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_compile_options(ge_ut_common_format PRIVATE | target_compile_options(ge_ut_common_format PRIVATE | ||||
| @@ -1012,6 +1015,7 @@ add_library(ge_single_op STATIC ${SINGLE_OP_SRC_FILES} ${PROTO_HDRS}) | |||||
| target_compile_definitions(ge_single_op PRIVATE | target_compile_definitions(ge_single_op PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_compile_options(ge_single_op PRIVATE | target_compile_options(ge_single_op PRIVATE | ||||
| @@ -1108,6 +1112,7 @@ target_compile_options(ut_libge_distinct_load_utest PRIVATE | |||||
| target_compile_definitions(ut_libge_distinct_load_utest PRIVATE | target_compile_definitions(ut_libge_distinct_load_utest PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_link_libraries(ut_libge_distinct_load_utest | target_link_libraries(ut_libge_distinct_load_utest | ||||
| @@ -1022,6 +1022,10 @@ TEST_F(UtestDavinciModel, NnExecute) { | |||||
| EXPECT_EQ(outputs.size(), 1); | EXPECT_EQ(outputs.size(), 1); | ||||
| input_data.blobs = output_data.blobs; | input_data.blobs = output_data.blobs; | ||||
| EXPECT_EQ(input_data.blobs.size(), 1); | EXPECT_EQ(input_data.blobs.size(), 1); | ||||
| ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; | |||||
| ProfilingManager::Instance().device_id_.emplace_back(0); | |||||
| model.task_list_.resize(1); | |||||
| EXPECT_EQ(model.NnExecute(stream, false, input_data, output_data), SUCCESS); | EXPECT_EQ(model.NnExecute(stream, false, input_data, output_data), SUCCESS); | ||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||