| @@ -84,7 +84,6 @@ else () | |||
| set(STATIC_ACL_LIB ${GE_LIB_PATH}) | |||
| find_module(slog libalog.so ${GE_LIB_PATH}) | |||
| find_module(static_mmpa libmmpa.a ${GE_LIB_PATH}) | |||
| find_module(msprofiler_ext libmsprofiler.a ${GE_LIB_PATH}) | |||
| find_module(hccl libhccl.so ${GE_LIB_PATH}) | |||
| find_module(adump_server libadump_server.a ${GE_LIB_PATH}) | |||
| find_module(runtime libruntime.so ${GE_LIB_PATH}) | |||
| @@ -106,7 +105,6 @@ else () | |||
| elseif(PLATFORM STREQUAL "inference") | |||
| find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) | |||
| find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) | |||
| find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | |||
| if(PRODUCT STREQUAL "flr3") | |||
| elseif(PRODUCT STREQUAL "flr1") | |||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | |||
| @@ -120,7 +118,6 @@ else () | |||
| find_module(runtime libruntime.so ${ASCEND_ATC_DIR}) | |||
| find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | |||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_ATC_DIR}/stub) | |||
| find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | |||
| else() | |||
| message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") | |||
| endif() | |||
| @@ -116,6 +116,7 @@ set(EXECUTOR_SRC_LIST | |||
| "common/ge/plugin_manager.cc" | |||
| "common/profiling/ge_profiling.cc" | |||
| "common/profiling/profiling_manager.cc" | |||
| "common/profiling/command_handle.cc" | |||
| "executor/ge_executor.cc" | |||
| "ge_local_engine/engine/host_cpu_engine.cc" | |||
| "graph/build/memory/var_mem_assign_util.cc" | |||
| @@ -259,10 +260,9 @@ set(EXECUTOR_SRC_LIST | |||
| ################################################################## | |||
| set(COMPILER_SRC_LIST | |||
| "analyzer/analyzer.cc" | |||
| "common/dump/dump_op.cc" | |||
| #"common/dump/dump_op.cc" | |||
| "common/ge/op_tiling_manager.cc" | |||
| "common/ge/plugin_manager.cc" | |||
| "common/profiling/profiling_manager.cc" | |||
| "engine_manager/dnnengine_manager.cc" | |||
| "ge_local_engine/engine/host_cpu_engine.cc" | |||
| "ge_opt_info/ge_opt_info.cc" | |||
| @@ -473,7 +473,7 @@ set(RUNNER_SRC_LIST | |||
| "client/ge_api.cc" | |||
| "session/inner_session.cc" | |||
| "session/session_manager.cc" | |||
| "common/profiling/ge_runner_profiling.cc" | |||
| "common/profiling/profiling_init.cc" | |||
| "graph/manager/memory_api.cc" | |||
| "graph/manager/util/hcom_util.cc" | |||
| "graph/load/model_manager/task_info/hccl_task_info.cc" | |||
| @@ -568,6 +568,8 @@ target_link_libraries(ge_runner PRIVATE | |||
| graph | |||
| ge_common | |||
| ascend_protobuf | |||
| ge_executor_shared | |||
| msprofiler_fwk_share | |||
| register | |||
| c_sec | |||
| slog | |||
| @@ -35,6 +35,11 @@ | |||
| #include "common/util/error_manager/error_manager.h" | |||
| #include "toolchain/plog.h" | |||
| #include "ir_build/option_utils.h" | |||
| #include "framework/common/ge_types.h" | |||
| #include "external/ge/ge_api_types.h" | |||
| #include "graph/ge_context.h" | |||
| #include "common/profiling/profiling_init.h" | |||
| #include "common/profiling/profiling_properties.h" | |||
| using domi::OpRegistry; | |||
| using std::map; | |||
| @@ -43,6 +48,89 @@ using std::vector; | |||
| namespace { | |||
| const int32_t kMaxStrLen = 128; | |||
| const int kDecimal = 10; | |||
| const int kDefaultDeviceIdForTrain = 0; | |||
| const int kDefaultDeviceIdForInfer = -1; | |||
| void InitOptions(const map<string, string> &option_map, ge::Options &options) { | |||
| GELOGD("InitOptions start"); | |||
| options.session_id = 0; | |||
| auto is_train_mode = false; | |||
| auto iter = option_map.find(ge::OPTION_GRAPH_RUN_MODE); | |||
| if (iter != option_map.end()) { | |||
| if (ge::GraphRunMode(std::strtol(iter->second.c_str(), nullptr, kDecimal)) >= ge::TRAIN) { | |||
| is_train_mode = true; | |||
| } | |||
| } | |||
| iter = option_map.find(ge::OPTION_EXEC_SESSION_ID); | |||
| if (iter != option_map.end()) { | |||
| options.session_id = std::strtoll(iter->second.c_str(), nullptr, kDecimal); | |||
| } | |||
| options.device_id = is_train_mode ? kDefaultDeviceIdForTrain : kDefaultDeviceIdForInfer; | |||
| iter = option_map.find(ge::OPTION_EXEC_DEVICE_ID); | |||
| if (iter != option_map.end()) { | |||
| options.device_id = static_cast<int32_t>(std::strtol(iter->second.c_str(), nullptr, kDecimal)); | |||
| } | |||
| iter = option_map.find(ge::OPTION_EXEC_JOB_ID); | |||
| if (iter != option_map.end()) { | |||
| options.job_id = iter->second.c_str(); | |||
| } | |||
| options.isUseHcom = false; | |||
| iter = option_map.find(ge::OPTION_EXEC_IS_USEHCOM); | |||
| if (iter != option_map.end()) { | |||
| std::istringstream(iter->second) >> options.isUseHcom; | |||
| } | |||
| options.isUseHvd = false; | |||
| iter = option_map.find(ge::OPTION_EXEC_IS_USEHVD); | |||
| if (iter != option_map.end()) { | |||
| std::istringstream(iter->second) >> options.isUseHvd; | |||
| } | |||
| options.deployMode = false; | |||
| iter = option_map.find(ge::OPTION_EXEC_DEPLOY_MODE); | |||
| if (iter != option_map.end()) { | |||
| std::istringstream(iter->second) >> options.deployMode; | |||
| } | |||
| iter = option_map.find(ge::OPTION_EXEC_POD_NAME); | |||
| if (iter != option_map.end()) { | |||
| options.podName = iter->second.c_str(); | |||
| } | |||
| iter = option_map.find(ge::OPTION_EXEC_PROFILING_MODE); | |||
| if (iter != option_map.end()) { | |||
| options.profiling_mode = iter->second.c_str(); | |||
| } | |||
| iter = option_map.find(ge::OPTION_EXEC_PROFILING_OPTIONS); | |||
| if (iter != option_map.end()) { | |||
| options.profiling_options = iter->second.c_str(); | |||
| } | |||
| iter = option_map.find(ge::OPTION_EXEC_RANK_ID); | |||
| if (iter != option_map.end()) { | |||
| options.rankId = std::strtoll(iter->second.c_str(), nullptr, kDecimal); | |||
| } | |||
| iter = option_map.find(ge::OPTION_EXEC_RANK_TABLE_FILE); | |||
| if (iter != option_map.end()) { | |||
| options.rankTableFile = iter->second.c_str(); | |||
| } | |||
| options.enable_atomic = true; | |||
| iter = option_map.find(ge::OPTION_EXEC_ATOMIC_FLAG); | |||
| GE_IF_BOOL_EXEC(iter != option_map.end(), | |||
| options.enable_atomic = std::strtol(iter->second.c_str(), nullptr, kDecimal)); | |||
| GELOGD("ge InnerInitialize, the enable_atomic_flag in options_ is %d", options.enable_atomic); | |||
| } | |||
| void InitProfiling(ge::Options &options) { | |||
| GELOGD("InitProfiling start"); | |||
| ge::GetContext().Init(); | |||
| // Profiling init | |||
| if (ge::ProfilingInit::Instance().Init(options) != ge::SUCCESS) { | |||
| GELOGW("Profiling init failed."); | |||
| } | |||
| } | |||
| void ShutDownProfiling() { | |||
| GELOGD("Profiling shut down"); | |||
| if (ge::ProfilingProperties::Instance().ProfilingOn()) { | |||
| ge::ProfilingInit::Instance().ShutDownProfiling(); | |||
| } | |||
| } | |||
| } // namespace | |||
| static bool g_ge_initialized = false; | |||
| @@ -128,6 +216,9 @@ Status GEInitializeImpl(const std::map<string, string> &options) { | |||
| if (CheckOptionsValid(options) != SUCCESS) { | |||
| return FAILED; | |||
| } | |||
| ge::Options str_options; | |||
| InitOptions(options, str_options); | |||
| InitProfiling(str_options); | |||
| GE_TIMESTAMP_END(CheckOptionsValid, "GEInitialize::CheckOptionsValid"); | |||
| ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOpsProtoInit); | |||
| @@ -208,7 +299,7 @@ Status GEFinalize() { | |||
| GELOGW("[FINAL][FINAL]GEFinalize is called before GEInitialize"); | |||
| return SUCCESS; | |||
| } | |||
| ShutDownProfiling(); | |||
| ErrorManager::GetInstance().SetStage(error_message::kFinalize, error_message::kFinalize); | |||
| ErrorManager::GetInstance().GenWorkStreamIdDefault(); | |||
| GELOGT(TRACE_INIT, "GEFinalize start"); | |||
| @@ -50,6 +50,7 @@ set(SRC_LIST | |||
| "${GE_CODE_DIR}/ge/common/transop_util.cc" | |||
| "${GE_CODE_DIR}/ge/common/types.cc" | |||
| "${GE_CODE_DIR}/ge/common/util.cc" | |||
| "${GE_CODE_DIR}/ge/common/profiling/profiling_properties.cc" | |||
| ) | |||
| if (NOT ENABLE_D AND NOT ENABLE_ACL) | |||
| @@ -0,0 +1,268 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "command_handle.h" | |||
| #include "runtime/base.h" | |||
| #include "common/profiling/profiling_manager.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/debug/log.h" | |||
| #include "framework/common/ge_inner_error_codes.h" | |||
| #include "framework/omg/omg_inner_types.h" | |||
| #include "graph/load/graph_loader.h" | |||
| namespace { | |||
// Position of the comma-separated device id string inside the parameter vector
// produced by TransProfConfigToParam ({key, count, key, id-list}).
const uint32_t kDeviceListIndex = 3;
// Upper bound accepted for the number of devices in one profiling command.
// Unsigned so that it compares against uint32_t counts without sign conversion.
const uint32_t kMaxDevNum = 64;
// Keys placed in front of the device count and device id list parameters.
const std::string kDeviceNums = "devNums";
const std::string kDeviceIdList = "devIdList";
// Command names handed to the graph loader.
const std::string kProfilingInit = "prof_init";
const std::string kProfilingFinalize = "prof_finalize";
const std::string kProfilingStart = "prof_start";
const std::string kProfilingStop = "prof_stop";
const std::string kProfilingModelSubscribe = "prof_model_subscribe";
const std::string kProfilingModelUnsubscribe = "prof_model_cancel_subscribe";
// Key placed in front of the model id parameter.
const std::string kProfilingModelId = "modelId";

// Command kinds accepted by the control-switch handler; values start at 0 and are contiguous.
enum ProfCommandHandleType {
  kProfCommandhandleInit = 0,
  kProfCommandhandleStart,
  kProfCommandhandleStop,
  kProfCommandhandleFinalize,
  kProfCommandhandleModelSubscribe,
  kProfCommandhandleModelUnsubscribe
};

// Number of enumerators above. Derived from the last enumerator so the exclusive
// upper bound used for validation cannot drift from the enum.
const uint32_t kCommandNum = static_cast<uint32_t>(kProfCommandhandleModelUnsubscribe) + 1U;

// Maps every command kind to the command name understood by the graph loader.
const std::map<ProfCommandHandleType, std::string> kProfCommandTypeMap = {
    {kProfCommandhandleInit, kProfilingInit},
    {kProfCommandhandleStart, kProfilingStart},
    {kProfCommandhandleStop, kProfilingStop},
    {kProfCommandhandleFinalize, kProfilingFinalize},
    {kProfCommandhandleModelSubscribe, kProfilingModelSubscribe},
    {kProfCommandhandleModelUnsubscribe, kProfilingModelUnsubscribe}};
| bool IsProfTypeValid(uint32_t type) { | |||
| if (type < 0 || type >= kCommandNum) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][Type]Type %u is invalid", type); | |||
| return false; | |||
| } | |||
| GELOGD("Type is %u", type); | |||
| return true; | |||
| } | |||
| bool IsProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { | |||
| if (deviceid_list == nullptr) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][DeviceIDList]Invalid, it is nullptr"); | |||
| REPORT_INNER_ERROR("E19999", "Device id list is nullptr"); | |||
| return false; | |||
| } | |||
| if (device_nums == 0 || device_nums > kMaxDevNum) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][DeviceNums]Invalid, device nums: %u", device_nums); | |||
| REPORT_INNER_ERROR("E19999", "DeviceNums %u check invalid", device_nums); | |||
| return false; | |||
| } | |||
| // real device num | |||
| int32_t dev_count = 0; | |||
| rtError_t rt_err = rtGetDeviceCount(&dev_count); | |||
| if (rt_err != RT_ERROR_NONE) { | |||
| GELOGE(ge::INTERNAL_ERROR, "[Get][DeviceCount]Failed, error_code %d", rt_err); | |||
| REPORT_CALL_ERROR("E19999", "Get device count failed, error_code %d", rt_err); | |||
| return false; | |||
| } | |||
| if (device_nums > static_cast<uint32_t>(dev_count)) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][Param]Device num %u is not in range [1,%d]", device_nums, dev_count); | |||
| REPORT_INNER_ERROR("E19999", "Device num %u check invalid, it is not in range [1,%d]", device_nums, dev_count); | |||
| return false; | |||
| } | |||
| std::set<uint32_t> record; | |||
| for (size_t i = 0; i < device_nums; ++i) { | |||
| uint32_t dev_id = deviceid_list[i]; | |||
| if (dev_id >= static_cast<uint32_t>(dev_count)) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is not in range [0,%d)", dev_id, dev_count); | |||
| REPORT_CALL_ERROR("E19999", "Device id %u is not in range [0,%d)", dev_id, dev_count); | |||
| return false; | |||
| } | |||
| if (record.count(dev_id) > 0) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is duplicatedly set", dev_id); | |||
| REPORT_CALL_ERROR("E19999", "Device id %u is not unique, duplicatedly set", dev_id); | |||
| return false; | |||
| } | |||
| record.insert(dev_id); | |||
| } | |||
| return true; | |||
| } | |||
| bool TransProfConfigToParam(const rtProfCommandHandle &profCommand, vector<string> &prof_config_params) { | |||
| prof_config_params.clear(); | |||
| prof_config_params.emplace_back(kDeviceNums); | |||
| prof_config_params.emplace_back(std::to_string(profCommand.devNums)); | |||
| prof_config_params.emplace_back(kDeviceIdList); | |||
| std::string devID = ""; | |||
| if (profCommand.devNums == 0) { | |||
| GELOGE(ge::FAILED, "[Check][Param]The device num is invalid."); | |||
| return false; | |||
| } | |||
| for (uint32_t i = 0; i < profCommand.devNums; i++) { | |||
| devID.append(std::to_string(profCommand.devIdList[i])); | |||
| if (i != profCommand.devNums - 1) { | |||
| devID.append(","); | |||
| } | |||
| } | |||
| prof_config_params.push_back(devID); | |||
| return true; | |||
| } | |||
| ge::Status NeedUnsubscribe(ProfCommandHandleType type, bool is_subscribe, uint32_t graph_id, | |||
| vector<string> &prof_params) { | |||
| if (type == kProfCommandhandleModelUnsubscribe && is_subscribe) { | |||
| prof_params.clear(); | |||
| prof_params.emplace_back(kProfilingModelId); | |||
| uint32_t model_id = graph_id; | |||
| if (is_subscribe) { | |||
| auto &profiling_manager = ge::ProfilingManager::Instance(); | |||
| auto ret = profiling_manager.GetModelIdFromGraph(graph_id, model_id); | |||
| if (ret != ge::SUCCESS) { | |||
| GELOGE(ret, "[Get][GraphId]graph_id:%u not not found", graph_id); | |||
| return ret; | |||
| } | |||
| } | |||
| prof_params.emplace_back(std::to_string(model_id)); | |||
| } | |||
| return ge::SUCCESS; | |||
| } | |||
| rtError_t NeedHandleStartEnd(ProfCommandHandleType type, rtProfCommandHandle_t *prof_config_param, | |||
| std::vector<string> &prof_params) { | |||
| if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { | |||
| if (!IsProfConfigValid(prof_config_param->devIdList, prof_config_param->devNums)) { | |||
| return ge::FAILED; | |||
| } | |||
| if (!TransProfConfigToParam(*prof_config_param, prof_params)) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][Param]Transfer profilerConfig to string vector failed"); | |||
| REPORT_CALL_ERROR("E19999", "Transfer profilerConfig to string vector failed"); | |||
| return ge::PARAM_INVALID; | |||
| } | |||
| } | |||
| return ge::SUCCESS; | |||
| } | |||
| rtError_t NeedHandleModelSubscribe(ProfCommandHandleType type, rtProfCommandHandle_t *prof_config_param, | |||
| std::vector<string> &prof_params) { | |||
| if (type == kProfCommandhandleModelSubscribe) { | |||
| auto &profiling_manager = ge::ProfilingManager::Instance(); | |||
| auto is_train = domi::GetContext().train_flag; | |||
| if (is_train) { | |||
| profiling_manager.SetSubscribeInfo(prof_config_param->profSwitch, prof_config_param->modelId, true); | |||
| return ge::SUCCESS; | |||
| } | |||
| prof_params.clear(); | |||
| prof_params.push_back(kProfilingModelId); | |||
| prof_params.push_back(std::to_string(prof_config_param->modelId)); | |||
| } | |||
| return ge::SUCCESS; | |||
| } | |||
| rtError_t ExecuteCommand(ProfCommandHandleType type, | |||
| std::map<ProfCommandHandleType, std::string>::const_iterator iter, | |||
| rtProfCommandHandle_t *prof_config_param, std::vector<string> &prof_params) { | |||
| ge::GraphLoader graph_loader; | |||
| ge::Command command; | |||
| command.cmd_params.clear(); | |||
| command.cmd_type = iter->second; | |||
| command.cmd_params = prof_params; | |||
| if (type != kProfCommandhandleFinalize) { | |||
| command.module_index = prof_config_param->profSwitch; | |||
| } | |||
| GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%lx", iter->second.c_str(), | |||
| command.module_index); | |||
| if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { | |||
| GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str()); | |||
| } | |||
| ge::Status ret = graph_loader.CommandHandle(command); | |||
| if (ret != ge::SUCCESS) { | |||
| GELOGE(ret, "[Handle][Command]Handle profiling command failed, command type %s, error_code %u", | |||
| iter->second.c_str(), ret); | |||
| REPORT_CALL_ERROR("E19999", "Handle profiling command failed, command type %s, error_code %u", | |||
| iter->second.c_str(), ret); | |||
| return ge::FAILED; | |||
| } | |||
| GELOGI("Successfully execute profiling command type: %d, command 0x%lx.", type, command.module_index); | |||
| return ge::SUCCESS; | |||
| } | |||
| rtError_t HandleCtrlSwitch(void *data) { | |||
| auto &profiling_manager = ge::ProfilingManager::Instance(); | |||
| rtProfCommandHandle_t *prof_config_param = reinterpret_cast<rtProfCommandHandle_t *>(data); | |||
| if (!IsProfTypeValid(prof_config_param->type)) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][Param]The prof comand is invalid."); | |||
| return ge::FAILED; | |||
| } | |||
| auto type = static_cast<ProfCommandHandleType>(prof_config_param->type); | |||
| if (type != kProfCommandhandleFinalize) { | |||
| GE_CHECK_NOTNULL(data); | |||
| } | |||
| auto iter = kProfCommandTypeMap.find(type); | |||
| if (iter == kProfCommandTypeMap.end()) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][Param]The prof comand type is invalid."); | |||
| return ge::PARAM_INVALID; | |||
| } | |||
| std::vector<string> prof_params; | |||
| ge::Status ret = NeedHandleStartEnd(type, prof_config_param, prof_params); | |||
| if (ret != ge::SUCCESS) { | |||
| return ret; | |||
| } | |||
| ret = NeedHandleModelSubscribe(type, prof_config_param, prof_params); | |||
| if (ret != ge::SUCCESS) { | |||
| return ret; | |||
| } | |||
| auto is_subscribe = profiling_manager.GetSubscribeInfo().is_subscribe; | |||
| // GraphId is actually stored in prof_config_param | |||
| auto graph_id = prof_config_param->modelId; | |||
| ret = NeedUnsubscribe(type, is_subscribe, graph_id, prof_params); | |||
| if (ret != ge::SUCCESS) { | |||
| GELOGE(ret, "[Check][Param]graph_id:%u not not found", graph_id); | |||
| REPORT_INPUT_ERROR( | |||
| "E10001", std::vector<std::string>({"value", "parameter", "reason"}), | |||
| std::vector<std::string>({std::to_string(graph_id), "GraphToModelMap", "graph_id does not exist!"})); | |||
| return ge::FAILED; | |||
| } | |||
| return ExecuteCommand(type, iter, prof_config_param, prof_params); | |||
| } | |||
| } // namespace | |||
| namespace ge { | |||
| rtError_t CommandHandle(uint32_t rt_type, void *data, uint32_t len) { | |||
| if (data == nullptr) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][Param]The prof comand is invalid."); | |||
| return ge::FAILED; | |||
| } | |||
| auto &profiling_manager = ge::ProfilingManager::Instance(); | |||
| if (rt_type == RT_PROF_CTRL_REPORTER) { | |||
| profiling_manager.SetMsprofReporterCallback(reinterpret_cast<MsprofReporterCallback>(data)); | |||
| GELOGD("return with MsprofReporterCallback"); | |||
| return ge::SUCCESS; | |||
| } else if (rt_type == RT_PROF_CTRL_SWITCH) { | |||
| return HandleCtrlSwitch(data); | |||
| } | |||
| return ge::FAILED; | |||
| } | |||
| } // namespace ge | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -14,13 +14,13 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "framework/common/profiling/ge_runner_profiling.h" | |||
| #include "init/gelib.h" | |||
| #ifndef GE_COMMON_PROFILING_COMMAND_HANDLE_H_ | |||
| #define GE_COMMON_PROFILING_COMMAND_HANDLE_H_ | |||
| bool IsInitialize() { | |||
| std::shared_ptr<ge::GELib> instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || instance_ptr->InitFlag() == false) { | |||
| return false; | |||
| } | |||
| return true; | |||
| #include "ge/ge_api_error_codes.h" | |||
| #include "runtime/base.h" | |||
| namespace ge { | |||
| GE_FUNC_VISIBILITY rtError_t CommandHandle(uint32_t rt_type, void *data, uint32_t len); | |||
| } | |||
| #endif // GE_COMMON_PROFILING_COMMAND_HANDLE_H_ | |||
| @@ -19,245 +19,15 @@ | |||
| #include "common/profiling/profiling_manager.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/debug/log.h" | |||
| #include "graph/load/graph_loader.h" | |||
| #include "graph/ge_context.h" | |||
| #include "init/gelib.h" | |||
| #include "framework/common/ge_inner_error_codes.h" | |||
| #include "common/model/ge_model.h" | |||
| #include "framework/omg/omg_inner_types.h" | |||
| namespace { | |||
| const uint32_t kDeviceListIndex = 3; | |||
| const std::string kDeviceNums = "devNums"; | |||
| const std::string kDeviceIdList = "devIdList"; | |||
| const std::string kProfilingInit = "prof_init"; | |||
| const std::string kProfilingFinalize = "prof_finalize"; | |||
| const std::string kProfilingStart = "prof_start"; | |||
| const std::string kProfilingStop = "prof_stop"; | |||
| const std::string kProfModelSubscribe = "prof_model_subscribe"; | |||
| const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; | |||
| const std::string kRtSetDeviceRegName = "profiling"; | |||
| const std::string kPofilingModelId = "modelId"; | |||
| const std::map<ProfCommandHandleType, std::string> kProfCommandTypeMap = { | |||
| {kProfCommandhandleInit, kProfilingInit}, | |||
| {kProfCommandhandleStart, kProfilingStart}, | |||
| {kProfCommandhandleStop, kProfilingStop}, | |||
| {kProfCommandhandleFinalize, kProfilingFinalize}, | |||
| {kProfCommandhandleModelSubscribe, kProfModelSubscribe}, | |||
| {kProfCommandhandleModelUnsubscribe, kProfModelUnsubscribe}}; | |||
| const uint64_t kModelId = ge::INVALID_MODEL_ID; | |||
| const uint16_t kStepStart = 0; | |||
| const uint16_t kStepEnd = 1; | |||
| ge::Status NeedUnsubscribe(ProfCommandHandleType type, bool is_subscribe, | |||
| uint32_t graph_id, vector<string> &prof_params) { | |||
| if (type == kProfCommandhandleModelUnsubscribe && is_subscribe) { | |||
| prof_params.clear(); | |||
| prof_params.emplace_back(kPofilingModelId); | |||
| uint32_t model_id = 0; | |||
| auto ret = ge::ProfilingManager::Instance().GetModelIdFromGraph(graph_id, model_id); | |||
| if (ret != ge::SUCCESS) { | |||
| GELOGE(ret, "graph_id:%u not not found", graph_id); | |||
| return ret; | |||
| } | |||
| prof_params.emplace_back(std::to_string(model_id)); | |||
| } | |||
| return ge::SUCCESS; | |||
| } | |||
| } // namespace | |||
| bool TransProfConfigToParam(const ProfCommandHandleData &profCommand, vector<string> &prof_config_params) { | |||
| prof_config_params.clear(); | |||
| prof_config_params.emplace_back(kDeviceNums); | |||
| prof_config_params.emplace_back(std::to_string(profCommand.devNums)); | |||
| prof_config_params.emplace_back(kDeviceIdList); | |||
| std::string devID = ""; | |||
| if (profCommand.devNums == 0) { | |||
| GELOGW("The device num is invalid."); | |||
| return false; | |||
| } | |||
| for (uint32_t i = 0; i < profCommand.devNums; i++) { | |||
| devID.append(std::to_string(profCommand.devIdList[i])); | |||
| if (i != profCommand.devNums - 1) { | |||
| devID.append(","); | |||
| } | |||
| } | |||
| prof_config_params.push_back(devID); | |||
| return true; | |||
| } | |||
| bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { | |||
| if (deviceid_list == nullptr) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][DeviceIDList]Invalid, it is nullptr"); | |||
| REPORT_INNER_ERROR("E19999", "Device id list is nullptr"); | |||
| return false; | |||
| } | |||
| if (device_nums == 0 || device_nums > MAX_DEV_NUM) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][DeviceNums]Invalid, device nums: %u", device_nums); | |||
| REPORT_INNER_ERROR("E19999", "DeviceNums %u check invalid", device_nums); | |||
| return false; | |||
| } | |||
| // real device num | |||
| int32_t dev_count = 0; | |||
| rtError_t rt_err = rtGetDeviceCount(&dev_count); | |||
| if (rt_err != RT_ERROR_NONE) { | |||
| GELOGE(ge::INTERNAL_ERROR, "[Get][DeviceCount]Failed, error_code %d", rt_err); | |||
| REPORT_CALL_ERROR("E19999", "Get device count failed, error_code %d", rt_err); | |||
| return false; | |||
| } | |||
| if (device_nums > static_cast<uint32_t>(dev_count)) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][Param]Device num %u is not in range [1,%d]", | |||
| device_nums, dev_count); | |||
| REPORT_INNER_ERROR("E19999", "Device num %u check invalid, it is not in range [1,%d]", | |||
| device_nums, dev_count); | |||
| return false; | |||
| } | |||
| std::set<uint32_t> record; | |||
| for (size_t i = 0; i < device_nums; ++i) { | |||
| uint32_t dev_id = deviceid_list[i]; | |||
| if (dev_id >= static_cast<uint32_t>(dev_count)) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is not in range [0,%d)", | |||
| dev_id, dev_count); | |||
| REPORT_CALL_ERROR("E19999", "Device id %u is not in range [0,%d)", dev_id, dev_count); | |||
| return false; | |||
| } | |||
| if (record.count(dev_id) > 0) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][DeviceId]Device id %u is duplicatedly set", dev_id); | |||
| REPORT_CALL_ERROR("E19999", "Device id %u is not unique, duplicatedly set", dev_id); | |||
| return false; | |||
| } | |||
| record.insert(dev_id); | |||
| } | |||
| return true; | |||
| } | |||
| ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) { | |||
| if (func == nullptr) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][Param]Msprof ctrl callback is nullptr"); | |||
| REPORT_INNER_ERROR("E19999", "Msprof ctrl callback is nullptr"); | |||
| return ge::PARAM_INVALID; | |||
| } | |||
| if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) { | |||
| GELOGW("Msprof ctrl callback is exist, just ignore it."); | |||
| } else { | |||
| ge::ProfilingManager::Instance().SetMsprofCtrlCallback(func); | |||
| } | |||
| return ge::SUCCESS; | |||
| } | |||
| ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) { | |||
| if (func == nullptr) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofSetDeviceCallback callback is nullptr"); | |||
| REPORT_INNER_ERROR("E19999", "MsprofSetDeviceCallback callback is nullptr"); | |||
| return ge::PARAM_INVALID; | |||
| } | |||
| // Pass MsprofSetDeviceCallback to runtime | |||
| ge::Status rt_ret = rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), static_cast<rtDeviceStateCallback>(func)); | |||
| if (rt_ret != ge::SUCCESS) { | |||
| GELOGE(rt_ret, "[Pass][MsprofSetDeviceCallback]To runtime failed, ret 0x%X", rt_ret); | |||
| REPORT_CALL_ERROR("E19999", "Pass MsprofSetDeviceCallback to runtime failed, ret 0x%X", rt_ret); | |||
| return rt_ret; | |||
| } | |||
| return ge::SUCCESS; | |||
| } | |||
| ge::Status RegProfReporterCallback(MsprofReporterCallback func) { | |||
| if (func == nullptr) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr"); | |||
| REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr"); | |||
| return ge::PARAM_INVALID; | |||
| } | |||
| if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofReporterCallback != nullptr) { | |||
| GELOGW("Msprof reporter callback is exist, just ignore it."); | |||
| } else { | |||
| GELOGI("GE register Msprof reporter callback."); | |||
| ge::ProfilingManager::Instance().SetMsprofReporterCallback(func); | |||
| // Pass MsprofReporterCallback to runtime | |||
| ge::Status rt_ret = rtSetMsprofReporterCallback(func); | |||
| if (rt_ret != ge::SUCCESS) { | |||
| GELOGE(rt_ret, "[Pass][Param]Pass MsprofReporterCallback to runtime failed, error_code %u", | |||
| rt_ret); | |||
| REPORT_CALL_ERROR("E19999", "Pass MsprofReporterCallback to runtime failed, error_code %u", | |||
| rt_ret); | |||
| return rt_ret; | |||
| } | |||
| // Pass MsprofReporterCallback to hccl | |||
| } | |||
| return ge::SUCCESS; | |||
| } | |||
| ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len) { | |||
| if (type != kProfCommandhandleFinalize) { | |||
| GE_CHECK_NOTNULL(data); | |||
| } | |||
| ProfCommandHandleData *prof_config_param = reinterpret_cast<ProfCommandHandleData *>(data); | |||
| auto iter = kProfCommandTypeMap.find(type); | |||
| if (iter == kProfCommandTypeMap.end()) { | |||
| GELOGW("The prof comand type is invalid."); | |||
| return ge::PARAM_INVALID; | |||
| } | |||
| std::vector<string> prof_params; | |||
| if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { | |||
| if (!isProfConfigValid(prof_config_param->devIdList, prof_config_param->devNums)) { | |||
| return ge::FAILED; | |||
| } | |||
| if (!TransProfConfigToParam(*prof_config_param, prof_params)) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][Param]Transfer profilerConfig to string vector failed"); | |||
| REPORT_CALL_ERROR("E19999", "Transfer profilerConfig to string vector failed"); | |||
| return ge::PARAM_INVALID; | |||
| } | |||
| } | |||
| auto &profiling_manager = ge::ProfilingManager::Instance(); | |||
| auto is_train = domi::GetContext().train_flag; | |||
| if (type == kProfCommandhandleModelSubscribe && is_train) { | |||
| profiling_manager.SetSubscribeInfo(prof_config_param->profSwitch, prof_config_param->modelId, true); | |||
| return ge::SUCCESS; | |||
| } | |||
| auto is_subscribe = profiling_manager.GetSubscribeInfo().is_subscribe; | |||
| // GraphId is actually stored in prof_config_param | |||
| auto graph_id = prof_config_param->modelId; | |||
| ge::Status ret = NeedUnsubscribe(type, is_subscribe, graph_id, prof_params); | |||
| if (ret != ge::SUCCESS) { | |||
| GELOGE(ret, "graph_id:%u not not found", graph_id); | |||
| REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"value", "parameter", "reason"}), | |||
| std::vector<std::string>({std::to_string(graph_id), | |||
| "GraphToModelMap", | |||
| "graph_id does not exist!"})); | |||
| return ge::FAILED; | |||
| } | |||
| ge::GraphLoader graph_loader; | |||
| ge::Command command; | |||
| command.cmd_params.clear(); | |||
| command.cmd_type = iter->second; | |||
| command.cmd_params = prof_params; | |||
| if (type != kProfCommandhandleFinalize) { | |||
| command.module_index = prof_config_param->profSwitch; | |||
| } | |||
| GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%lx", iter->second.c_str(), | |||
| command.module_index); | |||
| if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { | |||
| GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str()); | |||
| } | |||
| ret = graph_loader.CommandHandle(command); | |||
| if (ret != ge::SUCCESS) { | |||
| GELOGE(ret, "[Handle][Command]Handle profiling command failed, command type %s, error_code %u", | |||
| iter->second.c_str(), ret); | |||
| REPORT_CALL_ERROR("E19999", "Handle profiling command failed, command type %s, error_code %u", | |||
| iter->second.c_str(), ret); | |||
| return ge::FAILED; | |||
| } | |||
| GELOGI("Successfully execute profiling command type: %d, command 0x%lx.", type, command.module_index); | |||
| return ge::SUCCESS; | |||
| } | |||
| ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id, rtStream_t stream) { | |||
| static bool is_first_run = true; | |||
| int32_t device_id = 0; | |||
| @@ -289,3 +59,7 @@ ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id, rtStream_t stream | |||
| // Forwards to ProfilingManager::GetDeviceIdFromGraph and returns its status unchanged. | |||
| // device_id is passed through by reference (presumably the output slot; confirm in ProfilingManager). | |||
| ge::Status ProfGetDeviceFormGraphId(uint32_t graph_id, uint32_t &device_id) { | |||
|   auto &profiling_manager = ge::ProfilingManager::Instance(); | |||
|   const ge::Status status = profiling_manager.GetDeviceIdFromGraph(graph_id, device_id); | |||
|   return status; | |||
| } | |||
| // Forwards the (graph_id, device_id) pair to ProfilingManager::SetGraphIdToDeviceMap. | |||
| void ProfSetGraphIdToDeviceMap(uint32_t graph_id, uint32_t &device_id) { | |||
|   auto &profiling_manager = ge::ProfilingManager::Instance(); | |||
|   profiling_manager.SetGraphIdToDeviceMap(graph_id, device_id); | |||
| } | |||
| @@ -0,0 +1,247 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "profiling_init.h" | |||
| #include "common/properties_manager.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/debug/log.h" | |||
| #include "common/profiling/profiling_properties.h" | |||
| #include "runtime/base.h" | |||
| #include "common/profiling/command_handle.h" | |||
| #include "common/profiling/profiling_manager.h" | |||
| namespace { | |||
| // Keys looked up in the JSON profiling options string. | |||
| constexpr const char *kTrainingTrace = "training_trace"; | |||
| constexpr const char *kFpPoint = "fp_point"; | |||
| constexpr const char *kBpPoint = "bp_point"; | |||
| }  // namespace | |||
| namespace ge { | |||
| // Returns the single ProfilingInit object, held as a function-local static. | |||
| ProfilingInit &ProfilingInit::Instance() { | |||
|   static ProfilingInit instance; | |||
|   return instance; | |||
| } | |||
| // Initializes profiling for GE. | |||
| // Steps: resolve the configuration through InitFromOptions, register the runtime control | |||
| // callback, then, only when profiling is enabled, pass the configuration to MsprofInit. | |||
| // Returns the InitFromOptions status when it fails, FAILED when MsprofInit returns non-zero, | |||
| // and SUCCESS otherwise (including the case where profiling is off). | |||
| ge::Status ProfilingInit::Init(const Options &options) { | |||
|   GELOGI("ProfilingInit::Init job_id:%s", options.job_id.c_str()); | |||
|   struct MsprofGeOptions prof_conf = {{0}}; | |||
|   bool is_execute_profiling = false; | |||
|   Status ret = InitFromOptions(options, prof_conf, is_execute_profiling); | |||
|   if (ret != SUCCESS) { | |||
|     GELOGE(ret, "[Init][Profiling]Failed, error_code %u", ret); | |||
|     REPORT_CALL_ERROR("E19999", "Init profiling failed, error_code %u", ret); | |||
|     return ret; | |||
|   } | |||
|   // Registration stays best effort (a failure does not abort initialization), but the | |||
|   // returned status is no longer dropped silently. | |||
|   if (ProfRegisterCtrlCallback() != SUCCESS) { | |||
|     GELOGW("Register profiling ctrl callback failed, continue profiling init."); | |||
|   } | |||
|   if (!is_execute_profiling) { | |||
|     GELOGI("The profiling is off, skip the initialization"); | |||
|     return SUCCESS; | |||
|   } | |||
|   const uint32_t init_type = static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS); | |||
|   int32_t cb_ret = MsprofInit(init_type, static_cast<void *>(&prof_conf), sizeof(MsprofGeOptions)); | |||
|   if (cb_ret != 0) { | |||
|     GELOGE(FAILED, "[Call][msprofCtrlCallback]Failed, type %u, return %d", init_type, cb_ret); | |||
|     REPORT_CALL_ERROR("E19999", "Call msprofCtrlCallback failed, type %u, return %d", init_type, cb_ret); | |||
|     return FAILED; | |||
|   } | |||
|   GELOGI("Profiling init success"); | |||
|   return SUCCESS; | |||
| } | |||
| // Registers CommandHandle with the runtime as the profiling control callback for module GE. | |||
| // Returns FAILED when rtProfRegisterCtrlCallback does not return RT_ERROR_NONE, else SUCCESS. | |||
| ge::Status ProfilingInit::ProfRegisterCtrlCallback() { | |||
|   rtProfCtrlHandle callback = CommandHandle; | |||
|   rtError_t rt_ret = rtProfRegisterCtrlCallback(GE, callback); | |||
|   if (rt_ret != RT_ERROR_NONE) { | |||
|     // Include the runtime error code so the failure can be diagnosed from the log. | |||
|     GELOGE(FAILED, "[Register][CtrlCallback]Register CtrlCallBack failed, ret 0x%X", rt_ret); | |||
|     REPORT_CALL_ERROR("E19999", "Register CtrlCallBack failed, ret 0x%X", rt_ret); | |||
|     return FAILED; | |||
|   } | |||
|   return SUCCESS; | |||
| } | |||
| // Decides whether profiling runs and fills prof_conf accordingly. | |||
| // GE options take priority: they are used when profiling_mode is "1" and profiling_options is | |||
| // non-empty. Otherwise the PROFILING_MODE / PROFILING_OPTIONS environment variables are read; | |||
| // they enable profiling only when the mode equals "true" and the options are non-empty. | |||
| // When neither source enables profiling the function returns SUCCESS and leaves | |||
| // is_execute_profiling and the ProfilingProperties flags untouched. | |||
| // Returns INTERNAL_ERROR when a string copy fails and PARAM_INVALID when ParseOptions fails. | |||
| ge::Status ProfilingInit::InitFromOptions(const Options &options, MsprofGeOptions &prof_conf, | |||
|                                           bool &is_execute_profiling) { | |||
|   if (options.profiling_mode == "1" && !options.profiling_options.empty()) { | |||
|     // enable profiling by ge option | |||
|     if (strncpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(), | |||
|                   MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) { | |||
|       GELOGE(INTERNAL_ERROR, "[copy][ProfilingOptions]Failed, options %s", options.profiling_options.c_str()); | |||
|       REPORT_CALL_ERROR("E19999", "Copy profiling_options %s failed", options.profiling_options.c_str()); | |||
|       return INTERNAL_ERROR; | |||
|     } | |||
|     is_execute_profiling = true; | |||
|     GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), prof_conf.options, | |||
|            options.profiling_options.c_str()); | |||
|   } else { | |||
|     // enable profiling by env; both buffers are zero-initialized, so an unset variable reads as "" | |||
|     char env_profiling_mode[MMPA_MAX_PATH] = {0x00}; | |||
|     (void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH); | |||
|     (void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX); | |||
|     // The env is invalid | |||
|     if ((strcmp("true", env_profiling_mode) != 0) || (prof_conf.options[0] == '\0')) { | |||
|       return SUCCESS; | |||
|     } | |||
|     is_execute_profiling = true; | |||
|     GELOGI("The profiling in env is %s, %s", env_profiling_mode, prof_conf.options); | |||
|   } | |||
|   // Every path that reaches this point has set is_execute_profiling to true, so the former | |||
|   // "profiling is off" early return here could never trigger and was removed. | |||
|   ProfilingProperties::Instance().SetExecuteProfiling(is_execute_profiling); | |||
|   ProfilingProperties::Instance().SetLoadProfiling(true); | |||
|   // Parse json str for bp fp | |||
|   Status ret = ParseOptions(prof_conf.options); | |||
|   if (ret != ge::SUCCESS) { | |||
|     GELOGE(ge::PARAM_INVALID, "[Parse][Options]Parse training trace param %s failed, error_code %u", prof_conf.options, | |||
|            ret); | |||
|     REPORT_CALL_ERROR("E19999", "Parse training trace param %s failed, error_code %u", prof_conf.options, ret); | |||
|     return ge::PARAM_INVALID; | |||
|   } | |||
|   if (strncpy_s(prof_conf.jobId, MSPROF_OPTIONS_DEF_LEN_MAX, options.job_id.c_str(), MSPROF_OPTIONS_DEF_LEN_MAX - 1) != | |||
|       EOK) { | |||
|     GELOGE(INTERNAL_ERROR, "[Copy][JobId]Failed, original job_id %s", options.job_id.c_str()); | |||
|     REPORT_CALL_ERROR("E19999", "Copy job_id %s failed", options.job_id.c_str()); | |||
|     return INTERNAL_ERROR; | |||
|   } | |||
|   GELOGI("Job id: %s, original job id: %s.", prof_conf.jobId, options.job_id.c_str()); | |||
|   return ge::SUCCESS; | |||
| } | |||
| // Parses the JSON profiling options and records training-trace settings in ProfilingProperties. | |||
| // Returns PARAM_INVALID when options is empty, is not valid JSON, holds a value of the wrong | |||
| // type, or sets "training_trace" to anything other than "on". | |||
| // Returns SUCCESS without touching ProfilingProperties when "training_trace" is absent or empty. | |||
| ge::Status ProfilingInit::ParseOptions(const std::string &options) { | |||
|   if (options.empty()) { | |||
|     GELOGE(ge::PARAM_INVALID, "[Check][Param]Profiling options is empty"); | |||
|     REPORT_INNER_ERROR("E19999", "Profiling options is empty"); | |||
|     return ge::PARAM_INVALID; | |||
|   } | |||
|   try { | |||
|     // Parse first so that malformed JSON is rejected even when training trace is not requested. | |||
|     Json prof_options = Json::parse(options); | |||
|     if (options.find(kTrainingTrace) == std::string::npos) { | |||
|       return ge::SUCCESS; | |||
|     } | |||
|     std::string training_trace; | |||
|     if (prof_options.contains(kTrainingTrace)) { | |||
|       training_trace = prof_options[kTrainingTrace].get<std::string>(); | |||
|     } | |||
|     if (training_trace.empty()) { | |||
|       GELOGI("Training trace will not take effect."); | |||
|       return ge::SUCCESS; | |||
|     } | |||
|     GELOGI("GE profiling training trace:%s", training_trace.c_str()); | |||
|     if (training_trace != "on") { | |||
|       GELOGE(ge::PARAM_INVALID, "[Check][Param]Training trace param:%s is invalid.", training_trace.c_str()); | |||
|       REPORT_INNER_ERROR("E19999", "Training trace param:%s is invalid.", training_trace.c_str()); | |||
|       return ge::PARAM_INVALID; | |||
|     } | |||
|     std::string fp_point; | |||
|     std::string bp_point; | |||
|     if (prof_options.contains(kFpPoint)) { | |||
|       fp_point = prof_options[kFpPoint].get<std::string>(); | |||
|     } | |||
|     if (prof_options.contains(kBpPoint)) { | |||
|       bp_point = prof_options[kBpPoint].get<std::string>(); | |||
|     } | |||
|     if (!fp_point.empty() && !bp_point.empty()) { | |||
|       GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point.c_str(), fp_point.c_str()); | |||
|     } | |||
|     ProfilingProperties::Instance().SetTrainingTrace(true); | |||
|     ProfilingProperties::Instance().SetFpBpPoint(fp_point, bp_point); | |||
|   } catch (const Json::exception &e) { | |||
|     // Parse and type errors from the JSON library: keep the library's reason in the log. | |||
|     GELOGE(ge::PARAM_INVALID, "[Check][Param]Json prof_conf options is invalid, reason: %s", e.what()); | |||
|     REPORT_INNER_ERROR("E19999", "Json prof_conf options is invalid, reason: %s", e.what()); | |||
|     return ge::PARAM_INVALID; | |||
|   } catch (...) { | |||
|     // The logged code now matches the returned one (it used to log FAILED). | |||
|     GELOGE(ge::PARAM_INVALID, "[Check][Param]Json prof_conf options is invalid"); | |||
|     REPORT_INNER_ERROR("E19999", "Json prof_conf options is invalid"); | |||
|     return ge::PARAM_INVALID; | |||
|   } | |||
|   return ge::SUCCESS; | |||
| } | |||
| // Stops runtime profiling on any device still listed by ProfilingManager, then calls | |||
| // MsprofFinalize. Failures of rtProfilerStop / MsprofFinalize are logged as warnings only. | |||
| void ProfilingInit::StopProfiling() { | |||
|   const uint64_t module = GetProfilingModule(); | |||
|   // The device list is normally empty here; it is non-empty only in case ProfStopProfiling is abnormal. | |||
|   const auto device_id = ProfilingManager::Instance().GetDeviceID(); | |||
|   const int32_t device_num = static_cast<int32_t>(device_id.size()); | |||
|   if (device_num != 0) { | |||
|     // rtProfilerStop takes a plain uint32_t array, so copy the ids into one. | |||
|     std::unique_ptr<uint32_t[]> device_id_ptr(new (std::nothrow) uint32_t[device_num]); | |||
|     if (device_id_ptr == nullptr) { | |||
|       GELOGE(FAILED, "[Stop][Profiling]Device id ptr is null."); | |||
|       REPORT_INNER_ERROR("E19999", "Stop profiling, device id ptr is null"); | |||
|       return; | |||
|     } | |||
|     for (int32_t idx = 0; idx < device_num; ++idx) { | |||
|       device_id_ptr[idx] = static_cast<uint32_t>(device_id[idx]); | |||
|     } | |||
|     const rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); | |||
|     if (rt_ret != RT_ERROR_NONE) { | |||
|       GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret); | |||
|     } | |||
|   } | |||
|   // stop profiling | |||
|   const int32_t cb_ret = MsprofFinalize(); | |||
|   if (cb_ret == 0) { | |||
|     GELOGI("Stop Profiling success."); | |||
|     return; | |||
|   } | |||
|   GELOGW("call msprofCtrlCallback failed, type:%u, return:%d", | |||
|          static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), cb_ret); | |||
| } | |||
| // Tears profiling down in three steps, in this order: StopProfiling(), | |||
| // ProfilingManager::PluginUnInit(), ProfilingProperties::ClearProperties(). | |||
| void ProfilingInit::ShutDownProfiling() { | |||
| StopProfiling(); | |||
| ProfilingManager::Instance().PluginUnInit(); | |||
| ProfilingProperties::Instance().ClearProperties(); | |||
| } | |||
| // Returns the bitwise OR of the profiling module masks passed to rtProfilerStop by StopProfiling. | |||
| uint64_t ProfilingInit::GetProfilingModule() { | |||
|   return PROF_MODEL_EXECUTE_MASK | PROF_RUNTIME_API_MASK | PROF_RUNTIME_TRACE_MASK | | |||
|          PROF_SCHEDULE_TIMELINE_MASK | PROF_SCHEDULE_TRACE_MASK | PROF_TASK_TIME_MASK | | |||
|          PROF_SUBTASK_TIME_MASK | PROF_AICPU_TRACE_MASK | PROF_AICORE_METRICS_MASK | | |||
|          PROF_AIVECTORCORE_METRICS_MASK | PROF_MODEL_LOAD_MASK; | |||
| } | |||
| // Calls rtSetDeviceIdByGeModelIdx(model_id, device_id). | |||
| // Returns SUCCESS when the runtime reports RT_ERROR_NONE, otherwise logs and returns FAILED. | |||
| Status ProfilingInit::SetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id) { | |||
|   const auto rt_ret = rtSetDeviceIdByGeModelIdx(model_id, device_id); | |||
|   if (rt_ret == RT_ERROR_NONE) { | |||
|     return ge::SUCCESS; | |||
|   } | |||
|   GELOGE(ge::FAILED, "[Set][Device]Set Device id failed"); | |||
|   return ge::FAILED; | |||
| } | |||
| // Calls rtUnsetDeviceIdByGeModelIdx(model_id, device_id). | |||
| // Returns SUCCESS when the runtime reports RT_ERROR_NONE, otherwise logs and returns FAILED. | |||
| Status ProfilingInit::UnsetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id) { | |||
|   auto rt_ret = rtUnsetDeviceIdByGeModelIdx(model_id, device_id); | |||
|   if (rt_ret != RT_ERROR_NONE) { | |||
|     // The message used to say "Set", copied from SetDeviceIdByModelId; it now names the | |||
|     // unset operation and carries the ids and runtime error code. | |||
|     GELOGE(ge::FAILED, "[Unset][Device]Unset Device id failed, model_id %u, device_id %u, ret 0x%X", model_id, | |||
|            device_id, rt_ret); | |||
|     return ge::FAILED; | |||
|   } | |||
|   return ge::SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -0,0 +1,54 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_COMMON_PROFILING_PROFILING_INIT_H_ | |||
| #define GE_COMMON_PROFILING_PROFILING_INIT_H_ | |||
| #include <vector> | |||
| #include <nlohmann/json.hpp> | |||
| #include <string> | |||
| #include "common/profiling/profiling_properties.h" | |||
| #include "framework/common/ge_inner_error_codes.h" | |||
| #include "framework/common/ge_types.h" | |||
| #include "toolchain/prof_callback.h" | |||
| using std::map; | |||
| using std::string; | |||
| using std::vector; | |||
| using Json = nlohmann::json; | |||
| namespace ge { | |||
| // Singleton that owns profiling start-up and shutdown for GE. | |||
| // Obtain it through Instance(); construction and destruction are private, and copying is | |||
| // deleted so that no second object can be created from the singleton. | |||
| class ProfilingInit { | |||
|  public: | |||
|   // Returns the single instance. | |||
|   static ProfilingInit &Instance(); | |||
|   ProfilingInit(const ProfilingInit &) = delete; | |||
|   ProfilingInit &operator=(const ProfilingInit &) = delete; | |||
|   // Reads the profiling configuration from options or the environment and starts profiling when enabled. | |||
|   Status Init(const Options &options); | |||
|   // Stops runtime profiling on still-registered devices and finalizes the profiler. | |||
|   void StopProfiling(); | |||
|   // Registers the profiling control callback with the runtime. | |||
|   Status ProfRegisterCtrlCallback(); | |||
|   // Runs StopProfiling, then un-initializes the reporter plugin and clears the profiling properties. | |||
|   void ShutDownProfiling(); | |||
|   // Thin wrappers over rtSetDeviceIdByGeModelIdx / rtUnsetDeviceIdByGeModelIdx. | |||
|   Status SetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); | |||
|   Status UnsetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); | |||
|  private: | |||
|   ProfilingInit() = default; | |||
|   ~ProfilingInit() = default; | |||
|   // Fills prof_conf and sets is_execute_profiling from GE options or environment variables. | |||
|   Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf, bool &is_execute_profiling); | |||
|   // Parses the JSON options string for the training trace and fp/bp point settings. | |||
|   Status ParseOptions(const std::string &options); | |||
|   // Returns the combined module mask used when stopping runtime profiling. | |||
|   uint64_t GetProfilingModule(); | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_COMMON_PROFILING_PROFILING_INIT_H_ | |||
| @@ -25,11 +25,14 @@ | |||
| #include "runtime/base.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "mmpa/mmpa_api.h" | |||
| #include "graph/load/graph_loader.h" | |||
| namespace { | |||
| const char *const kTrainingTrace = "training_trace"; | |||
| const char *const kFpPoint = "fp_point"; | |||
| const char *const kBpPoint = "bp_point"; | |||
| const uint64_t kProfModelExecuteMask = 0x0010; | |||
| const uint64_t kProfModelLoadMask = 0x8000000000000000; | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| const int32_t kMaxDeviceNum = 256; | |||
| @@ -65,14 +68,15 @@ const std::string kIdx = "idx"; | |||
| } // namespace | |||
| namespace ge { | |||
| ProfilingManager::ProfilingManager() | |||
| : is_load_profiling_(false), | |||
| is_execute_profiling_(false), | |||
| is_training_trace_(false), | |||
| subscribe_count_(0), | |||
| prof_cb_({nullptr, nullptr}), | |||
| index_id_(UINT64_MAX), | |||
| subscribe_info_({false, 0, 0}) { | |||
| subscribe_info_({false, 0, 0}), | |||
| reporter_callback_(nullptr) { | |||
| } | |||
| ProfilingManager::~ProfilingManager() {} | |||
| @@ -82,45 +86,6 @@ ProfilingManager &ProfilingManager::Instance() { | |||
| return profiling_manager; | |||
| } | |||
| ge::Status ProfilingManager::Init(const Options &options) { | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| vector<int32_t>().swap(device_id_); | |||
| subscribe_count_ = 0; | |||
| GELOGI("ProfilingManager::Init job_id:%s", options.job_id.c_str()); | |||
| struct MsprofGeOptions prof_conf = {{ 0 }}; | |||
| Status ret = InitFromOptions(options, prof_conf); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "[Init][Profiling]Failed, error_code %u", ret); | |||
| REPORT_CALL_ERROR("E19999", "Init profiling failed, error_code %u", ret); | |||
| return ret; | |||
| } | |||
| if (is_execute_profiling_) { | |||
| if (prof_cb_.msprofCtrlCallback == nullptr) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofCtrlCallback callback is nullptr"); | |||
| REPORT_INNER_ERROR("E19999", "MsprofCtrlCallback callback is nullptr"); | |||
| return ge::PARAM_INVALID; | |||
| } | |||
| int32_t cb_ret = prof_cb_.msprofCtrlCallback( | |||
| static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), | |||
| static_cast<void *>(&prof_conf), sizeof(MsprofGeOptions)); | |||
| if (cb_ret != 0) { | |||
| GELOGE(FAILED, "[Call][msprofCtrlCallback]Failed, type %u, return %d", | |||
| static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), cb_ret); | |||
| REPORT_CALL_ERROR("E19999", "Call msprofCtrlCallback failed, type %u, return %d", | |||
| static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), | |||
| cb_ret); | |||
| return FAILED; | |||
| } | |||
| GELOGI("Profiling init success"); | |||
| } else { | |||
| GELOGI("The profiling is off, skip the initialization"); | |||
| } | |||
| #endif | |||
| return SUCCESS; | |||
| } | |||
| ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOptions &prof_conf) { | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| // enable profiling by env | |||
| @@ -221,44 +186,6 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) { | |||
| return ge::SUCCESS; | |||
| } | |||
| void ProfilingManager::StopProfiling() { | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| uint64_t module = GetProfilingModule(); | |||
| // The following if case will not be executed in normal case, inc case of ProfStopProfiling is abnormal | |||
| int32_t device_num = static_cast<int32_t>(device_id_.size()); | |||
| if (device_num != 0) { | |||
| auto device_id_ptr = std::unique_ptr<uint32_t[]>(new (std::nothrow) uint32_t[device_num]); | |||
| if (device_id_ptr == nullptr) { | |||
| GELOGE(FAILED, "[Stop][Profiling]Device id ptr is null."); | |||
| REPORT_INNER_ERROR("E19999", "Stop profiling, device id ptr is null"); | |||
| return; | |||
| } | |||
| for (int32_t i = 0; i < device_num; i++) { | |||
| device_id_ptr[i] = static_cast<uint32_t>(device_id_[i]); | |||
| } | |||
| rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret); | |||
| } | |||
| } | |||
| // stop profiling | |||
| if (prof_cb_.msprofCtrlCallback == nullptr) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofCtrlCallback callback is nullptr"); | |||
| REPORT_INNER_ERROR("E19999", "MsprofCtrlCallback callback is nullptr"); | |||
| return; | |||
| } | |||
| int32_t cb_ret = prof_cb_.msprofCtrlCallback(static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), | |||
| nullptr, 0); | |||
| if (cb_ret != 0) { | |||
| GELOGW("call msprofCtrlCallback failed, type:%u, return:%d", | |||
| static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), cb_ret); | |||
| return; | |||
| } | |||
| GELOGI("Stop Profiling success."); | |||
| #endif | |||
| } | |||
| void ProfilingManager::ProfilingOpInputOutInfo(const TaskDescInfo &task, Json &task_json) { | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| for (size_t i = 0; i < task.input_format.size(); i++) { | |||
| @@ -440,21 +367,6 @@ void ProfilingManager::ReportProfilingData(uint32_t model_id, const std::vector< | |||
| #endif | |||
| } | |||
| uint64_t ProfilingManager::GetProfilingModule() { | |||
| uint64_t module = PROF_MODEL_EXECUTE_MASK | | |||
| PROF_RUNTIME_API_MASK | | |||
| PROF_RUNTIME_TRACE_MASK | | |||
| PROF_SCHEDULE_TIMELINE_MASK | | |||
| PROF_SCHEDULE_TRACE_MASK | | |||
| PROF_TASK_TIME_MASK | | |||
| PROF_SUBTASK_TIME_MASK | | |||
| PROF_AICPU_TRACE_MASK | | |||
| PROF_AICORE_METRICS_MASK | | |||
| PROF_AIVECTORCORE_METRICS_MASK | | |||
| PROF_MODEL_LOAD_MASK; | |||
| return module; | |||
| } | |||
| void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module) { | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| if (prof_type == kProfModelSubscribe) { | |||
| @@ -485,8 +397,8 @@ void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type, uin | |||
| Status ProfilingManager::ProfModelSubscribe(uint64_t module, void *model) { | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| std::lock_guard<std::mutex> lock(mutex_); | |||
| uint64_t model_load_mask = module & PROF_MODEL_LOAD_MASK; | |||
| if ((subscribe_count_ == 0) && (model_load_mask == PROF_MODEL_LOAD_MASK)) { | |||
| uint64_t model_load_mask = module & kProfModelLoadMask; | |||
| if ((subscribe_count_ == 0) && (model_load_mask == kProfModelLoadMask)) { | |||
| // register framework to profiling | |||
| // register Framework to profiling | |||
| int32_t cb_ret = PluginInit(); | |||
| @@ -566,9 +478,9 @@ Status ProfilingManager::ProfModelUnsubscribe(void *model) { | |||
| Status ProfilingManager::ProfInit(uint64_t module) { | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| std::lock_guard<std::mutex> lock(mutex_); | |||
| uint64_t model_load_mask = module & PROF_MODEL_LOAD_MASK; | |||
| uint64_t model_load_mask = module & kProfModelLoadMask; | |||
| if (model_load_mask == PROF_MODEL_LOAD_MASK) { | |||
| if (model_load_mask == kProfModelLoadMask) { | |||
| // register Framework to profiling | |||
| int32_t cb_ret = PluginInit(); | |||
| if (cb_ret != 0) { | |||
| @@ -611,7 +523,7 @@ Status ProfilingManager::ProfFinalize() { | |||
| CleanSubscribeInfo(); | |||
| int32_t dev_num = -1; | |||
| rtError_t rt_ret = rtProfilerStop(PROF_MODEL_LOAD_MASK, dev_num, nullptr); | |||
| rtError_t rt_ret = rtProfilerStop(kProfModelLoadMask, dev_num, nullptr); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(FAILED, "[Stop][Profiler]Malloc buffer failed, ret 0x%X", rt_ret); | |||
| REPORT_CALL_ERROR("E19999", "Malloc buffer failed when stop profiling, ret 0x%X", rt_ret); | |||
| @@ -780,7 +692,7 @@ Status ProfilingManager::ProfStartProfiling(uint64_t module, const std::map<std: | |||
| "device num %d, ret 0x%X", module, device_num, rt_ret); | |||
| return FAILED; | |||
| } | |||
| if ((module & PROF_MODEL_EXECUTE_MASK) == PROF_MODEL_EXECUTE_MASK) { | |||
| if ((module & kProfModelExecuteMask) == kProfModelExecuteMask) { | |||
| for (int32_t i = 0; i < device_num; i++) { | |||
| if (std::find(device_id_.begin(), device_id_.end(), device_list[i]) == device_id_.end()) { | |||
| device_id_.push_back(device_list[i]); | |||
| @@ -788,7 +700,7 @@ Status ProfilingManager::ProfStartProfiling(uint64_t module, const std::map<std: | |||
| } | |||
| GELOGI("Prof start: ge execute model start profiling."); | |||
| } | |||
| if ((module & PROF_MODEL_LOAD_MASK) == PROF_MODEL_LOAD_MASK) { | |||
| if ((module & kProfModelLoadMask) == kProfModelLoadMask) { | |||
| GELOGW("Prof start: load model module is invalid."); | |||
| } | |||
| UpdateDeviceIdModuleMap(kProfStart, module, device_list); | |||
| @@ -829,8 +741,8 @@ Status ProfilingManager::ProfStopProfiling(uint64_t module, const std::map<std:: | |||
| "device num %d, ret 0x%X", module, device_num, rt_ret); | |||
| return FAILED; | |||
| } | |||
| uint64_t execute_model_mask = module & PROF_MODEL_EXECUTE_MASK; | |||
| if (execute_model_mask == PROF_MODEL_EXECUTE_MASK) { | |||
| uint64_t execute_model_mask = module & kProfModelExecuteMask; | |||
| if (execute_model_mask == kProfModelExecuteMask) { | |||
| for (int32_t i = 0; i < device_num; i++) { | |||
| auto iter = std::find(device_id_.begin(), device_id_.end(), device_list[i]); | |||
| if (iter != device_id_.end()) { | |||
| @@ -839,7 +751,7 @@ Status ProfilingManager::ProfStopProfiling(uint64_t module, const std::map<std:: | |||
| } | |||
| GELOGI("Prof stop: ge execute model stop profiling."); | |||
| } | |||
| if ((module & PROF_MODEL_LOAD_MASK) == PROF_MODEL_LOAD_MASK) { | |||
| if ((module & kProfModelLoadMask) == kProfModelLoadMask) { | |||
| GELOGW("Prof stop: load model module is invalid."); | |||
| } | |||
| UpdateDeviceIdModuleMap(kProfStop, module, device_list); | |||
| @@ -896,13 +808,13 @@ bool ProfilingManager::ProfilingModelExecuteOn() const { | |||
| return execute_model_prof_on; | |||
| } | |||
| Status ProfilingManager::PluginInit() { | |||
| if (prof_cb_.msprofReporterCallback == nullptr) { | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::PluginInit() { | |||
| if (reporter_callback_ == nullptr) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr"); | |||
| REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr"); | |||
| return ge::PARAM_INVALID; | |||
| } | |||
| int32_t cb_ret = prof_cb_.msprofReporterCallback( | |||
| int32_t cb_ret = reporter_callback_( | |||
| static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), | |||
| static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_INIT), | |||
| nullptr, 0); | |||
| @@ -912,7 +824,7 @@ Status ProfilingManager::PluginInit() { | |||
| return INTERNAL_ERROR; | |||
| } | |||
| cb_ret = prof_cb_.msprofReporterCallback( | |||
| cb_ret = reporter_callback_( | |||
| static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), | |||
| static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_DATA_MAX_LEN), | |||
| &reporter_max_len_, sizeof(uint32_t)); | |||
| @@ -927,12 +839,12 @@ Status ProfilingManager::PluginInit() { | |||
| void ProfilingManager::PluginUnInit() const { | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| if (prof_cb_.msprofReporterCallback == nullptr) { | |||
| if (reporter_callback_ == nullptr) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr"); | |||
| REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr"); | |||
| return; | |||
| } | |||
| int32_t cb_ret = prof_cb_.msprofReporterCallback( | |||
| int32_t cb_ret = reporter_callback_( | |||
| static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), | |||
| static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_UNINIT), | |||
| nullptr, 0); | |||
| @@ -942,13 +854,14 @@ void ProfilingManager::PluginUnInit() const { | |||
| #endif | |||
| } | |||
| Status ProfilingManager::CallMsprofReport(ReporterData &reporter_data) const { | |||
| if (prof_cb_.msprofReporterCallback == nullptr) { | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMsprofReport( | |||
| ReporterData &reporter_data) const { | |||
| if (reporter_callback_ == nullptr) { | |||
| GELOGE(ge::PARAM_INVALID, "[Check][Param]MsprofReporterCallback callback is nullptr"); | |||
| REPORT_INNER_ERROR("E19999", "MsprofReporterCallback callback is nullptr"); | |||
| return ge::PARAM_INVALID; | |||
| } | |||
| return prof_cb_.msprofReporterCallback( | |||
| return reporter_callback_( | |||
| static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), | |||
| static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_REPORT), | |||
| static_cast<void *>(&reporter_data), sizeof(ReporterData)); | |||
| @@ -26,8 +26,8 @@ | |||
| #include "framework/common/ge_inner_error_codes.h" | |||
| #include "framework/common/ge_types.h" | |||
| #include "external/register/register_types.h" | |||
| #include "toolchain/prof_callback.h" | |||
| #include "runtime/stream.h" | |||
| #include "toolchain/prof_callback.h" | |||
| using std::map; | |||
| using std::string; | |||
| @@ -52,7 +52,6 @@ namespace { | |||
| const uint64_t PROF_HCCL_TRACE_MASK = 0x1000; | |||
| const uint64_t PROF_DATA_PROCESS_MASK = 0x2000; | |||
| const uint64_t PROF_MODEL_LOAD_MASK = 0x8000000000000000; | |||
| } // namespace | |||
| namespace ge { | |||
| class OpDesc; | |||
| @@ -68,24 +67,17 @@ struct ProfSubscribeInfo { | |||
| uint32_t graph_id; | |||
| }; | |||
| struct MsprofCallback { | |||
| MsprofCtrlCallback msprofCtrlCallback; | |||
| MsprofReporterCallback msprofReporterCallback; | |||
| }; | |||
| class ProfilingManager { | |||
| public: | |||
| ProfilingManager(); | |||
| virtual ~ProfilingManager(); | |||
| static ProfilingManager &Instance(); | |||
| Status Init(const Options &options); | |||
| Status ProfInit(uint64_t module); | |||
| Status ProfFinalize(); | |||
| Status ProfStartProfiling(uint64_t module, const std::map<std::string, std::string> &config_para); | |||
| Status ProfStopProfiling(uint64_t module, const std::map<std::string, std::string> &config_para); | |||
| Status ProfModelSubscribe(uint64_t module, void *model); | |||
| Status ProfModelUnsubscribe(void *model); | |||
| void StopProfiling(); | |||
| bool ProfilingTrainingTraceOn() const { return is_training_trace_; } | |||
| // report model load profiling data flag, data contain task desc info, step info, model load fusion op info | |||
| bool ProfilingModelLoadOn() const { return is_load_profiling_; } | |||
| @@ -100,9 +92,8 @@ class ProfilingManager { | |||
| Status PluginInit(); | |||
| void PluginUnInit() const; | |||
| Status CallMsprofReport(ReporterData &reporter_data) const; | |||
| struct MsprofCallback &GetMsprofCallback() { return prof_cb_; } | |||
| void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; } | |||
| void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; } | |||
| const MsprofReporterCallback GetMsprofReporterCallback() const { return reporter_callback_; } | |||
| void SetMsprofReporterCallback(MsprofReporterCallback func) { reporter_callback_ = func; } | |||
| void GetFpBpPoint(std::string &fp_point, std::string &bp_point); | |||
| void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const; | |||
| void ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name); | |||
| @@ -116,6 +107,7 @@ class ProfilingManager { | |||
| void CleanSubscribeInfo(); | |||
| void SetGraphIdToModelMap(uint32_t graph_id, uint32_t model_id) { model_id_map_[graph_id] = model_id; } | |||
| Status GetModelIdFromGraph(uint32_t graph_id, uint32_t &model_id); | |||
| const vector<int32_t> &GetDeviceID() const { return device_id_; } | |||
| private: | |||
| Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf); | |||
| @@ -139,7 +131,6 @@ class ProfilingManager { | |||
| uint32_t subscribe_count_; | |||
| std::mutex mutex_; | |||
| std::mutex mutex_report_; | |||
| MsprofCallback prof_cb_; | |||
| std::string fp_point_; | |||
| std::string bp_point_; | |||
| uint32_t reporter_max_len_ = 0; | |||
| @@ -147,6 +138,7 @@ class ProfilingManager { | |||
| std::map<uint32_t, uint32_t> device_id_map_; // key: graph_id, value: device_id | |||
| std::map<uint32_t, uint32_t> model_id_map_; // key: graph_id, value: model_id | |||
| ProfSubscribeInfo subscribe_info_; | |||
| MsprofReporterCallback reporter_callback_; | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_COMMON_PROFILING_PROFILING_MANAGER_H_ | |||
| @@ -0,0 +1,124 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "profiling_properties.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/debug/log.h" | |||
| #include "graph/ge_context.h" | |||
| namespace { | |||
| const uint64_t kMsProfOptionsMaxlen = 2048; | |||
| const char *const kFpPoint = "fp_point"; | |||
| const char *const kBpPoint = "bp_point"; | |||
| } // namespace | |||
| namespace ge{ | |||
| // Returns the single ProfilingProperties object, held as a function-local static. | |||
| ProfilingProperties &ProfilingProperties::Instance() { | |||
|   static ProfilingProperties instance; | |||
|   return instance; | |||
| } | |||
| // Stores is_load_profiling in is_load_profiling_ while holding mutex_. | |||
| void ProfilingProperties::SetLoadProfiling(bool is_load_profiling) { | |||
|   std::lock_guard<std::mutex> guard(mutex_); | |||
|   is_load_profiling_ = is_load_profiling; | |||
| } | |||
| // Reads is_load_profiling_ while holding mutex_ and returns the value read. | |||
| bool ProfilingProperties::IsLoadProfiling() { | |||
|   std::lock_guard<std::mutex> guard(mutex_); | |||
|   const bool load_profiling_on = is_load_profiling_; | |||
|   return load_profiling_on; | |||
| } | |||
| // Stores is_exec_profiling in is_execute_profiling_ while holding mutex_. | |||
| void ProfilingProperties::SetExecuteProfiling(bool is_exec_profiling) { | |||
|   std::lock_guard<std::mutex> guard(mutex_); | |||
|   is_execute_profiling_ = is_exec_profiling; | |||
| } | |||
| // Reads is_execute_profiling_ while holding mutex_ and returns the value read. | |||
| bool ProfilingProperties::IsExecuteProfiling() { | |||
|   std::lock_guard<std::mutex> guard(mutex_); | |||
|   const bool execute_profiling_on = is_execute_profiling_; | |||
|   return execute_profiling_on; | |||
| } | |||
| // Stores the training-trace flag while holding mutex_. | |||
| void ProfilingProperties::SetTrainingTrace(bool is_train_trace) { | |||
|   const std::lock_guard<std::mutex> guard(mutex_); | |||
|   is_training_trace_ = is_train_trace; | |||
| } | |||
| // Fetches the fp/bp profiling points into the two output strings. | |||
| // | |||
| // When both points are already cached they are returned directly. Otherwise the profiling options | |||
| // are taken from the OPTION_EXEC_PROFILING_OPTIONS context option or, when that is missing or empty, | |||
| // from the PROFILING_OPTIONS environment variable. The options are parsed as JSON and the | |||
| // "fp_point"/"bp_point" entries found there are cached and returned. | |||
| // | |||
| // The outputs and the cached members are left untouched when no options are available or when the | |||
| // options cannot be parsed. | |||
| void ProfilingProperties::GetFpBpPoint(std::string &fp_point, std::string &bp_point) { | |||
|   std::lock_guard<std::mutex> lock(mutex_); | |||
|   // Env or options mode: fp_point_/bp_point_ have been initialized on profiling init. | |||
|   if (!fp_point_.empty() && !bp_point_.empty()) { | |||
|     fp_point = fp_point_; | |||
|     bp_point = bp_point_; | |||
|     GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(), | |||
|            fp_point.c_str()); | |||
|     return; | |||
|   } | |||
|   // ProfApi mode and training trace is set: the context option takes precedence over the environment. | |||
|   std::string profiling_options; | |||
|   if (ge::GetContext().GetOption(OPTION_EXEC_PROFILING_OPTIONS, profiling_options) != SUCCESS || | |||
|       profiling_options.empty()) { | |||
|     char env_profiling_options[kMsProfOptionsMaxlen] = {0x00}; | |||
|     INT32 ret = mmGetEnv("PROFILING_OPTIONS", env_profiling_options, kMsProfOptionsMaxlen); | |||
|     if (ret != EN_OK) { | |||
|       GELOGI("PROFILING_OPTIONS env is not exist."); | |||
|       return; | |||
|     } | |||
|     GELOGI("Parse env PROFILING_OPTIONS:%s.", env_profiling_options); | |||
|     profiling_options = env_profiling_options; | |||
|   } | |||
|   try { | |||
|     Json prof_options = Json::parse(profiling_options); | |||
|     // Parse into locals first so that a failure on either key cannot leave the cache half-updated. | |||
|     std::string parsed_fp = fp_point_; | |||
|     std::string parsed_bp = bp_point_; | |||
|     if (prof_options.contains(kFpPoint)) { | |||
|       parsed_fp = prof_options[kFpPoint].get<std::string>(); | |||
|     } | |||
|     if (prof_options.contains(kBpPoint)) { | |||
|       parsed_bp = prof_options[kBpPoint].get<std::string>(); | |||
|     } | |||
|     fp_point_ = parsed_fp; | |||
|     bp_point_ = parsed_bp; | |||
|     fp_point = fp_point_; | |||
|     bp_point = bp_point_; | |||
|     if (!fp_point_.empty() && !bp_point_.empty()) { | |||
|       GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str()); | |||
|     } | |||
|   } catch (...) { | |||
|     // Covers malformed JSON as well as fp/bp entries that are not strings. | |||
|     GELOGW("Json prof options is invalid."); | |||
|     return; | |||
|   } | |||
| } | |||
| // Replaces both cached profiling points while holding mutex_. | |||
| void ProfilingProperties::SetFpBpPoint(const std::string &fp_point, const std::string &bp_point) { | |||
|   const std::lock_guard<std::mutex> guard(mutex_); | |||
|   fp_point_ = fp_point; | |||
|   bp_point_ = bp_point; | |||
| } | |||
| // Resets every flag to false and empties both cached profiling points while holding mutex_. | |||
| void ProfilingProperties::ClearProperties() { | |||
|   const std::lock_guard<std::mutex> guard(mutex_); | |||
|   fp_point_.clear(); | |||
|   bp_point_.clear(); | |||
|   is_training_trace_ = false; | |||
|   is_execute_profiling_ = false; | |||
|   is_load_profiling_ = false; | |||
| } | |||
| } // namespace ge | |||
| @@ -0,0 +1,58 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_COMMON_PROFILING_PROPERTIES_H_ | |||
| #define GE_COMMON_PROFILING_PROPERTIES_H_ | |||
| #include <nlohmann/json.hpp> | |||
| #include <mutex> | |||
| #include <string> | |||
| #include <vector> | |||
| #include "framework/common/ge_types.h" | |||
| using Json = nlohmann::json; | |||
| namespace ge { | |||
| // Holder of the profiling switches and the fp/bp profiling points. | |||
| // The constructor is private; the object is obtained through Instance(). | |||
| // Every accessor takes mutex_, so reads never race with the setters. | |||
| class ProfilingProperties { | |||
|  public: | |||
|   static ProfilingProperties &Instance(); | |||
|   void SetLoadProfiling(bool is_load_profiling); | |||
|   bool IsLoadProfiling(); | |||
|   void SetExecuteProfiling(bool is_execute_profiling); | |||
|   bool IsExecuteProfiling(); | |||
|   void SetTrainingTrace(bool is_train_trace); | |||
|   // Returns the training-trace flag. | |||
|   bool ProfilingTrainingTraceOn() const { | |||
|     std::lock_guard<std::mutex> lock(mutex_); | |||
|     return is_training_trace_; | |||
|   } | |||
|   // Non-const accessor returning the same value as ProfilingTrainingTraceOn(). | |||
|   bool IsTrainTrace() { return ProfilingTrainingTraceOn(); } | |||
|   void SetFpBpPoint(const std::string &fp_point, const std::string &bp_point); | |||
|   // Returns true only when both the load and the execute profiling flags are set. | |||
|   bool ProfilingOn() const { | |||
|     std::lock_guard<std::mutex> lock(mutex_); | |||
|     return is_load_profiling_ && is_execute_profiling_; | |||
|   } | |||
|   void GetFpBpPoint(std::string &fp_point, std::string &bp_point); | |||
|   void ClearProperties(); | |||
|  private: | |||
|   ProfilingProperties() = default; | |||
|   ~ProfilingProperties() = default; | |||
|   // mutable so that the const accessors above can lock it. | |||
|   mutable std::mutex mutex_; | |||
|   // NOTE(review): no method declared in this class appears to use point_mutex_ - confirm before removing. | |||
|   std::mutex point_mutex_; | |||
|   bool is_load_profiling_ = false; | |||
|   bool is_execute_profiling_ = false; | |||
|   bool is_training_trace_ = false; | |||
|   std::string fp_point_; | |||
|   std::string bp_point_; | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_COMMON_PROFILING_PROPERTIES_H_ | |||
| @@ -1,6 +1,7 @@ | |||
| set(SRC_LIST | |||
| "ge_executor.cc" | |||
| "../common/profiling/profiling_manager.cc" | |||
| "../common/profiling/command_handle.cc" | |||
| "../common/dump/dump_op.cc" | |||
| "../common/dump/opdebug_register.cc" | |||
| "../common/dump/exception_dumper.cc" | |||
| @@ -33,6 +33,8 @@ | |||
| #include "opskernel_manager/ops_kernel_builder_manager.h" | |||
| #include "graph/opsproto_manager.h" | |||
| #include "ge_local_engine/engine/host_cpu_engine.h" | |||
| #include "runtime/base.h" | |||
| #include "common/profiling/command_handle.h" | |||
| using std::string; | |||
| using std::vector; | |||
| @@ -250,7 +252,6 @@ Status GeExecutor::Initialize() { | |||
| GELOGW("Already initialized, no need to be initialized again."); | |||
| return ge::SUCCESS; | |||
| } | |||
| OpTilingManager::GetInstance().LoadSo(); | |||
| Status init_hostcpu_engine_status = HostCpuEngine::GetInstance().Initialize(); | |||
| @@ -277,7 +278,6 @@ Status GeExecutor::Initialize() { | |||
| profiling_options.device_id = 0; | |||
| // job id need to be set, the value is meaningless; | |||
| profiling_options.job_id = "1"; | |||
| ProfilingManager::Instance().Init(profiling_options); | |||
| isInit_ = true; | |||
| GELOGI("Init GeExecutor over."); | |||
| @@ -295,7 +295,6 @@ Status GeExecutor::Finalize() { | |||
| // Stop profiling | |||
| if (ProfilingManager::Instance().ProfilingOn()) { | |||
| ProfilingManager::Instance().StopProfiling(); | |||
| ProfilingManager::Instance().PluginUnInit(); | |||
| } | |||
| @@ -29,7 +29,6 @@ COMMON_LOCAL_SRC_FILES := \ | |||
| common/dump/dump_manager.cc \ | |||
| common/dump/dump_op.cc \ | |||
| common/dump/dump_server.cc \ | |||
| common/helper/model_cache_helper.cc \ | |||
| ge_local_engine/engine/host_cpu_engine.cc \ | |||
| @@ -24,7 +24,6 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
| common/fp16_t.cc \ | |||
| common/ge/plugin_manager.cc\ | |||
| common/ge/op_tiling_manager.cc\ | |||
| common/helper/model_cache_helper.cc \ | |||
| common/profiling/profiling_manager.cc \ | |||
| common/dump/dump_manager.cc \ | |||
| common/dump/dump_properties.cc \ | |||
| @@ -17,7 +17,7 @@ | |||
| #include "graph/build/task_generator.h" | |||
| #include <string> | |||
| #include <utility> | |||
| #include "common/profiling/profiling_manager.h" | |||
| #include "common/profiling/profiling_properties.h" | |||
| #include "framework/common/types.h" | |||
| #include "framework/common/util.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| @@ -945,7 +945,7 @@ Status TaskGenerator::GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint | |||
| vector<uint32_t> &all_reduce_nodes, std::string &fp_point_str, | |||
| std::string &bp_point_str) const { | |||
| ProfilingManager::Instance().GetFpBpPoint(fp_point_str, bp_point_str); | |||
| ProfilingProperties::Instance().GetFpBpPoint(fp_point_str, bp_point_str); | |||
| Status ret = SUCCESS; | |||
| if (fp_point_str.empty()) { | |||
| @@ -976,8 +976,8 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi | |||
| vector<uint32_t> &all_reduce_nodes) const { | |||
| GE_CHECK_NOTNULL(graph); | |||
| const char *profiling_mode = std::getenv(kProfilingMode); | |||
| bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | |||
| ProfilingManager::Instance().ProfilingTrainingTraceOn(); | |||
| bool is_profiling = (profiling_mode != nullptr) || ProfilingProperties::Instance().ProfilingOn() || | |||
| ProfilingProperties::Instance().ProfilingTrainingTraceOn(); | |||
| if (!is_profiling) { | |||
| GELOGD("Profiling is not open."); | |||
| return SUCCESS; | |||
| @@ -1071,8 +1071,8 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const | |||
| vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | |||
| vector<domi::TaskDef> &task_def_list) { | |||
| const char *profiling_mode = std::getenv(kProfilingMode); | |||
| bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | |||
| ProfilingManager::Instance().ProfilingTrainingTraceOn(); | |||
| bool is_profiling = (profiling_mode != nullptr) || ProfilingProperties::Instance().ProfilingOn() || | |||
| ProfilingProperties::Instance().ProfilingTrainingTraceOn(); | |||
| bool is_insert_fp_profiling_task = false; | |||
| (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task); | |||
| bool is_insert_bp_profiling_task = false; | |||
| @@ -1167,8 +1167,8 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P | |||
| vector<domi::TaskDef> &task_def_list) { | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| const char *profiling_mode = std::getenv(kProfilingMode); | |||
| bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | |||
| ProfilingManager::Instance().ProfilingTrainingTraceOn(); | |||
| bool is_profiling = (profiling_mode != nullptr) || ProfilingProperties::Instance().ProfilingOn() || | |||
| ProfilingProperties::Instance().ProfilingTrainingTraceOn(); | |||
| bool is_insert_bp_profiling_task = false; | |||
| (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task); | |||
| bool is_insert_end_profiling_task = false; | |||
| @@ -27,6 +27,7 @@ | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "common/model/ge_root_model.h" | |||
| #include "common/formats/utils/formats_trans_utils.h" | |||
| #include "framework/omg/omg_inner_types.h" | |||
| namespace ge { | |||
| thread_local uint32_t device_count = 0; | |||
| @@ -330,6 +331,17 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge | |||
| GenModelId(&model_id); | |||
| GELOGD("Generate new model_id:%u", model_id); | |||
| } | |||
| if (!domi::GetContext().train_flag) { | |||
| int32_t tmp_device_id = 0; | |||
| rtError_t rt_ret = rtGetDevice(&tmp_device_id); | |||
| if (rt_ret != RT_ERROR_NONE || tmp_device_id < 0) { | |||
| GELOGE(rt_ret, "[Get][LogicDeviceId]Failed, ret 0x%X", rt_ret); | |||
| REPORT_CALL_ERROR("E19999", "Get logic device id failed, ret 0x%X", rt_ret); | |||
| return ge::FAILED; | |||
| } | |||
| uint32_t device_id = static_cast<uint32_t>(tmp_device_id); | |||
| rtSetDeviceIdByGeModelIdx(model_id, device_id); | |||
| } | |||
| auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel(); | |||
| string om_name; | |||
| if (IsNeedHybridLoad(*ge_root_model)) { | |||
| @@ -448,6 +460,10 @@ Status ModelManager::Unload(uint32_t model_id) { | |||
| } else { | |||
| GELOGI("Unload model %u success.no need reset device,device_count: %u", model_id, device_count); | |||
| } | |||
| uint32_t device_id = 0; | |||
| if (!domi::GetContext().train_flag) { | |||
| rtUnsetDeviceIdByGeModelIdx(model_id, device_id); | |||
| } | |||
| std::lock_guard<std::mutex> lock(exeception_infos_mutex_); | |||
| exception_infos_.clear(); | |||
| return SUCCESS; | |||
| @@ -1146,7 +1162,17 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||
| GELOGE(ret, "[Load][RootModel] failed, ret:%d, model_id:%u.", ret, model_id); | |||
| return ret; | |||
| } | |||
| if (!domi::GetContext().train_flag) { | |||
| int32_t tmp_device_id = 0; | |||
| rtError_t rt_ret = rtGetDevice(&tmp_device_id); | |||
| if (rt_ret != RT_ERROR_NONE || tmp_device_id < 0) { | |||
| GELOGE(rt_ret, "[Get][LogicDeviceId]Failed, ret 0x%X", rt_ret); | |||
| REPORT_CALL_ERROR("E19999", "Get logic device id failed, ret 0x%X", rt_ret); | |||
| return ge::FAILED; | |||
| } | |||
| uint32_t device_id = static_cast<uint32_t>(tmp_device_id); | |||
| rtSetDeviceIdByGeModelIdx(model_id, device_id); | |||
| } | |||
| if (model_helper.GetModelType()) { | |||
| bool is_shape_unknown = false; | |||
| GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown), | |||
| @@ -109,7 +109,6 @@ | |||
| #include "register/custom_pass_helper.h" | |||
| #include "external/graph/types.h" | |||
| #include "common/util/error_manager/error_manager.h" | |||
| #include "common/profiling/profiling_manager.h" | |||
| namespace { | |||
| const char *const kSummary = "Summary"; | |||
| @@ -462,9 +461,6 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||
| const std::map<std::string, std::string> &options, | |||
| const OmgContext &omg_context) { | |||
| IncreaseGraphCount(graph_id); | |||
| auto device_id = GetContext().DeviceId(); | |||
| GELOGD("Device id is %u", device_id); | |||
| ProfilingManager::Instance().SetGraphIdToDeviceMap(graph_id, device_id); | |||
| // validation for adding graphs of same graph_id in multi-thread secenario | |||
| // 1.previous thread owns same graph_id has finished the AddGraph procession | |||
| if (GetAddGraphCondition(graph_id) == kDoneAdded) { | |||
| @@ -43,6 +43,7 @@ | |||
| #include "runtime/kernel.h" | |||
| #include "opskernel_manager/ops_kernel_builder_manager.h" | |||
| #include "external/runtime/rt_error_codes.h" | |||
| #include "common/profiling/profiling_init.h" | |||
| using Json = nlohmann::json; | |||
| @@ -194,7 +195,6 @@ Status GELib::SystemInitialize(const map<string, string> &options) { | |||
| InitOptions(options); | |||
| // In train and infer, profiling is always needed. | |||
| InitProfiling(this->options_); | |||
| // 1.`is_train_mode_` means case: train | |||
| // 2.`(!is_train_mode_) && (options_.device_id != kDefaultDeviceIdForInfer)` means case: online infer | |||
| // these two case with logical device id | |||
| @@ -206,16 +206,6 @@ Status GELib::SystemInitialize(const map<string, string> &options) { | |||
| return status; | |||
| } | |||
| void GELib::InitProfiling(Options &options) { | |||
| GELOGI("Init Profiling. session Id: %ld, device id:%d ", options.session_id, options.device_id); | |||
| std::lock_guard<std::mutex> lock(status_mutex_); | |||
| GetContext().Init(); | |||
| // Profiling init | |||
| if (ProfilingManager::Instance().Init(options) != SUCCESS) { | |||
| GELOGW("Profiling init failed."); | |||
| } | |||
| } | |||
| void GELib::SetDefaultPrecisionMode(map<string, string> &new_options) { | |||
| auto iter = new_options.find(PRECISION_MODE); | |||
| if (iter != new_options.end()) { | |||
| @@ -463,9 +453,6 @@ Status GELib::Finalize() { | |||
| GELOGI("Analyzer finalization"); | |||
| Analyzer::GetInstance()->Finalize(); | |||
| // Shut down profiling | |||
| ShutDownProfiling(); | |||
| if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { | |||
| GELOGI("System ShutDown."); | |||
| mid_state = SystemShutdownWithOptions(this->options_); | |||
| @@ -494,15 +481,6 @@ Status GELib::Finalize() { | |||
| return SUCCESS; | |||
| } | |||
| void GELib::ShutDownProfiling() { | |||
| std::lock_guard<std::mutex> lock(status_mutex_); | |||
| if (ProfilingManager::Instance().ProfilingOn()) { | |||
| ProfilingManager::Instance().StopProfiling(); | |||
| ProfilingManager::Instance().PluginUnInit(); | |||
| } | |||
| } | |||
| // Get Singleton Instance | |||
| std::shared_ptr<GELib> GELib::GetInstance() { return instancePtr_; } | |||
| @@ -65,7 +65,6 @@ class GE_FUNC_VISIBILITY GELib { | |||
| bool IsTrainMode() { return is_train_mode_; } | |||
| void InitProfiling(Options &options); | |||
| void ShutDownProfiling(); | |||
| Status InitSystemWithoutOptions(); | |||
| Status InitSystemWithOptions(Options &options); | |||
| @@ -36,6 +36,7 @@ | |||
| #include "runtime/mem.h" | |||
| #include "ir_build/option_utils.h" | |||
| #include "common/profiling/profiling_manager.h" | |||
| #include "common/profiling/profiling_init.h" | |||
| namespace ge { | |||
| namespace { | |||
| @@ -288,6 +289,9 @@ Status InnerSession::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inpu | |||
| GELOGI("[InnerSession:%lu] run graph on session, graph_id=%u.", session_id_, graph_id); | |||
| if (mutex_.try_lock()) { | |||
| std::lock_guard<std::mutex> lock(mutex_, std::adopt_lock); | |||
| auto device_id = GetContext().DeviceId(); | |||
| GELOGD("device is is %u", device_id); | |||
| ProfilingInit::Instance().SetDeviceIdByModelId(graph_id, device_id); | |||
| if (!init_flag_) { | |||
| GELOGE(GE_SESS_INIT_FAILED, "[Run][Graph]failed because GraphManager not Init, InnerSession:%lu, graph_id:%u.", | |||
| session_id_, graph_id); | |||
| @@ -339,6 +343,9 @@ Status InnerSession::RunGraphWithStreamAsync(uint32_t graph_id, rtStream_t strea | |||
| "session id = %lu, graph id = %u, stream = %p.", session_id_, graph_id, stream); | |||
| return GE_SESS_INIT_FAILED; | |||
| } | |||
| auto device_id = GetContext().DeviceId(); | |||
| GELOGD("device id is %u", device_id); | |||
| ProfilingInit::Instance().SetDeviceIdByModelId(graph_id, device_id); | |||
| UpdateThreadContext(graph_id); | |||
| vector<GeTensor> ge_inputs; | |||
| for (auto &item : inputs) { | |||
| @@ -382,6 +389,9 @@ Status InnerSession::RemoveGraph(uint32_t graph_id) { | |||
| session_id_, graph_id); | |||
| return GE_SESS_INIT_FAILED; | |||
| } | |||
| auto device_id = GetContext().DeviceId(); | |||
| GELOGD("remove device id %u", device_id); | |||
| ProfilingInit::Instance().UnsetDeviceIdByModelId(graph_id, device_id); | |||
| UpdateThreadContext(graph_id); | |||
| Status ret = graph_manager_.RemoveGraph(graph_id); | |||
| if (ret != SUCCESS) { | |||
| @@ -18,32 +18,8 @@ | |||
| #define INC_FRAMEWORK_COMMON_GE_PROFILING_H_ | |||
| #include "ge/ge_api_error_codes.h" | |||
| #include "toolchain/prof_callback.h" | |||
| #include "runtime/base.h" | |||
| const int MAX_DEV_NUM = 64; | |||
| enum ProfCommandHandleType { | |||
| kProfCommandhandleInit = 0, | |||
| kProfCommandhandleStart, | |||
| kProfCommandhandleStop, | |||
| kProfCommandhandleFinalize, | |||
| kProfCommandhandleModelSubscribe, | |||
| kProfCommandhandleModelUnsubscribe | |||
| }; | |||
| struct ProfCommandHandleData { | |||
| uint64_t profSwitch; | |||
| uint32_t devNums; // length of device id list | |||
| uint32_t devIdList[MAX_DEV_NUM]; | |||
| uint32_t modelId; | |||
| }; | |||
| GE_FUNC_VISIBILITY ge::Status RegProfCtrlCallback(MsprofCtrlCallback func); | |||
| GE_FUNC_VISIBILITY ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func); | |||
| GE_FUNC_VISIBILITY ge::Status RegProfReporterCallback(MsprofReporterCallback func); | |||
| GE_FUNC_VISIBILITY ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len); | |||
| /// | |||
| /// @brief Output the profiling data of single operator in Pytorch, and does not support multithreading | |||
| /// @return Status result | |||
| @@ -52,4 +28,6 @@ GE_FUNC_VISIBILITY ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id | |||
| GE_FUNC_VISIBILITY ge::Status ProfGetDeviceFormGraphId(uint32_t graph_id, uint32_t &device_id); | |||
| GE_FUNC_VISIBILITY void ProfSetGraphIdToDeviceMap(uint32_t graph_id, uint32_t &device_id); | |||
| #endif // INC_FRAMEWORK_COMMON_GE_PROFILING_H_ | |||
| @@ -1,24 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef INC_FRAMEWORK_COMMON_GE_RUNNER_PROFILING_H_ | |||
| #define INC_FRAMEWORK_COMMON_GE_RUNNER_PROFILING_H_ | |||
| #include "framework/common/profiling/ge_profiling.h" | |||
| GE_FUNC_VISIBILITY bool IsInitialize(); | |||
| #endif // INC_FRAMEWORK_COMMON_GE_RUNNER_PROFILING_H_ | |||
| @@ -40,3 +40,11 @@ rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback) { | |||
| rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback) { | |||
| return 0; | |||
| } | |||
| // Stub: ignores all arguments and always returns 0. | |||
| int32_t MsprofInit(uint32_t dataType, void *data, uint32_t dataLen) { | |||
|   (void)dataType; | |||
|   (void)data; | |||
|   (void)dataLen; | |||
|   return 0; | |||
| } | |||
| // Stub: performs no work and always returns 0. | |||
| int32_t MsprofFinalize() { | |||
|   const int32_t result = 0; | |||
|   return result; | |||
| } | |||
| @@ -552,6 +552,18 @@ rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames, uint32_t | |||
| return RT_ERROR_NONE; | |||
| } | |||
| // Stub: ignores both arguments and always returns RT_ERROR_NONE. | |||
| rtError_t rtSetDeviceIdByGeModelIdx(uint32_t modelIdx, uint32_t &deviceId) { | |||
|   (void)modelIdx; | |||
|   (void)deviceId; | |||
|   return RT_ERROR_NONE; | |||
| } | |||
| // Stub: ignores both arguments and always returns RT_ERROR_NONE. | |||
| rtError_t rtUnsetDeviceIdByGeModelIdx(uint32_t modelIdx, uint32_t &deviceId) { | |||
|   (void)modelIdx; | |||
|   (void)deviceId; | |||
|   return RT_ERROR_NONE; | |||
| } | |||
| // Stub: does not store the callback and always returns RT_ERROR_NONE. | |||
| rtError_t rtProfRegisterCtrlCallback(uint32_t logId, rtProfCtrlHandle callback) { | |||
|   (void)logId; | |||
|   (void)callback; | |||
|   return RT_ERROR_NONE; | |||
| } | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -113,6 +113,9 @@ set(COMMON_SRC_FILES | |||
| "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_manager.cc" | |||
| "${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_builder_manager.cc" | |||
| "${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc" | |||
| "${GE_CODE_DIR}/ge/common/profiling/profiling_init.cc" | |||
| "${GE_CODE_DIR}/ge/common/profiling/profiling_properties.cc" | |||
| "${GE_CODE_DIR}/ge/common/profiling/command_handle.cc" | |||
| "${GE_CODE_DIR}/ge/common/profiling/ge_profiling.cc" | |||
| "${GE_CODE_DIR}/ge/graph/manager/host_mem_manager.cc" | |||
| "${GE_CODE_DIR}/ge/graph/manager/memory_api.cc" | |||
| @@ -717,6 +720,8 @@ set(SINGLE_OP_TEST_FILES | |||
| # Unit-test sources for the profiling components; extensions are spelled out explicitly (CMP0115). | |||
| set(PROFILING_MNG_TEST_FILES | |||
| "profiling/ge_profiling_manager_unittest.cc" | |||
| "profiling/profiling_properties_unittest.cc" | |||
| "profiling/profiling_init_unittest.cc" | |||
| ) | |||
| set(HYBRID_TEST_FILES | |||
| @@ -896,7 +896,7 @@ TEST_F(UtestDavinciModel, LoadWithQueue_fail_with_diff_args) { | |||
| } | |||
| TEST_F(UtestDavinciModel, Sink_model_profile) { | |||
| ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; | |||
| ProfilingManager::Instance().reporter_callback_ = MsprofReport; | |||
| ProfileInfo profile; | |||
| profile.fusion_info.op_name = "relu"; | |||
| @@ -909,7 +909,7 @@ TEST_F(UtestDavinciModel, Sink_model_profile) { | |||
| } | |||
| TEST_F(UtestDavinciModel, Sink_time_profile) { | |||
| ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; | |||
| ProfilingManager::Instance().reporter_callback_ = MsprofReport; | |||
| DavinciModel model(0, nullptr); | |||
| InputData current_data; | |||
| model.SinkTimeProfile(current_data); | |||
| @@ -1031,7 +1031,7 @@ TEST_F(UtestDavinciModel, NnExecute) { | |||
| input_data.blobs = output_data.blobs; | |||
| EXPECT_EQ(input_data.blobs.size(), 1); | |||
| ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; | |||
| ProfilingManager::Instance().reporter_callback_ = MsprofReport; | |||
| ProfilingManager::Instance().device_id_.emplace_back(0); | |||
| model.task_list_.resize(1); | |||
| EXPECT_EQ(model.NnExecute(stream, false, input_data, output_data), SUCCESS); | |||
| @@ -26,6 +26,7 @@ | |||
| #define protected public | |||
| #define private public | |||
| #include "common/profiling/profiling_manager.h" | |||
| #include "common/profiling/command_handle.h" | |||
| #include "graph/ge_local_context.h" | |||
| #include "inc/framework/common/profiling/ge_profiling.h" | |||
| #include "graph/manager/graph_manager.h" | |||
| @@ -37,6 +38,17 @@ | |||
| using namespace ge; | |||
| using namespace std; | |||
| // Test-local command type codes; the enumerators take consecutive values starting at 0. | |||
| namespace { | |||
| enum ProfCommandHandleType { | |||
| kProfCommandhandleInit = 0, | |||
| kProfCommandhandleStart, | |||
| kProfCommandhandleStop, | |||
| kProfCommandhandleFinalize, | |||
| kProfCommandhandleModelSubscribe, | |||
| kProfCommandhandleModelUnsubscribe | |||
| }; | |||
| } | |||
| class UtestGeProfilinganager : public testing::Test { | |||
| protected: | |||
| void SetUp() override {} | |||
| @@ -97,11 +109,11 @@ TEST_F(UtestGeProfilinganager, ParseOptions) { | |||
| } | |||
| TEST_F(UtestGeProfilinganager, plungin_init_) { | |||
| ProfilingManager::Instance().prof_cb_.msprofReporterCallback = ReporterCallback; | |||
| ProfilingManager::Instance().reporter_callback_ = ReporterCallback; | |||
| Status ret = ProfilingManager::Instance().PluginInit(); | |||
| EXPECT_EQ(ret, INTERNAL_ERROR); | |||
| ProfilingManager::Instance().prof_cb_.msprofReporterCallback = nullptr; | |||
| ProfilingManager::Instance().reporter_callback_ = nullptr; | |||
| } | |||
| TEST_F(UtestGeProfilinganager, report_data_) { | |||
| @@ -169,31 +181,33 @@ TEST_F(UtestGeProfilinganager, get_device_from_graph) { | |||
| OmgContext context; | |||
| Status ret = graph_manager.AddGraph(graph_id, graph, options, context); | |||
| EXPECT_EQ(ret, ge::SUCCESS); | |||
| ProfSetGraphIdToDeviceMap(graph_id, device_id); | |||
| ret = ProfGetDeviceFormGraphId(graph_id, device_id); | |||
| EXPECT_EQ(ret, ge::SUCCESS); | |||
| } | |||
| TEST_F(UtestGeProfilinganager, handle_subscribe_info) { | |||
| ProfCommandHandleType prof_type = kProfCommandhandleModelSubscribe; | |||
| ProfCommandHandleData prof_data; | |||
| uint32_t prof_type = RT_PROF_CTRL_SWITCH; | |||
| rtProfCommandHandle prof_data; | |||
| prof_data.profSwitch = 0; | |||
| prof_data.modelId = 1; | |||
| prof_data.type = 0; | |||
| domi::GetContext().train_flag = true; | |||
| auto prof_ptr = std::make_shared<ProfCommandHandleData>(prof_data); | |||
| Status ret = ProfCommandHandle(prof_type, static_cast<void *>(prof_ptr.get()), sizeof(prof_data)); | |||
| auto prof_ptr = std::make_shared<rtProfCommandHandle>(prof_data); | |||
| Status ret = CommandHandle(prof_type, static_cast<void *>(prof_ptr.get()), sizeof(prof_data)); | |||
| EXPECT_EQ(ret, ge::SUCCESS); | |||
| } | |||
| TEST_F(UtestGeProfilinganager, handle_unsubscribe_info) { | |||
| ProfCommandHandleType prof_type = kProfCommandhandleModelUnsubscribe; | |||
| ProfCommandHandleData prof_data; | |||
| uint32_t prof_type = kProfCommandhandleModelUnsubscribe; | |||
| rtProfCommandHandle prof_data; | |||
| prof_data.profSwitch = 0; | |||
| prof_data.modelId = 1; | |||
| domi::GetContext().train_flag = true; | |||
| auto &profiling_manager = ge::ProfilingManager::Instance(); | |||
| profiling_manager.SetSubscribeInfo(0, 1, true); | |||
| auto prof_ptr = std::make_shared<ProfCommandHandleData>(prof_data); | |||
| Status ret = ProfCommandHandle(prof_type, static_cast<void *>(prof_ptr.get()), sizeof(prof_data)); | |||
| auto prof_ptr = std::make_shared<rtProfCommandHandle>(prof_data); | |||
| Status ret = CommandHandle(prof_type, static_cast<void *>(prof_ptr.get()), sizeof(prof_data)); | |||
| profiling_manager.CleanSubscribeInfo(); | |||
| } | |||
| @@ -0,0 +1,76 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <bits/stdc++.h> | |||
| #include <dirent.h> | |||
| #include <gtest/gtest.h> | |||
| #include <fstream> | |||
| #include <map> | |||
| #include <string> | |||
| #define protected public | |||
| #define private public | |||
| #include "common/profiling/profiling_init.h" | |||
| #include "graph/ge_local_context.h" | |||
| #include "graph/manager/graph_manager.h" | |||
| #undef protected | |||
| #undef private | |||
| using namespace ge; | |||
| using namespace std; | |||
| // Fixture and smoke tests for ge::ProfilingInit; no per-test setup or teardown is needed. | |||
| class UtestGeProfilingInit : public testing::Test { | |||
|  protected: | |||
|   void SetUp() override {} | |||
|   void TearDown() override {} | |||
| }; | |||
| // Init() with a full set of profiling options is expected to succeed. | |||
| TEST_F(UtestGeProfilingInit, test_init) { | |||
|   setenv("PROFILING_MODE", "true", true); | |||
|   Options prof_options; | |||
|   prof_options.device_id = 0; | |||
|   prof_options.job_id = "0"; | |||
|   prof_options.profiling_mode = "1"; | |||
|   prof_options.profiling_options = R"({"result_path":"/data/profiling","training_trace":"on","task_trace":"on","aicpu_trace":"on","fp_point":"Data_0","bp_point":"addn","ai_core_metrics":"ResourceConflictRatio"})"; | |||
|   auto status = ge::ProfilingInit::Instance().Init(prof_options); | |||
|   EXPECT_EQ(status, ge::SUCCESS); | |||
| } | |||
| // StopProfiling() is only exercised; nothing is asserted. | |||
| TEST_F(UtestGeProfilingInit, test_stop) { | |||
|   ge::ProfilingInit::Instance().StopProfiling(); | |||
| } | |||
| // ShutDownProfiling() is only exercised; nothing is asserted. | |||
| TEST_F(UtestGeProfilingInit, test_shut) { | |||
|   ge::ProfilingInit::Instance().ShutDownProfiling(); | |||
| } | |||
| // SetDeviceIdByModelId() is only exercised; its result is not checked. | |||
| TEST_F(UtestGeProfilingInit, test_set_deviceId) { | |||
|   uint32_t model = 0; | |||
|   uint32_t device = 0; | |||
|   auto status = ge::ProfilingInit::Instance().SetDeviceIdByModelId(model, device); | |||
| } | |||
| // UnsetDeviceIdByModelId() is only exercised; its result is not checked. | |||
| TEST_F(UtestGeProfilingInit, test_unset_deviceId) { | |||
|   uint32_t model = 0; | |||
|   uint32_t device = 0; | |||
|   auto status = ge::ProfilingInit::Instance().UnsetDeviceIdByModelId(model, device); | |||
| } | |||
| @@ -0,0 +1,72 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <bits/stdc++.h> | |||
| #include <dirent.h> | |||
| #include <gtest/gtest.h> | |||
| #include <fstream> | |||
| #include <map> | |||
| #include <string> | |||
| #define protected public | |||
| #define private public | |||
| #include "common/profiling/profiling_properties.h" | |||
| #include "graph/ge_local_context.h" | |||
| #include "graph/manager/graph_manager.h" | |||
| #undef protected | |||
| #undef private | |||
| using namespace ge; | |||
| using namespace std; | |||
| // GoogleTest fixture for the ProfilingProperties cases; both hooks are intentionally empty, so no per-test setup or cleanup runs. | |||
| class UtestGeProfilingProperties : public ::testing::Test { | |||
| protected: | |||
| void TearDown() override {} | |||
| void SetUp() override {} | |||
| }; | |||
| // After SetExecuteProfiling(true), IsExecuteProfiling() is expected to report true. | |||
| TEST_F(UtestGeProfilingProperties, test_execute_profiling) { | |||
| auto &props = ge::ProfilingProperties::Instance(); | |||
| props.SetExecuteProfiling(true); | |||
| const auto execute_enabled = props.IsExecuteProfiling(); | |||
| EXPECT_EQ(execute_enabled, true); | |||
| } | |||
| // After SetTrainingTrace(true), ProfilingTrainingTraceOn() is expected to report true. | |||
| TEST_F(UtestGeProfilingProperties, test_training_trace) { | |||
| auto &props = ge::ProfilingProperties::Instance(); | |||
| props.SetTrainingTrace(true); | |||
| const auto training_trace_on = props.ProfilingTrainingTraceOn(); | |||
| EXPECT_EQ(training_trace_on, true); | |||
| } | |||
| // Round-trips the fp/bp points: the values handed to SetFpBpPoint are expected to come back from GetFpBpPoint. | |||
| TEST_F(UtestGeProfilingProperties, test_fpbp_point) { | |||
| auto &profiling_properties = ge::ProfilingProperties::Instance(); | |||
| std::string fp_point = "fp"; | |||
| std::string bp_point = "bp"; | |||
| profiling_properties.SetFpBpPoint(fp_point, bp_point); | |||
| // Read back into separate, initially empty strings. Reusing fp_point/bp_point as the outputs would | |||
| // let both assertions pass even if GetFpBpPoint wrote nothing, because they already hold "fp"/"bp". | |||
| std::string fp_read; | |||
| std::string bp_read; | |||
| profiling_properties.GetFpBpPoint(fp_read, bp_read); | |||
| EXPECT_EQ(fp_read, "fp"); | |||
| EXPECT_EQ(bp_read, "bp"); | |||
| } | |||
| // With both execute-profiling and load-profiling switched on, ProfilingOn() is expected to report true. | |||
| TEST_F(UtestGeProfilingProperties, test_profiling_on) { | |||
| auto &props = ge::ProfilingProperties::Instance(); | |||
| props.SetExecuteProfiling(true); | |||
| props.SetLoadProfiling(true); | |||
| const auto is_on = props.ProfilingOn(); | |||
| EXPECT_EQ(is_on, true); | |||
| } | |||
| @@ -33,6 +33,7 @@ extern "C" { | |||
| #endif | |||
| #endif | |||
| #define RT_PROF_MAX_DEV_NUM 64 /* element count of rtProfCommandHandle::devIdList */ | |||
| typedef int32_t rtError_t; /* status code type returned by the RTS_API functions declared in this header */ | |||
| static const int32_t RT_ERROR_NONE = 0; // success | |||
| @@ -80,6 +81,13 @@ typedef enum tagRtLimitType { | |||
| RT_LIMIT_TYPE_LOW_POWER_TIMEOUT = 0, // timeout for power down , ms | |||
| } rtLimitType_t; | |||
| typedef enum { /* profiling control kinds; presumably the `type` value handed to an rtProfCtrlHandle callback — TODO confirm */ | |||
| RT_PROF_CTRL_INVALID = 0, /* explicit zero value */ | |||
| RT_PROF_CTRL_SWITCH, /* implicitly 1 */ | |||
| RT_PROF_CTRL_REPORTER, /* implicitly 2 */ | |||
| RT_PROF_CTRL_BUTT, /* implicitly 3; presumably an end-of-range sentinel — TODO confirm */ | |||
| } rtProfCtrlType_t; | |||
| typedef struct rtExceptionInfo { | |||
| uint32_t taskid; | |||
| uint32_t streamid; | |||
| @@ -88,6 +96,15 @@ typedef struct rtExceptionInfo { | |||
| uint32_t retcode; | |||
| } rtExceptionInfo; | |||
| typedef struct rtProfCommandHandle { /* profiling command payload; presumably what the `data` pointer of an rtProfCtrlHandle callback refers to — TODO confirm */ | |||
| uint64_t profSwitch; /* NOTE(review): looks like a bit mask of profiling switches — confirm */ | |||
| uint64_t profSwitchHi; /* NOTE(review): presumably the upper half of the switch mask — confirm */ | |||
| uint32_t devNums; /* presumably the number of valid entries in devIdList — TODO confirm */ | |||
| uint32_t devIdList[RT_PROF_MAX_DEV_NUM]; /* fixed-capacity array, RT_PROF_MAX_DEV_NUM elements */ | |||
| uint32_t modelId; | |||
| uint32_t type; /* NOTE(review): may carry an rtProfCtrlType_t value — confirm */ | |||
| } rtProfCommandHandle_t; | |||
| typedef void (*rtErrorCallback)(rtExceptionType); /* callback taking an rtExceptionType by value and returning nothing */ | |||
| typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); /* callback taking a pointer to an rtExceptionInfo and returning nothing */ | |||
| @@ -118,6 +135,8 @@ typedef void *rtLabel_t; | |||
| */ | |||
| typedef void *rtModel_t; | |||
| typedef rtError_t (*rtProfCtrlHandle)(uint32_t type, void *data, uint32_t len); /* callback type accepted by rtProfRegisterCtrlCallback; `len` is presumably the byte size of `data` — TODO confirm */ | |||
| /** | |||
| * @ingroup profiling_base | |||
| * @brief runtime handle. | |||
| @@ -357,6 +376,14 @@ RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_ | |||
| */ | |||
| RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId); | |||
| RTS_API rtError_t rtProfRegisterCtrlCallback(uint32_t logId, rtProfCtrlHandle callback); /* by its name, registers `callback` as a profiling control handler; what `logId` identifies is not visible here — TODO confirm */ | |||
| RTS_API rtError_t rtSetDeviceIdByGeModelIdx(uint32_t modelIdx, uint32_t &deviceId); /* NOTE(review): this and the next two declarations take a C++ reference (uint32_t &) although the region is closed by an extern "C" brace guarded by __cplusplus; a C translation unit could not parse them — confirm the header is C++-only */ | |||
| RTS_API rtError_t rtUnsetDeviceIdByGeModelIdx(uint32_t modelIdx, uint32_t &deviceId); /* NOTE(review): deviceId is a non-const reference; whether it is read, written, or both is not visible here — confirm */ | |||
| RTS_API rtError_t rtGetDeviceIdByGeModelIdx(uint32_t modelIdx, uint32_t &deviceId); /* presumably writes the looked-up device id into deviceId — TODO confirm */ | |||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
| } | |||
| #endif | |||
| @@ -114,15 +114,6 @@ enum MsprofCtrlCallbackType { | |||
| MSPROF_CTRL_PROF_SWITCH_OFF // for prof switch off | |||
| }; | |||
| #define MSPROF_MAX_DEV_NUM (64) /* element count of MsprofCommandHandle::devIdList */ | |||
| struct MsprofCommandHandle { /* NOTE(review): rtProfCommandHandle declares these same four fields plus profSwitchHi and type; the enclosing hunk header shrinks from 15 to 6 lines, so this definition is presumably being removed — confirm */ | |||
| uint64_t profSwitch; | |||
| uint32_t devNums; // length of device id list | |||
| uint32_t devIdList[MSPROF_MAX_DEV_NUM]; /* fixed-capacity array, MSPROF_MAX_DEV_NUM elements */ | |||
| uint32_t modelId; | |||
| }; | |||
| /** | |||
| * @name MsprofCtrlCallback | |||
| * @brief callback to start/stop profiling | |||