@@ -22,6 +22,7 @@ | |||
#include "graph/load/graph_loader.h" | |||
#include "init/gelib.h" | |||
#include "framework/common/ge_inner_error_codes.h" | |||
#include "opskernel_manager/ops_kernel_builder_manager.h" | |||
namespace { | |||
const uint32_t kDeviceListIndex = 3; | |||
@@ -44,7 +45,7 @@ const std::map<ProfCommandHandleType, std::string> kProfCommandTypeMap = { | |||
{kProfCommandhandleModelUnsubscribe, kProfModelUnsubscribe}}; | |||
} // namespace | |||
bool TransProfConfigToParam(const MsprofCommandHandle &profCommand, vector<string> &prof_config_params) { | |||
bool TransProfConfigToParam(const ProfCommandHandleData &profCommand, vector<string> &prof_config_params) { | |||
prof_config_params.clear(); | |||
prof_config_params.emplace_back(kDeviceNums); | |||
prof_config_params.emplace_back(std::to_string(profCommand.devNums)); | |||
@@ -71,7 +72,7 @@ bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { | |||
return false; | |||
} | |||
if (device_nums == 0 || device_nums > MAX_DEV_NUM) { | |||
GELOGE(ge::PARAM_INVALID, "The device nums is invalid."); | |||
GELOGE(ge::PARAM_INVALID, "The device nums: %u is invalid.", device_nums); | |||
return false; | |||
} | |||
@@ -105,16 +106,26 @@ bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { | |||
} | |||
/// Registers the msprof control callback with the profiling manager.
///
/// @param func Control callback to register; must not be null.
/// @return ge::PARAM_INVALID when func is null, ge::SUCCESS otherwise. A callback
///         that is already registered is kept and the new one is ignored.
ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) {
  if (func == nullptr) {
    GELOGE(ge::PARAM_INVALID, "Msprof ctrl callback is nullptr.");
    return ge::PARAM_INVALID;
  }

  const bool ctrl_callback_missing =
      (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback == nullptr);
  if (ctrl_callback_missing) {
    GELOGI("GE register Msprof ctrl callback.");
    ge::ProfilingManager::Instance().SetMsprofCtrlCallback(func);
    return ge::SUCCESS;
  }

  // First registration wins; later attempts only produce a warning.
  GELOGW("Msprof ctrl callback is exist, just ignore it.");
  return ge::SUCCESS;
}
ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) { | |||
if (func == nullptr) { | |||
GELOGE(ge::PARAM_INVALID, "MsprofSetDeviceCallback callback is nullptr."); | |||
return ge::PARAM_INVALID; | |||
} | |||
// Pass MsprofSetDeviceCallback to runtime | |||
GELOGI("GE pass setdevice callback to runtime."); | |||
ge::Status rt_ret = rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), static_cast<rtDeviceStateCallback>(func)); | |||
if (rt_ret != ge::SUCCESS) { | |||
GELOGE(rt_ret, "Pass MsprofSetDeviceCallback to runtime failed!"); | |||
@@ -124,54 +135,71 @@ ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) { | |||
} | |||
/// Registers the msprof reporter callback and forwards it to the runtime and to
/// the ops-kernel builder manager (HCCL, per the log message below).
///
/// @param func Reporter callback to register; must not be null.
/// @return ge::PARAM_INVALID when func is null; the failing status when handing
///         the callback to the runtime or to the ops-kernel builder manager
///         fails; ge::SUCCESS otherwise. When a reporter callback is already
///         registered the new one is ignored and ge::SUCCESS is returned.
ge::Status RegProfReporterCallback(MsprofReporterCallback func) {
  if (func == nullptr) {
    GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr.");
    return ge::PARAM_INVALID;
  }
  // Test the reporter slot, not the ctrl slot: checking msprofCtrlCallback here
  // would skip reporter registration whenever a ctrl callback had been set.
  if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofReporterCallback != nullptr) {
    GELOGW("Msprof reporter callback is exist, just ignore it.");
  } else {
    GELOGI("GE register Msprof reporter callback.");
    ge::ProfilingManager::Instance().SetMsprofReporterCallback(func);
    // Pass MsprofReporterCallback to runtime
    ge::Status rt_ret = rtSetMsprofReporterCallback(func);
    if (rt_ret != ge::SUCCESS) {
      GELOGE(rt_ret, "Pass MsprofReporterCallback to runtime failed!!");
      return rt_ret;
    }
    // Pass MsprofReporterCallback to hccl in opskernel so initialize
    rt_ret = OpskernelBuilderManager::Instance().RegProfReporterCallback(func);
    if (rt_ret != ge::SUCCESS) {
      GELOGE(rt_ret, "Pass MsprofReporterCallback to hccl failed.");
      return rt_ret;
    }
  }
  return ge::SUCCESS;
}
/// Translates a profiler command into a ge::Command and executes it through
/// ge::GraphLoader::CommandHandle.
///
/// @param type Profiler command type; must be a key of kProfCommandTypeMap.
/// @param data Command payload, interpreted as ProfCommandHandleData. It is
///             null-checked via GE_CHECK_NOTNULL for every type except
///             kProfCommandhandleFinalize.
/// @param len  Payload length; not used by this function.
/// @return ge::PARAM_INVALID when the type is unknown or the payload cannot be
///         converted to string parameters; ge::FAILED when the device
///         configuration is invalid or the command handler fails;
///         ge::SUCCESS otherwise.
ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len) {
  if (type != kProfCommandhandleFinalize) {
    GE_CHECK_NOTNULL(data);
  }
  // Use the same payload type that TransProfConfigToParam() accepts.
  ProfCommandHandleData *prof_config_param = static_cast<ProfCommandHandleData *>(data);
  auto iter = kProfCommandTypeMap.find(type);
  if (iter == kProfCommandTypeMap.end()) {
    GELOGW("The prof comand type is invalid.");
    return ge::PARAM_INVALID;
  }
  std::vector<string> prof_params;
  // Only start/stop carry a device list that has to be validated and converted.
  if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) {
    if (!isProfConfigValid(prof_config_param->devIdList, prof_config_param->devNums)) {
      return ge::FAILED;
    }
    if (!TransProfConfigToParam(*prof_config_param, prof_params)) {
      GELOGE(ge::PARAM_INVALID, "Transfer profilerConfig to string vector failed");
      return ge::PARAM_INVALID;
    }
  }
  ge::GraphLoader graph_loader;
  ge::Command command;
  command.cmd_params.clear();
  command.cmd_type = iter->second;
  command.cmd_params = prof_params;
  // NOTE(review): for the finalize command module_index is never assigned here;
  // confirm that ge::Command zero-initializes it before it is logged and used.
  if (type != kProfCommandhandleFinalize) {
    command.module_index = prof_config_param->profSwitch;
  }
  GELOGI("GE commandhandle execute, Command Type: %d, data type config: 0x%llx", type, command.module_index);
  if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) {
    GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str());
  }
  ge::Status ret = graph_loader.CommandHandle(command);
  if (ret != ge::SUCCESS) {
    GELOGE(ret, "Handle profiling command failed");
    return ge::FAILED;
  }
  GELOGI("Successfully execute profiling command type: %d, command 0x%llx.", type, command.module_index);
  return ge::SUCCESS;
}
@@ -22,6 +22,7 @@ | |||
#include "graph/ge_context.h" | |||
#include "runtime/base.h" | |||
#include "graph/load/new_model_manager/davinci_model.h" | |||
#include "opskernel_manager/ops_kernel_builder_manager.h" | |||
namespace { | |||
const char *const kTrainingTrace = "training_trace"; | |||
@@ -41,7 +42,10 @@ namespace ge { | |||
// Creates the manager with every profiling mode switched off, no subscribers,
// and no msprof callbacks registered.
ProfilingManager::ProfilingManager()
    : is_load_profiling_(false),
      is_execute_profiling_(false),
      is_training_trace_(false),
      subscribe_count_(0) {
  // Callers compare these against nullptr before invoking them, so both slots
  // must start out explicitly empty.
  prof_cb_.msprofCtrlCallback = nullptr;
  prof_cb_.msprofReporterCallback = nullptr;
}
ProfilingManager::~ProfilingManager() {} | |||
@@ -64,6 +68,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||
} | |||
if (is_execute_profiling_) { | |||
if (prof_cb_.msprofCtrlCallback == nullptr) { | |||
GELOGE(ge::PARAM_INVALID, "MsprofCtrlCallback callback is nullptr."); | |||
return ge::PARAM_INVALID; | |||
} | |||
int32_t cb_ret = prof_cb_.msprofCtrlCallback( | |||
static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), | |||
static_cast<void *>(&prof_conf), sizeof(MsprofGeOptions)); | |||
@@ -116,7 +124,7 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt | |||
// Parse json str for bp fp | |||
Status ret = ParseOptions(prof_conf.options); | |||
if (ret != ge::SUCCESS) { | |||
GELOGE(ge::PARAM_INVALID, "Parse taining trace param failed."); | |||
GELOGE(ge::PARAM_INVALID, "Parse training trace param failed."); | |||
return ge::PARAM_INVALID; | |||
} | |||
@@ -182,6 +190,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf | |||
} | |||
// stop profiling | |||
// Without a registered ctrl callback there is nothing to finalize. The
// enclosing function returns void, so log the problem and return no value.
if (prof_cb_.msprofCtrlCallback == nullptr) {
  GELOGE(ge::PARAM_INVALID, "MsprofCtrlCallback callback is nullptr.");
  return;
}
int32_t cb_ret = prof_cb_.msprofCtrlCallback(static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), | |||
nullptr, 0); | |||
if (cb_ret != 0) { | |||
@@ -210,7 +222,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin | |||
.append(std::to_string(stream_id)).append(" ") | |||
.append(std::to_string(model_id)).append("\n")); | |||
ReporterData reporter_data; | |||
ReporterData reporter_data{}; | |||
reporter_data.deviceId = device_id; | |||
reporter_data.data = (unsigned char *)data.c_str(); | |||
reporter_data.dataLen = data.size(); | |||
@@ -298,7 +310,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin | |||
void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &data) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
ReporterData reporter_data; | |||
ReporterData reporter_data{}; | |||
int ret = -1; | |||
int32_t cb_ret = -1; | |||
size_t index = data.size() / kReportMaxLen; | |||
@@ -487,6 +499,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfIn | |||
GELOGE(FAILED, "Runtime profiler start failed."); | |||
return FAILED; | |||
} | |||
Status hccl_ret = OpskernelBuilderManager::Instance().ProfStart(model_load_mask); | |||
if (hccl_ret != SUCCESS) { | |||
GELOGE(FAILED, "Hccl profiler start failed."); | |||
return FAILED; | |||
} | |||
is_load_profiling_ = true; | |||
GELOGI("Prof init: model load profiling on."); | |||
} | |||
@@ -517,6 +534,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFi | |||
return FAILED; | |||
} | |||
Status hccl_ret = OpskernelBuilderManager::Instance().ProfStop(PROF_MODEL_LOAD_MASK); | |||
if (hccl_ret != SUCCESS) { | |||
GELOGE(FAILED, "Hccl profiler stop failed."); | |||
return FAILED; | |||
} | |||
for (auto device_id_module : device_id_module_map_) { | |||
if (device_id_module.second != 0) { | |||
uint32_t device_id = static_cast<uint32_t>(device_id_module.first); | |||
@@ -640,6 +662,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt | |||
GELOGE(FAILED, "Runtime profiler config proc failed."); | |||
return FAILED; | |||
} | |||
Status hccl_ret = OpskernelBuilderManager::Instance().ProfStart(module); | |||
if (hccl_ret != SUCCESS) { | |||
GELOGE(FAILED, "Hccl profiler start failed."); | |||
return FAILED; | |||
} | |||
if ((module & PROF_MODEL_EXECUTE_MASK) == PROF_MODEL_EXECUTE_MASK) { | |||
for (int32_t i = 0; i < device_num; i++) { | |||
if (std::find(device_id_.begin(), device_id_.end(), device_list[i]) == device_id_.end()) { | |||
@@ -681,6 +709,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt | |||
GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); | |||
return FAILED; | |||
} | |||
Status hccl_ret = OpskernelBuilderManager::Instance().ProfStop(module); | |||
if (hccl_ret != SUCCESS) { | |||
GELOGE(FAILED, "Hccl profiler stop failed."); | |||
return FAILED; | |||
} | |||
uint64_t execute_model_mask = module & PROF_MODEL_EXECUTE_MASK; | |||
if (execute_model_mask == PROF_MODEL_EXECUTE_MASK) { | |||
for (int32_t i = 0; i < device_num; i++) { | |||
@@ -749,6 +782,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::PluginInit() const { | |||
if (prof_cb_.msprofReporterCallback == nullptr) { | |||
GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr."); | |||
return ge::PARAM_INVALID; | |||
} | |||
return prof_cb_.msprofReporterCallback( | |||
static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), | |||
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_INIT), | |||
@@ -757,6 +794,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::Plugin | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUnInit() const { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
// Without a registered reporter callback there is nothing to uninitialize.
// PluginUnInit() returns void, so log the problem and return no value.
if (prof_cb_.msprofReporterCallback == nullptr) {
  GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr.");
  return;
}
int32_t cb_ret = prof_cb_.msprofReporterCallback( | |||
static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), | |||
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_UNINIT), | |||
@@ -769,6 +810,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUn | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMsprofReport( | |||
ReporterData &reporter_data) const { | |||
if (prof_cb_.msprofReporterCallback == nullptr) { | |||
GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr."); | |||
return ge::PARAM_INVALID; | |||
} | |||
return prof_cb_.msprofReporterCallback( | |||
static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), | |||
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_REPORT), | |||
@@ -806,8 +851,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP | |||
} | |||
} catch (...) { | |||
GELOGE(FAILED, "Json prof options is invalid."); | |||
return ge::PARAM_INVALID; | |||
return; | |||
} | |||
return; | |||
} | |||
@@ -35,6 +35,23 @@ using Json = nlohmann::json; | |||
namespace { | |||
const std::string GE_PROFILING_MODULE = "Framework"; | |||
// DataTypeConfig MASK
// Bit flags selecting which kinds of profiling data are enabled. Each mask has
// the ULL suffix so all of them are unsigned 64-bit values, matching the 64-bit
// module bitmap they are combined with and the width PROF_MODEL_LOAD_MASK needs.
// NOTE: these are preprocessor macros, so the surrounding namespace does not
// limit their visibility.
#define PROF_ACL_API_MASK 0x0001ULL
#define PROF_TASK_TIME_MASK 0x0002ULL
#define PROF_AICORE_METRICS_MASK 0x0004ULL
#define PROF_AICPU_TRACE_MASK 0x0008ULL
#define PROF_MODEL_EXECUTE_MASK 0x0010ULL
#define PROF_RUNTIME_API_MASK 0x0020ULL
#define PROF_RUNTIME_TRACE_MASK 0x0040ULL
#define PROF_SCHEDULE_TIMELINE_MASK 0x0080ULL
#define PROF_SCHEDULE_TRACE_MASK 0x0100ULL
#define PROF_AIVECTORCORE_METRICS_MASK 0x0200ULL
#define PROF_SUBTASK_TIME_MASK 0x0400ULL
#define PROF_TRAINING_TRACE_MASK 0x0800ULL
#define PROF_HCCL_TRACE_MASK 0x1000ULL
#define PROF_DATA_PROCESS_MASK 0x2000ULL
#define PROF_MODEL_LOAD_MASK 0x8000000000000000ULL
} // namespace | |||
namespace ge { | |||
struct DeviceSubsInfo { | |||