Merge pull request !66 from HW_KK/mastertags/v1.0.0
@@ -93,6 +93,7 @@ struct OutputData { | |||||
struct Command { | struct Command { | ||||
std::string cmd_type; // Command type | std::string cmd_type; // Command type | ||||
std::vector<std::string> cmd_params; // Command params | std::vector<std::string> cmd_params; // Command params | ||||
uint64_t module_index; // prof module | |||||
}; | }; | ||||
// The definition of I/O shape description | // The definition of I/O shape description | ||||
@@ -34,6 +34,11 @@ const char *const kName = "name"; | |||||
const char *const kTraceID = "traceId"; | const char *const kTraceID = "traceId"; | ||||
const char *const kProfDir = "resultPath"; | const char *const kProfDir = "resultPath"; | ||||
const size_t kReportMaxLen = 2048; | const size_t kReportMaxLen = 2048; | ||||
const int32_t kMaxDeviceNum = 256; | |||||
const std::string kConfigNumsdev = "devNums"; | |||||
const std::string kConfigDevIdList = "devIdList"; | |||||
const std::string kProfStart = "prof_start"; | |||||
const std::string kProfStop = "prof_stop"; | |||||
} // namespace | } // namespace | ||||
namespace ge { | namespace ge { | ||||
@@ -64,7 +69,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||||
return ret; | return ret; | ||||
} | } | ||||
if (is_profiling_) { | |||||
if (is_load_profiling_) { | |||||
// register Framework to profiling | // register Framework to profiling | ||||
int result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_); | int result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_); | ||||
if (result != 0) { | if (result != 0) { | ||||
@@ -92,7 +97,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||||
const std::string &config) { | const std::string &config) { | ||||
#ifdef DAVINCI_SUPPORT_PROFILING | #ifdef DAVINCI_SUPPORT_PROFILING | ||||
try { | try { | ||||
is_profiling_ = false; | |||||
is_load_profiling_ = false; | |||||
is_execute_profiling_ = false; | |||||
profiling_opts_.clear(); | profiling_opts_.clear(); | ||||
op_trace_conf_.clear(); | op_trace_conf_.clear(); | ||||
Json start_prof_conf = Json::parse(config); | Json start_prof_conf = Json::parse(config); | ||||
@@ -114,7 +120,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||||
} | } | ||||
device_id_.push_back(std::stoi(device_id_str)); | device_id_.push_back(std::stoi(device_id_str)); | ||||
} | } | ||||
if (is_all == true) { | |||||
if (is_all) { | |||||
int32_t count = 0; | int32_t count = 0; | ||||
rtError_t rt_err = rtGetDeviceCount(&count); | rtError_t rt_err = rtGetDeviceCount(&count); | ||||
if (rt_err != RT_ERROR_NONE) { | if (rt_err != RT_ERROR_NONE) { | ||||
@@ -133,7 +139,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||||
GELOGE(FAILED, "Parse feature from acl cfg failed."); | GELOGE(FAILED, "Parse feature from acl cfg failed."); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
is_profiling_ = true; | |||||
is_load_profiling_ = true; | |||||
is_execute_profiling_ = true; | |||||
} catch (...) { | } catch (...) { | ||||
GELOGE(FAILED, "Json conf is not invalid !"); | GELOGE(FAILED, "Json conf is not invalid !"); | ||||
return ge::PARAM_INVALID; | return ge::PARAM_INVALID; | ||||
@@ -200,21 +207,25 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||||
const char *profiling_mode = std::getenv("PROFILING_MODE"); | const char *profiling_mode = std::getenv("PROFILING_MODE"); | ||||
const char *prof_options = std::getenv("PROFILING_OPTIONS"); | const char *prof_options = std::getenv("PROFILING_OPTIONS"); | ||||
if ((profiling_mode == nullptr) || (strcmp("true", profiling_mode) != 0) || (prof_options == nullptr)) { | if ((profiling_mode == nullptr) || (strcmp("true", profiling_mode) != 0) || (prof_options == nullptr)) { | ||||
is_profiling_ = false; | |||||
is_load_profiling_ = false; | |||||
is_execute_profiling_ = false; | |||||
} else { | } else { | ||||
std::string prof_options_str = std::string(prof_options); | std::string prof_options_str = std::string(prof_options); | ||||
profiling_opts_ = StringUtils::Split(prof_options_str, ':'); | profiling_opts_ = StringUtils::Split(prof_options_str, ':'); | ||||
is_profiling_ = true; | |||||
is_load_profiling_ = true; | |||||
is_execute_profiling_ = true; | |||||
GELOGI("The profiling in env is %s, %s", profiling_mode, prof_options); | GELOGI("The profiling in env is %s, %s", profiling_mode, prof_options); | ||||
} | } | ||||
if (!is_profiling_) { | |||||
if (!is_load_profiling_) { | |||||
const std::string enable_profiling = "1"; | const std::string enable_profiling = "1"; | ||||
if (options.profiling_mode != enable_profiling || options.profiling_options.empty()) { | if (options.profiling_mode != enable_profiling || options.profiling_options.empty()) { | ||||
is_profiling_ = false; | |||||
is_load_profiling_ = false; | |||||
is_execute_profiling_ = false; | |||||
return SUCCESS; | return SUCCESS; | ||||
} else { | } else { | ||||
profiling_opts_ = StringUtils::Split(options.profiling_options, ':'); | profiling_opts_ = StringUtils::Split(options.profiling_options, ':'); | ||||
is_profiling_ = true; | |||||
is_load_profiling_ = true; | |||||
is_execute_profiling_ = true; | |||||
GELOGI("The profiling in options is %s, %s", options.profiling_mode.c_str(), options.profiling_options.c_str()); | GELOGI("The profiling in options is %s, %s", options.profiling_mode.c_str(), options.profiling_options.c_str()); | ||||
} | } | ||||
} | } | ||||
@@ -310,7 +321,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St | |||||
} | } | ||||
// runtime startup for profiling | // runtime startup for profiling | ||||
GE_CHK_RT_RET(rtProfilerStart()); | |||||
uint64_t module = GetProfilingModule(); | |||||
int32_t device_num = 1; | |||||
uint32_t device_id_rt = static_cast<uint32_t>(device_id); | |||||
GE_CHK_RT_RET(rtProfilerStart(module, device_num, &device_id_rt)); | |||||
// call profiling startup API | // call profiling startup API | ||||
ProfMgrCfg prof_cfg = {send_profiling_config_}; | ProfMgrCfg prof_cfg = {send_profiling_config_}; | ||||
@@ -333,11 +347,22 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf | |||||
int ret = reporter->Flush(); | int ret = reporter->Flush(); | ||||
GELOGI("Report data end, ret is %d", ret); | GELOGI("Report data end, ret is %d", ret); | ||||
} | } | ||||
rtError_t rt_ret = rtProfilerStop(); | |||||
uint64_t module = GetProfilingModule(); | |||||
int32_t device_num = static_cast<int32_t>(device_id_.size()); | |||||
uint32_t *device_id_ptr = new (std::nothrow) uint32_t[device_num]; | |||||
if (device_id_ptr == nullptr) { | |||||
GELOGE(FAILED, "Stop profiling device id ptr is null."); | |||||
return; | |||||
} | |||||
for (int32_t i = 0; i < device_num; i++) { | |||||
device_id_ptr[i] = static_cast<uint32_t>(device_id_[i]); | |||||
} | |||||
rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr); | |||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGI("Call rtProfilerStop ret:%d", rt_ret); | |||||
GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret); | |||||
} | } | ||||
delete[] device_id_ptr; | |||||
device_id_ptr = nullptr; | |||||
for (size_t i = 0; i < prof_handle_vec_.size(); ++i) { | for (size_t i = 0; i < prof_handle_vec_.size(); ++i) { | ||||
int result = ProfMgrStop(prof_handle_vec_[i]); | int result = ProfMgrStop(prof_handle_vec_[i]); | ||||
@@ -526,13 +551,13 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr | |||||
return; | return; | ||||
} | } | ||||
GELOGI("current phy_device_id:%d", phy_device_id); | GELOGI("current phy_device_id:%d", phy_device_id); | ||||
auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id); | |||||
if (ret == device_id_.end()) { | |||||
GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); | |||||
return; | |||||
if (!is_acl_api_mode_) { | |||||
auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id); | |||||
if (ret == device_id_.end()) { | |||||
GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); | |||||
return; | |||||
} | |||||
} | } | ||||
GELOGI("start ProfilingTaskDescInfo."); | GELOGI("start ProfilingTaskDescInfo."); | ||||
ProfilingTaskDescInfo(task_desc_info, phy_device_id); | ProfilingTaskDescInfo(task_desc_info, phy_device_id); | ||||
GELOGI("start ProfilingGraphDescInfo."); | GELOGI("start ProfilingGraphDescInfo."); | ||||
@@ -546,6 +571,305 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::SetProfi | |||||
recv_profiling_config_ = profiling_cfg; | recv_profiling_config_ = profiling_cfg; | ||||
} | } | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t ProfilingManager::GetProfilingModule() { | |||||
uint64_t module = PROF_MODEL_EXECUTE_MASK | PROF_RUNTIME_API_MASK | PROF_RUNTIME_TRACE_MASK | | |||||
PROF_SCHEDULE_TIMELINE_MASK | PROF_SCHEDULE_TRACE_MASK | PROF_TASK_TIME_MASK | | |||||
PROF_SUBTASK_TIME_MASK | PROF_AICPU_TRACE_MASK | PROF_AICORE_METRICS_MASK | | |||||
PROF_AIVECTORCORE_METRICS_MASK | PROF_MODEL_LOAD_MASK; | |||||
return module; | |||||
} | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfInit(uint64_t module) { | |||||
#ifdef DAVINCI_SUPPORT_PROFILING | |||||
std::lock_guard<std::mutex> lock(mutex_); | |||||
uint64_t model_load_mask = module & PROF_MODEL_LOAD_MASK; | |||||
if (model_load_mask == PROF_MODEL_LOAD_MASK) { | |||||
// register Framework to profiling | |||||
int32_t result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_); | |||||
if (result != SUCCESS) { | |||||
GELOGE(FAILED, "Register profiling engine failed."); | |||||
return FAILED; | |||||
} | |||||
int32_t device_num = -1; | |||||
rtError_t rt_ret = rtProfilerStart(model_load_mask, device_num, nullptr); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(FAILED, "Runtime profiler start failed."); | |||||
return FAILED; | |||||
} | |||||
is_load_profiling_ = true; | |||||
GELOGI("Prof init: model load profiling on."); | |||||
} | |||||
uint64_t training_trace_mask = module & PROF_TRAINING_TRACE_MASK; | |||||
if (training_trace_mask == PROF_TRAINING_TRACE_MASK) { | |||||
is_training_trace_ = true; | |||||
} | |||||
is_acl_api_mode_ = true; | |||||
GELOGI("Prof init success."); | |||||
#endif | |||||
return SUCCESS; | |||||
} | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFinalize() { | |||||
#ifdef DAVINCI_SUPPORT_PROFILING | |||||
std::lock_guard<std::mutex> lock(mutex_); | |||||
is_load_profiling_ = false; | |||||
is_training_trace_ = false; | |||||
is_acl_api_mode_ = false; | |||||
int32_t ret = Msprof::Engine::UnInit(GE_PROFILING_MODULE); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Profiling plugin uninit failed, ret:%d", ret); | |||||
} | |||||
int32_t dev_num = -1; | |||||
rtError_t rt_ret = rtProfilerStop(PROF_MODEL_LOAD_MASK, dev_num, nullptr); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(FAILED, "Runtime profiler stop failed."); | |||||
return FAILED; | |||||
} | |||||
for (auto device_id_module : device_id_module_map_) { | |||||
if (device_id_module.second != 0) { | |||||
uint32_t device_id = static_cast<uint32_t>(device_id_module.first); | |||||
GELOGI("Prof finalize: device_id: %u, module: 0x%llx.", device_id, device_id_module.second); | |||||
rt_ret = rtProfilerStop(device_id_module.second, 1, &device_id); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(FAILED, "Runtime profiler stop failed."); | |||||
return FAILED; | |||||
} | |||||
} | |||||
} | |||||
device_id_module_map_.clear(); | |||||
device_id_.clear(); | |||||
GELOGI("Prof finalize success."); | |||||
#endif | |||||
return SUCCESS; | |||||
} | |||||
Status ProfilingManager::ProfParseDeviceId(const std::map<std::string, std::string> &config_para, | |||||
vector<int32_t> &device_list) { | |||||
#ifdef DAVINCI_SUPPORT_PROFILING | |||||
auto iter = config_para.find(kConfigDevIdList); | |||||
if (iter != config_para.end()) { | |||||
std::string device_id_list = iter->second; | |||||
std::string temp; | |||||
vector<std::string> decvice_id; | |||||
for (uint32_t i = 0; i < device_id_list.size(); i++) { | |||||
if (isdigit(device_id_list[i])) { | |||||
temp.append(1, device_id_list[i]); | |||||
} else { | |||||
if (!temp.empty()) { | |||||
decvice_id.emplace_back(temp); | |||||
} | |||||
temp.clear(); | |||||
} | |||||
} | |||||
if (!temp.empty()) { | |||||
decvice_id.emplace_back(temp); | |||||
} | |||||
for (uint32_t i = 0; i < decvice_id.size(); i++) { | |||||
try { | |||||
int32_t dev_id = std::stoi(decvice_id[i]); | |||||
device_list.push_back(dev_id); | |||||
} catch (std::invalid_argument &) { | |||||
GELOGE(FAILED, "Device id: %s is invalid.", decvice_id[i].c_str()); | |||||
return FAILED; | |||||
} catch (std::out_of_range &) { | |||||
GELOGE(FAILED, "Device id: %s is out of range.", decvice_id[i].c_str()); | |||||
} catch (...) { | |||||
GELOGE(FAILED, "Device id: %s cannot change to int.", decvice_id[i].c_str()); | |||||
return FAILED; | |||||
} | |||||
} | |||||
} else { | |||||
GELOGE(FAILED, "Config para not contain device id list."); | |||||
return FAILED; | |||||
} | |||||
#endif | |||||
return SUCCESS; | |||||
} | |||||
Status ProfilingManager::ProfParseParam(const std::map<std::string, std::string> &config_para, int32_t &device_num, | |||||
vector<int32_t> &device_list) { | |||||
#ifdef DAVINCI_SUPPORT_PROFILING | |||||
// device num | |||||
auto iter = config_para.find(kConfigNumsdev); | |||||
if (iter != config_para.end()) { | |||||
try { | |||||
device_num = std::stoi(iter->second); | |||||
} catch (std::invalid_argument &) { | |||||
GELOGE(FAILED, "Device nun: %s is invalid.", iter->second.c_str()); | |||||
return FAILED; | |||||
} catch (std::out_of_range &) { | |||||
GELOGE(FAILED, "Device num: %s is out of range.", iter->second.c_str()); | |||||
} catch (...) { | |||||
GELOGE(FAILED, "Device num: %s cannot change to int.", iter->second.c_str()); | |||||
return FAILED; | |||||
} | |||||
} else { | |||||
GELOGE(FAILED, "Config para not contain device num."); | |||||
return FAILED; | |||||
} | |||||
// device id | |||||
if (ProfParseDeviceId(config_para, device_list) != SUCCESS) { | |||||
GELOGE(FAILED, "Parse config para device id failed."); | |||||
return FAILED; | |||||
} | |||||
if (device_num == 0 || device_num > kMaxDeviceNum || device_num != static_cast<int32_t>(device_list.size())) { | |||||
GELOGE(FAILED, "Config para device num: %d not equal to device list size: %d.", device_num, device_list.size()); | |||||
return FAILED; | |||||
} | |||||
#endif | |||||
return SUCCESS; | |||||
} | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status | |||||
ProfilingManager::ProfStartProfiling(uint64_t module, const std::map<std::string, std::string> &config_para) { | |||||
#ifdef DAVINCI_SUPPORT_PROFILING | |||||
std::lock_guard<std::mutex> lock(mutex_); | |||||
int32_t device_num = 0; | |||||
vector<int32_t> device_list; | |||||
if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) { | |||||
GELOGE(FAILED, "Prof start parse param failed."); | |||||
return FAILED; | |||||
} | |||||
auto *device_id = new (std::nothrow) uint32_t[device_num]; | |||||
if (device_id == nullptr) { | |||||
GELOGE(FAILED, "Prof start parse param failed."); | |||||
return FAILED; | |||||
} | |||||
for (int32_t i = 0; i < device_num; i++) { | |||||
device_id[i] = static_cast<uint32_t>(device_list[i]); | |||||
} | |||||
GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num); | |||||
rtError_t rt_ret = rtProfilerStart(module, device_num, device_id); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
delete[] device_id; | |||||
GELOGE(FAILED, "Runtime profiler config proc failed."); | |||||
return FAILED; | |||||
} | |||||
delete[] device_id; | |||||
device_id = nullptr; | |||||
if ((module & PROF_MODEL_EXECUTE_MASK) == PROF_MODEL_EXECUTE_MASK) { | |||||
for (int32_t i = 0; i < device_num; i++) { | |||||
if (std::find(device_id_.begin(), device_id_.end(), device_list[i]) == device_id_.end()) { | |||||
device_id_.push_back(device_list[i]); | |||||
} | |||||
} | |||||
GELOGI("Prof start: ge execute model start profiling."); | |||||
} | |||||
if ((module & PROF_MODEL_LOAD_MASK) == PROF_MODEL_LOAD_MASK) { | |||||
GELOGW("Prof start: load model module is invalid."); | |||||
} | |||||
UpdateDeviceIdModuleMap(kProfStart, module, device_list); | |||||
GELOGI("Prof start profiling success."); | |||||
#endif | |||||
return SUCCESS; | |||||
} | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status | |||||
ProfilingManager::ProfStopProfiling(uint64_t module, const std::map<std::string, std::string> &config_para) { | |||||
#ifdef DAVINCI_SUPPORT_PROFILING | |||||
std::lock_guard<std::mutex> lock(mutex_); | |||||
int32_t device_num = 0; | |||||
vector<int32_t> device_list; | |||||
if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) { | |||||
GELOGE(FAILED, "Prof stop parse param failed."); | |||||
return FAILED; | |||||
} | |||||
auto *device_id = new (std::nothrow) uint32_t[device_num]; | |||||
if (device_id == nullptr) { | |||||
GELOGE(FAILED, "Prof stop parse param failed."); | |||||
return FAILED; | |||||
} | |||||
for (int32_t i = 0; i < device_num; i++) { | |||||
device_id[i] = static_cast<uint32_t>(device_list[i]); | |||||
} | |||||
GELOGI("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num); | |||||
rtError_t rt_ret = rtProfilerStop(module, device_num, device_id); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
delete[] device_id; | |||||
GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); | |||||
return FAILED; | |||||
} | |||||
delete[] device_id; | |||||
device_id = nullptr; | |||||
uint64_t execute_model_mask = module & PROF_MODEL_EXECUTE_MASK; | |||||
if (execute_model_mask == PROF_MODEL_EXECUTE_MASK) { | |||||
for (int32_t i = 0; i < device_num; i++) { | |||||
auto iter = std::find(device_id_.begin(), device_id_.end(), device_list[i]); | |||||
if (iter != device_id_.end()) { | |||||
device_id_.erase(iter); | |||||
} | |||||
} | |||||
GELOGI("Prof stop: ge execute model stop profiling."); | |||||
} | |||||
if ((module & PROF_MODEL_LOAD_MASK) == PROF_MODEL_LOAD_MASK) { | |||||
GELOGW("Prof stop: load model module is invalid."); | |||||
} | |||||
UpdateDeviceIdModuleMap(kProfStop, module, device_list); | |||||
GELOGI("Prof stop profiling success."); | |||||
#endif | |||||
return SUCCESS; | |||||
} | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::UpdateDeviceIdModuleMap( | |||||
string prof_type, uint64_t module, const vector<int32_t> &device_list) { | |||||
#ifdef DAVINCI_SUPPORT_PROFILING | |||||
if (prof_type == kProfStart) { | |||||
for (uint32_t i = 0; i < device_list.size(); i++) { | |||||
auto iter = device_id_module_map_.find(device_list[i]); | |||||
if (iter != device_id_module_map_.end()) { | |||||
uint64_t prof_on_module = device_id_module_map_[device_list[i]]; | |||||
// save all profiling on module of device | |||||
device_id_module_map_[device_list[i]] = prof_on_module | module; | |||||
} else { | |||||
device_id_module_map_[device_list[i]] = module; | |||||
} | |||||
} | |||||
} else if (prof_type == kProfStop) { | |||||
for (uint32_t i = 0; i < device_list.size(); i++) { | |||||
auto iter = device_id_module_map_.find(device_list[i]); | |||||
if (iter != device_id_module_map_.end()) { | |||||
uint64_t prof_on_module = device_id_module_map_[device_list[i]]; | |||||
uint64_t prof_off_module = prof_on_module & module; | |||||
uint64_t prof_on_left_module = prof_on_module & (~prof_off_module); | |||||
// stop profiling on module of device | |||||
device_id_module_map_[device_list[i]] = prof_on_left_module; | |||||
} | |||||
} | |||||
} else { | |||||
GELOGI("No need to update device_id module map."); | |||||
} | |||||
#endif | |||||
} | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::ProfilingModelExecuteOn() const { | |||||
int32_t logic_device_id = 0; | |||||
rtError_t rt_ret = rtGetDevice(&logic_device_id); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(rt_ret, "Runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id); | |||||
} | |||||
GELOGI("Current logic_device_id:%d", logic_device_id); | |||||
uint32_t phy_device_id = 0; | |||||
rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); | |||||
} | |||||
GELOGI("Current phy_device_id:%d", phy_device_id); | |||||
bool execute_model_prof_on = false; | |||||
auto iter = std::find(device_id_.begin(), device_id_.end(), phy_device_id); | |||||
if (iter != device_id_.end()) { | |||||
execute_model_prof_on = true; | |||||
} | |||||
GELOGI("Flag is_execute_profiling: %d, execute_model_prof_on: %d", is_execute_profiling_, execute_model_prof_on); | |||||
return is_execute_profiling_ || execute_model_prof_on; | |||||
} | |||||
/** | /** | ||||
* @brief Profiling PluginImpl | * @brief Profiling PluginImpl | ||||
*/ | */ | ||||
@@ -18,6 +18,7 @@ | |||||
#define GE_COMMON_PROFILING_PROFILING_MANAGER_H_ | #define GE_COMMON_PROFILING_PROFILING_MANAGER_H_ | ||||
#include <nlohmann/json.hpp> | #include <nlohmann/json.hpp> | ||||
#include <mutex> | |||||
#include <map> | #include <map> | ||||
#include <string> | #include <string> | ||||
#include <vector> | #include <vector> | ||||
@@ -27,14 +28,17 @@ | |||||
#include "external/register/register_types.h" | #include "external/register/register_types.h" | ||||
#include "toolchain/prof_engine.h" | #include "toolchain/prof_engine.h" | ||||
#include "toolchain/prof_mgr_core.h" | #include "toolchain/prof_mgr_core.h" | ||||
#include "toolchain/prof_acl_api.h" | |||||
using std::map; | using std::map; | ||||
using std::string; | using std::string; | ||||
using std::vector; | using std::vector; | ||||
using Json = nlohmann::json; | using Json = nlohmann::json; | ||||
namespace ge { | |||||
namespace { | |||||
const std::string GE_PROFILING_MODULE = "Framework"; | const std::string GE_PROFILING_MODULE = "Framework"; | ||||
} // namespace | |||||
namespace ge { | |||||
// register Plugin | // register Plugin | ||||
class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY PluginImpl : public Msprof::Engine::PluginIntf { | class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY PluginImpl : public Msprof::Engine::PluginIntf { | ||||
public: | public: | ||||
@@ -69,10 +73,17 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||||
ge::Status InitFromOptions(const Options &options); | ge::Status InitFromOptions(const Options &options); | ||||
ge::Status InitFromAclCfg(const std::string &config); | ge::Status InitFromAclCfg(const std::string &config); | ||||
ge::Status StartProfiling(int32_t iter, int32_t device_id); | ge::Status StartProfiling(int32_t iter, int32_t device_id); | ||||
ge::Status ProfInit(uint64_t module); | |||||
ge::Status ProfFinalize(); | |||||
ge::Status ProfStartProfiling(uint64_t module, const std::map<std::string, std::string> &config_para); | |||||
ge::Status ProfStopProfiling(uint64_t module, const std::map<std::string, std::string> &config_para); | |||||
void StopProfiling(); | void StopProfiling(); | ||||
bool ProfilingOpTraceOn() const { return is_op_trace_; } | bool ProfilingOpTraceOn() const { return is_op_trace_; } | ||||
bool ProfilingLoadFlag() const { return is_load_; } | bool ProfilingLoadFlag() const { return is_load_; } | ||||
bool ProfilingOn() const { return is_profiling_; } | |||||
bool ProfilingTrainingTraceOn() const { return is_training_trace_; } | |||||
bool ProfilingModelLoadOn() const { return is_load_profiling_; } | |||||
bool ProfilingModelExecuteOn() const; | |||||
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // only used by command pattern | |||||
int32_t GetOpTraceIterNum() const { return op_trace_iter_num_; } | int32_t GetOpTraceIterNum() const { return op_trace_iter_num_; } | ||||
void ReportProfilingData(const std::vector<TaskDescInfo> &task_desc_info, | void ReportProfilingData(const std::vector<TaskDescInfo> &task_desc_info, | ||||
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info); | const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info); | ||||
@@ -87,9 +98,17 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||||
private: | private: | ||||
ge::Status ParseFeaturesFromAclCfg(const Json &feature); | ge::Status ParseFeaturesFromAclCfg(const Json &feature); | ||||
bool is_profiling_ = false; | |||||
ge::Status ProfParseParam(const std::map<std::string, std::string> &config_para, int32_t &device_num, | |||||
vector<int32_t> &device_list); | |||||
ge::Status ProfParseDeviceId(const std::map<std::string, std::string> &config_para, vector<int32_t> &device_list); | |||||
uint64_t GetProfilingModule(); | |||||
void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector<int32_t> &device_list); | |||||
bool is_load_profiling_ = false; | |||||
bool is_execute_profiling_ = false; | |||||
bool is_op_trace_ = false; | bool is_op_trace_ = false; | ||||
bool is_load_ = false; | bool is_load_ = false; | ||||
bool is_training_trace_ = false; | |||||
bool is_acl_api_mode_ = false; | |||||
int32_t op_trace_iter_num_ = 0; | int32_t op_trace_iter_num_ = 0; | ||||
string job_id_; | string job_id_; | ||||
string prof_dir_; | string prof_dir_; | ||||
@@ -102,6 +121,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||||
string system_trace_conf_; | string system_trace_conf_; | ||||
string task_trace_conf_; | string task_trace_conf_; | ||||
const ProfilingEngineImpl engine_; | const ProfilingEngineImpl engine_; | ||||
map<int32_t, uint64_t> device_id_module_map_; // key: device_id, value: profiling on module | |||||
std::mutex mutex_; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
#endif // GE_COMMON_PROFILING_PROFILING_MANAGER_H_ | #endif // GE_COMMON_PROFILING_PROFILING_MANAGER_H_ |
@@ -806,7 +806,8 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi | |||||
GELOGI("Start FindProfilingTaskIndex."); | GELOGI("Start FindProfilingTaskIndex."); | ||||
GE_CHECK_NOTNULL(graph); | GE_CHECK_NOTNULL(graph); | ||||
const char *profiling_mode = std::getenv(kProfilingMode); | const char *profiling_mode = std::getenv(kProfilingMode); | ||||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn(); | |||||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | |||||
ProfilingManager::Instance().ProfilingTrainingTraceOn(); | |||||
if (!is_profiling) { | if (!is_profiling) { | ||||
GELOGW("Profiling is not open."); | GELOGW("Profiling is not open."); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -853,7 +854,8 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const | |||||
vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | ||||
vector<domi::TaskDef> &task_def_list) { | vector<domi::TaskDef> &task_def_list) { | ||||
const char *profiling_mode = std::getenv(kProfilingMode); | const char *profiling_mode = std::getenv(kProfilingMode); | ||||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn(); | |||||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | |||||
ProfilingManager::Instance().ProfilingTrainingTraceOn(); | |||||
if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || | if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || | ||||
(profiling_point.end_index == 0)) { | (profiling_point.end_index == 0)) { | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -909,7 +911,8 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P | |||||
vector<domi::TaskDef> &task_def_list) { | vector<domi::TaskDef> &task_def_list) { | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
const char *profiling_mode = std::getenv(kProfilingMode); | const char *profiling_mode = std::getenv(kProfilingMode); | ||||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn(); | |||||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | |||||
ProfilingManager::Instance().ProfilingTrainingTraceOn(); | |||||
if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || | if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || | ||||
(profiling_point.end_index == 0)) { | (profiling_point.end_index == 0)) { | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -662,7 +662,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
(void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_); | (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_); | ||||
// collect profiling for ge | // collect profiling for ge | ||||
if (ProfilingManager::Instance().ProfilingOn()) { | |||||
if (ProfilingManager::Instance().ProfilingModelLoadOn()) { | |||||
std::vector<ComputeGraphDescInfo> compute_graph_desc_info; | std::vector<ComputeGraphDescInfo> compute_graph_desc_info; | ||||
Status ret1 = GetComputeGraphInfo(compute_graph, compute_graph_desc_info); | Status ret1 = GetComputeGraphInfo(compute_graph, compute_graph_desc_info); | ||||
if (ret1 != SUCCESS) { | if (ret1 != SUCCESS) { | ||||
@@ -2384,14 +2384,15 @@ void *DavinciModel::Run(DavinciModel *model) { | |||||
GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(Model_SyncVarData, "Model Run SyncVarData")); | GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(Model_SyncVarData, "Model Run SyncVarData")); | ||||
GELOGI("Copy input data, model id:%u", model_id); | GELOGI("Copy input data, model id:%u", model_id); | ||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_PRE_PROC_START)); | |||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), | |||||
model->SetProfileTime(MODEL_PRE_PROC_START)); | |||||
ret = model->CopyInputData(current_data, false); | ret = model->CopyInputData(current_data, false); | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | ||||
ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); | ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); | ||||
CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | ||||
continue, "Copy input data to model failed."); // [No need to check value] | continue, "Copy input data to model failed."); // [No need to check value] | ||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); | |||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_INFER_START)); | |||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); | |||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_START)); | |||||
if (ProfilingManager::Instance().ProfilingOpTraceOn()) { | if (ProfilingManager::Instance().ProfilingOpTraceOn()) { | ||||
GELOGI("GetOpTraceIterNum:%d", ProfilingManager::Instance().GetOpTraceIterNum()); | GELOGI("GetOpTraceIterNum:%d", ProfilingManager::Instance().GetOpTraceIterNum()); | ||||
for (int32_t i = 0; i < ProfilingManager::Instance().GetOpTraceIterNum(); i++) { | for (int32_t i = 0; i < ProfilingManager::Instance().GetOpTraceIterNum(); i++) { | ||||
@@ -2444,10 +2445,11 @@ void *DavinciModel::Run(DavinciModel *model) { | |||||
GELOGI("rtStreamSynchronize end."); | GELOGI("rtStreamSynchronize end."); | ||||
GE_IF_BOOL_EXEC(model->is_first_execute_, | GE_IF_BOOL_EXEC(model->is_first_execute_, | ||||
GE_TIMESTAMP_EVENT_END(rtStreamSynchronize, "GraphExcute::Wait for rtStreamSynchronize")); | GE_TIMESTAMP_EVENT_END(rtStreamSynchronize, "GraphExcute::Wait for rtStreamSynchronize")); | ||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_INFER_END)); | |||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_END)); | |||||
} | } | ||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_AFTER_PROC_START)); | |||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), | |||||
model->SetProfileTime(MODEL_AFTER_PROC_START)); | |||||
GE_TIMESTAMP_START(ReturnResult3); | GE_TIMESTAMP_START(ReturnResult3); | ||||
// copy output data from device to host | // copy output data from device to host | ||||
GE_IF_BOOL_EXEC(!model->output_op_list_.empty(), | GE_IF_BOOL_EXEC(!model->output_op_list_.empty(), | ||||
@@ -2456,8 +2458,9 @@ void *DavinciModel::Run(DavinciModel *model) { | |||||
GE_IF_BOOL_EXEC(model->output_op_list_.empty(), (void)model->ReturnNoOutput(current_data.index)); | GE_IF_BOOL_EXEC(model->output_op_list_.empty(), (void)model->ReturnNoOutput(current_data.index)); | ||||
GE_IF_BOOL_EXEC(model->is_first_execute_, | GE_IF_BOOL_EXEC(model->is_first_execute_, | ||||
GE_TIMESTAMP_EVENT_END(ReturnResult3, "GraphExcute::CopyDataFromDeviceToHost")); | GE_TIMESTAMP_EVENT_END(ReturnResult3, "GraphExcute::CopyDataFromDeviceToHost")); | ||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_AFTER_PROC_END)); | |||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), (void)model->SinkTimeProfile(current_data)); | |||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), | |||||
model->SetProfileTime(MODEL_AFTER_PROC_END)); | |||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)model->SinkTimeProfile(current_data)); | |||||
model->iterator_count_++; | model->iterator_count_++; | ||||
model->is_first_execute_ = false; | model->is_first_execute_ = false; | ||||
@@ -3402,32 +3405,32 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||||
zero_copy_batch_label_addrs_.clear(); | zero_copy_batch_label_addrs_.clear(); | ||||
} | } | ||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_PRE_PROC_START)); | |||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_START)); | |||||
Status ret = CopyModelData(input_data, output_data, is_dynamic_); | Status ret = CopyModelData(input_data, output_data, is_dynamic_); | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy input data to model failed. model id: %u", | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy input data to model failed. model id: %u", | ||||
model_id_); | model_id_); | ||||
GELOGI("current_data.index=%u", input_data.index); | GELOGI("current_data.index=%u", input_data.index); | ||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_PRE_PROC_END)); | |||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END)); | |||||
if (!task_list_.empty()) { | if (!task_list_.empty()) { | ||||
GELOGD("rtModelExecute do"); | GELOGD("rtModelExecute do"); | ||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_INFER_START)); | |||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_START)); | |||||
rtError_t rt_ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0); | rtError_t rt_ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0); | ||||
GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret)); | GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret)); | ||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_INFER_END)); | |||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END)); | |||||
GELOGI("rtModelExecute end"); | GELOGI("rtModelExecute end"); | ||||
} | } | ||||
if (!is_async_mode_) { | if (!is_async_mode_) { | ||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_AFTER_PROC_START)); | |||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_START)); | |||||
ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE); | ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE); | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy Output data to user failed."); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy Output data to user failed."); | ||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_AFTER_PROC_END)); | |||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_END)); | |||||
} | } | ||||
// report model time data | // report model time data | ||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), (void)SinkTimeProfile(input_data)); | |||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)SinkTimeProfile(input_data)); | |||||
GELOGI("Model run end, model id:%u", model_id_); | GELOGI("Model run end, model id:%u", model_id_); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -35,6 +35,14 @@ thread_local uint32_t device_count = 0; | |||||
namespace { | namespace { | ||||
const int kCmdParSize = 2; | const int kCmdParSize = 2; | ||||
const int kDumpCmdPairSize = 2; | const int kDumpCmdPairSize = 2; | ||||
const int kProfStartCmdParaSize = 2; | |||||
const std::string kCmdTypeProfile = "profile"; | |||||
const std::string kCmdTypeDump = "dump"; | |||||
const std::string kCmdTypeProfiling = "profiling"; | |||||
const std::string kCmdTypeProfInit = "prof_init"; | |||||
const std::string kCmdTypeProfFinalize = "prof_finalize"; | |||||
const std::string kCmdTypeProfStart = "prof_start"; | |||||
const std::string kCmdTypeProfStop = "prof_stop"; | |||||
} // namespace | } // namespace | ||||
DumpProperties ModelManager::dump_properties_; | DumpProperties ModelManager::dump_properties_; | ||||
@@ -303,7 +311,7 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge | |||||
GELOGI("Parse model %u success.", model_id); | GELOGI("Parse model %u success.", model_id); | ||||
if (ProfilingManager::Instance().ProfilingOn()) { | |||||
if (ProfilingManager::Instance().ProfilingModelLoadOn()) { | |||||
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + | davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + | ||||
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond | timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond | ||||
davinci_model->SetProfileTime(MODEL_LOAD_END); | davinci_model->SetProfileTime(MODEL_LOAD_END); | ||||
@@ -531,7 +539,10 @@ Status ModelManager::Stop(uint32_t model_id) { | |||||
/// | /// | ||||
Status ModelManager::HandleCommand(const Command &command) { | Status ModelManager::HandleCommand(const Command &command) { | ||||
static const std::map<std::string, std::function<uint32_t(const Command &)>> cmds = { | static const std::map<std::string, std::function<uint32_t(const Command &)>> cmds = { | ||||
{"profile", HandleProfileCommand}, {"dump", HandleDumpCommand}, {"profiling", HandleAclProfilingCommand}}; | |||||
{kCmdTypeProfile, HandleProfileCommand}, {kCmdTypeDump, HandleDumpCommand}, | |||||
{kCmdTypeProfiling, HandleAclProfilingCommand}, {kCmdTypeProfInit, HandleProfInitCommand}, | |||||
{kCmdTypeProfFinalize, HandleProfFinalizeCommand}, {kCmdTypeProfStart, HandleProfStartCommand}, | |||||
{kCmdTypeProfStop, HandleProfStopCommand}}; | |||||
auto iter = cmds.find(command.cmd_type); | auto iter = cmds.find(command.cmd_type); | ||||
if (iter == cmds.end()) { | if (iter == cmds.end()) { | ||||
@@ -557,6 +568,71 @@ Status ModelManager::HandleAclProfilingCommand(const Command &command) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status ModelManager::HandleProfInitCommand(const Command &command) { | |||||
uint64_t module_index = command.module_index; | |||||
if (ProfilingManager::Instance().ProfInit(module_index) != SUCCESS) { | |||||
GELOGE(FAILED, "Handle prof init failed."); | |||||
return FAILED; | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status ModelManager::HandleProfFinalizeCommand(const Command &command) { | |||||
if (ProfilingManager::Instance().ProfFinalize() != SUCCESS) { | |||||
GELOGE(FAILED, "Handle prof finalize failed."); | |||||
return FAILED; | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
/* | |||||
* cmd para when prof start | |||||
* "devNums:2" | |||||
* "devIdList:1,2" | |||||
* "profilingOption:PROF_OP_TRACE" | |||||
* "aicoreMetrics:AICORE_ARITHMATIC_THROUGHPUT" | |||||
*/ | |||||
Status ModelManager::HandleProfStartCommand(const Command &command) { | |||||
if (command.cmd_params.size() < kProfStartCmdParaSize) { | |||||
GELOGE(PARAM_INVALID, "When the cmd_type is 'profile start', the size of cmd_params must larger than 2."); | |||||
return PARAM_INVALID; | |||||
} | |||||
std::map<std::string, std::string> cmd_params_map; | |||||
uint32_t step = 2; | |||||
for (uint32_t i = 0; i < command.cmd_params.size(); i += step) { | |||||
if (i + 1 >= command.cmd_params.size()) { | |||||
continue; | |||||
} | |||||
cmd_params_map[command.cmd_params[i]] = command.cmd_params[i + 1]; | |||||
} | |||||
uint64_t module_index = command.module_index; | |||||
if (ProfilingManager::Instance().ProfStartProfiling(module_index, cmd_params_map) != SUCCESS) { | |||||
GELOGE(FAILED, "Handle prof start failed."); | |||||
return FAILED; | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status ModelManager::HandleProfStopCommand(const Command &command) { | |||||
if (command.cmd_params.size() < kProfStartCmdParaSize) { | |||||
GELOGE(PARAM_INVALID, "When the cmd_type is 'profile stop', the size of cmd_params must larger than 2."); | |||||
return PARAM_INVALID; | |||||
} | |||||
std::map<std::string, std::string> cmd_params_map; | |||||
uint32_t step = 2; | |||||
for (uint32_t i = 0; i < command.cmd_params.size(); i += step) { | |||||
if (i + 1 >= command.cmd_params.size()) { | |||||
continue; | |||||
} | |||||
cmd_params_map[command.cmd_params[i]] = command.cmd_params[i + 1]; | |||||
} | |||||
uint64_t module_index = command.module_index; | |||||
if (ProfilingManager::Instance().ProfStopProfiling(module_index, cmd_params_map) != SUCCESS) { | |||||
GELOGE(FAILED, "Handle prof finalize failed."); | |||||
return FAILED; | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status ModelManager::HandleProfileCommand(const Command &command) { | Status ModelManager::HandleProfileCommand(const Command &command) { | ||||
if (command.cmd_params.size() < kCmdParSize) { | if (command.cmd_params.size() < kCmdParSize) { | ||||
GELOGE(PARAM_INVALID, "When the cmd_type is 'profile', the size of cmd_params must larger than 2."); | GELOGE(PARAM_INVALID, "When the cmd_type is 'profile', the size of cmd_params must larger than 2."); | ||||
@@ -577,15 +653,6 @@ Status ModelManager::HandleProfileCommand(const Command &command) { | |||||
if ((map_key == PROFILER_JOBCTX || map_key == PROFILER_TARGET_PATH || map_key == RTS_PROFILE_PATH)) { | if ((map_key == PROFILER_JOBCTX || map_key == PROFILER_TARGET_PATH || map_key == RTS_PROFILE_PATH)) { | ||||
PropertiesManager::Instance().SetPropertyValue(map_key, value); | PropertiesManager::Instance().SetPropertyValue(map_key, value); | ||||
} | } | ||||
if ((map_key == PROFILE_STOP_KEY) && (value == PROFILE_STOP_VALUE)) { | |||||
rtError_t rt_ret = rtProfilerStop(); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(PARAM_INVALID, "Call rtProfilerStop ret:%d", rt_ret); | |||||
return PARAM_INVALID; | |||||
} | |||||
} | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -875,7 +942,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||||
GELOGI("Parse model %u success.", model_id); | GELOGI("Parse model %u success.", model_id); | ||||
if (ProfilingManager::Instance().ProfilingOn()) { | |||||
if (ProfilingManager::Instance().ProfilingModelLoadOn()) { | |||||
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + | davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + | ||||
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond | timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond | ||||
davinci_model->SetProfileTime(MODEL_LOAD_END); | davinci_model->SetProfileTime(MODEL_LOAD_END); | ||||
@@ -158,6 +158,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
static ge::Status HandleAclProfilingCommand(const Command &command); | static ge::Status HandleAclProfilingCommand(const Command &command); | ||||
static ge::Status HandleProfileCommand(const Command &command); | static ge::Status HandleProfileCommand(const Command &command); | ||||
static ge::Status HandleDumpCommand(const Command &command); | static ge::Status HandleDumpCommand(const Command &command); | ||||
static ge::Status HandleProfInitCommand(const Command &command); | |||||
static ge::Status HandleProfFinalizeCommand(const Command &command); | |||||
static ge::Status HandleProfStartCommand(const Command &command); | |||||
static ge::Status HandleProfStopCommand(const Command &command); | |||||
/// | /// | ||||
/// @ingroup domi_ome | /// @ingroup domi_ome | ||||
/// @brief get model memory usage | /// @brief get model memory usage | ||||
@@ -565,10 +565,12 @@ Status GraphManager::PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, | |||||
} | } | ||||
GM_RUN_AND_DUMP_PERF("Optimize1", OptimizeStage1, compute_graph); | GM_RUN_AND_DUMP_PERF("Optimize1", OptimizeStage1, compute_graph); | ||||
GM_RUN_AND_DUMP_PERF("InferShape2", compute_graph->InferShapeInNeed); | GM_RUN_AND_DUMP_PERF("InferShape2", compute_graph->InferShapeInNeed); | ||||
PassManager graph_pass; | |||||
GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass)) | |||||
GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); | |||||
const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); | |||||
if (unknown_shape_skip != nullptr) { | |||||
PassManager graph_pass; | |||||
GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass)) | |||||
GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); | |||||
} | |||||
GE_CHK_STATUS_RET(graph_optimize_.IdentifyReference(compute_graph), "Identify reference failed."); | GE_CHK_STATUS_RET(graph_optimize_.IdentifyReference(compute_graph), "Identify reference failed."); | ||||
GELOGI("PreRun:PreRunOptimizeOriginalGraph success."); | GELOGI("PreRun:PreRunOptimizeOriginalGraph success."); | ||||
@@ -1951,9 +1953,9 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||||
names_to_passes.emplace_back("MergePass", &merge_pass); | names_to_passes.emplace_back("MergePass", &merge_pass); | ||||
names_to_passes.emplace_back("CastRemovePass", &cast_remove_pass); | names_to_passes.emplace_back("CastRemovePass", &cast_remove_pass); | ||||
names_to_passes.emplace_back("TransposeTransDataPass", &transpose_transdata_pass); | names_to_passes.emplace_back("TransposeTransDataPass", &transpose_transdata_pass); | ||||
names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); | |||||
names_to_passes.emplace_back("TransOpSymmetryEliminationPass", &symmetry_elimination_pass); | names_to_passes.emplace_back("TransOpSymmetryEliminationPass", &symmetry_elimination_pass); | ||||
names_to_passes.emplace_back("TransOpNearbyAllreduceFusionPass", &trans_op_nearby_allreduce_fusion_pass); | names_to_passes.emplace_back("TransOpNearbyAllreduceFusionPass", &trans_op_nearby_allreduce_fusion_pass); | ||||
names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); | |||||
names_to_passes.emplace_back("DimensionComputePass", &dimension_compute_pass); | names_to_passes.emplace_back("DimensionComputePass", &dimension_compute_pass); | ||||
names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); | names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); | ||||
names_to_passes.emplace_back("DimensionAdjustPass", &dimension_adjust_pass); | names_to_passes.emplace_back("DimensionAdjustPass", &dimension_adjust_pass); | ||||
@@ -43,13 +43,18 @@ | |||||
#define REQUIRE_SUCCESS(cond, ...) REQUIRE(((cond) == SUCCESS), __VA_ARGS__) | #define REQUIRE_SUCCESS(cond, ...) REQUIRE(((cond) == SUCCESS), __VA_ARGS__) | ||||
#define REQUIRE_GRAPH_SUCCESS(cond, ...) REQUIRE(((cond) == GRAPH_SUCCESS), __VA_ARGS__) | #define REQUIRE_GRAPH_SUCCESS(cond, ...) REQUIRE(((cond) == GRAPH_SUCCESS), __VA_ARGS__) | ||||
bool IsExperimental() { | |||||
const static bool kIsExperimental = (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") != nullptr); | |||||
return kIsExperimental; | |||||
} | |||||
namespace ge { | namespace ge { | ||||
using Cluster = DynamicShapePartitioner::Cluster; | using Cluster = DynamicShapePartitioner::Cluster; | ||||
using ClusterPtr = std::shared_ptr<Cluster>; | using ClusterPtr = std::shared_ptr<Cluster>; | ||||
Status DynamicShapePartitioner::Partition() { | Status DynamicShapePartitioner::Partition() { | ||||
REQUIRE_NOT_NULL(root_graph_, "Graph is nullptr."); | REQUIRE_NOT_NULL(root_graph_, "Graph is nullptr."); | ||||
if (!GraphUtils::IsUnknownShapeGraph(root_graph_)) { | |||||
if (!IsExperimental()) { | |||||
GELOGD("Skip dynamic shape partition as not in experimental mode."); | GELOGD("Skip dynamic shape partition as not in experimental mode."); | ||||
REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, false), | REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, false), | ||||
"Failed set dynamic shape partitioned flag on root graph."); | "Failed set dynamic shape partitioned flag on root graph."); | ||||
@@ -20,7 +20,6 @@ | |||||
#include "framework/common/ge_inner_error_codes.h" | #include "framework/common/ge_inner_error_codes.h" | ||||
#include "framework/common/util.h" | #include "framework/common/util.h" | ||||
#include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
#include "graph/debug/ge_attr_define.h" | |||||
namespace ge { | namespace ge { | ||||
/* Pass Explaination: | /* Pass Explaination: | ||||
@@ -43,12 +42,6 @@ Status CtrlEdgeTransferPass::Run(ge::ComputeGraphPtr graph) { | |||||
GELOGD("CtrlEdgeTransferPass start running"); | GELOGD("CtrlEdgeTransferPass start running"); | ||||
GE_CHECK_NOTNULL(graph); | GE_CHECK_NOTNULL(graph); | ||||
bool is_dynamic_shape = false; | |||||
(void)AttrUtils::GetBool(graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape); | |||||
if (!is_dynamic_shape) { | |||||
return SUCCESS; | |||||
} | |||||
for (ge::NodePtr &n : graph->GetDirectNode()) { | for (ge::NodePtr &n : graph->GetDirectNode()) { | ||||
auto op_desc = n->GetOpDesc(); | auto op_desc = n->GetOpDesc(); | ||||
if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
@@ -1,99 +1,99 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
/** | |||||
* @file hccl_types.h | |||||
* @brief HCCL data type definition | |||||
* | |||||
*/ | |||||
#ifndef HCCL_TYPES_H_ | |||||
#define HCCL_TYPES_H_ | |||||
#include <stdint.h> | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif // __cplusplus | |||||
/** | |||||
* @brief HCCL functions return value definition | |||||
*/ | |||||
typedef enum { | |||||
HCCL_SUCCESS = 0, /**< success */ | |||||
HCCL_E_PARA = 1, /**< parameter error */ | |||||
HCCL_E_PTR = 2, /**< empty pointer */ | |||||
HCCL_E_MEMORY = 3, /**< memory error */ | |||||
HCCL_E_INTERNAL = 4, /**< internal error */ | |||||
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ | |||||
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ | |||||
HCCL_E_UNAVAIL = 7, /**< resource unavailable */ | |||||
HCCL_E_SYSCALL = 8, /**< call system interface error */ | |||||
HCCL_E_TIMEOUT = 9, /**< timeout */ | |||||
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ | |||||
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ | |||||
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ | |||||
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ | |||||
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ | |||||
HCCL_E_RUNTIME = 15, /**< call runtime api fail */ | |||||
HCCL_E_DRV = 16, /**< call driver api fail */ | |||||
HCCL_E_PROFILING = 17, /**< call profiling api fail */ | |||||
HCCL_E_CCE = 18, /**< call cce api fail */ | |||||
HCCL_E_NETWORK = 19, /**< call network api fail */ | |||||
HCCL_E_RESERVED /**< reserved */ | |||||
} HcclResult; | |||||
/** | |||||
* @brief handle to HCCL communicator | |||||
*/ | |||||
typedef void *HcclComm; | |||||
/** | |||||
* @brief HCCL Reduction opperation | |||||
*/ | |||||
typedef enum { | |||||
HCCL_REDUCE_SUM = 0, /**< sum */ | |||||
HCCL_REDUCE_PROD = 1, /**< prod */ | |||||
HCCL_REDUCE_MAX = 2, /**< max */ | |||||
HCCL_REDUCE_MIN = 3, /**< min */ | |||||
HCCL_REDUCE_RESERVED /**< reserved */ | |||||
} HcclReduceOp; | |||||
/** | |||||
* @brief HCCL data type | |||||
*/ | |||||
typedef enum { | |||||
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ | |||||
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ | |||||
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ | |||||
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ | |||||
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ | |||||
HCCL_DATA_TYPE_RESERVED /**< reserved */ | |||||
} HcclDataType; | |||||
const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||||
/** | |||||
* @brief HCCL root info | |||||
*/ | |||||
typedef struct HcclRootInfoDef { | |||||
char internal[HCCL_ROOT_INFO_BYTES]; | |||||
} HcclRootInfo; | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif // __cplusplus | |||||
#endif // HCCL_TYPES_H_ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
/** | |||||
* @file hccl_types.h | |||||
* @brief HCCL data type definition | |||||
* | |||||
*/ | |||||
#ifndef HCCL_TYPES_H_ | |||||
#define HCCL_TYPES_H_ | |||||
#include <stdint.h> | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif // __cplusplus | |||||
/** | |||||
* @brief HCCL functions return value definition | |||||
*/ | |||||
typedef enum { | |||||
HCCL_SUCCESS = 0, /**< success */ | |||||
HCCL_E_PARA = 1, /**< parameter error */ | |||||
HCCL_E_PTR = 2, /**< empty pointer */ | |||||
HCCL_E_MEMORY = 3, /**< memory error */ | |||||
HCCL_E_INTERNAL = 4, /**< internal error */ | |||||
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ | |||||
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ | |||||
HCCL_E_UNAVAIL = 7, /**< resource unavailable */ | |||||
HCCL_E_SYSCALL = 8, /**< call system interface error */ | |||||
HCCL_E_TIMEOUT = 9, /**< timeout */ | |||||
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ | |||||
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ | |||||
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ | |||||
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ | |||||
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ | |||||
HCCL_E_RUNTIME = 15, /**< call runtime api fail */ | |||||
HCCL_E_DRV = 16, /**< call driver api fail */ | |||||
HCCL_E_PROFILING = 17, /**< call profiling api fail */ | |||||
HCCL_E_CCE = 18, /**< call cce api fail */ | |||||
HCCL_E_NETWORK = 19, /**< call network api fail */ | |||||
HCCL_E_RESERVED /**< reserved */ | |||||
} HcclResult; | |||||
/** | |||||
* @brief handle to HCCL communicator | |||||
*/ | |||||
typedef void *HcclComm; | |||||
/** | |||||
* @brief HCCL Reduction opperation | |||||
*/ | |||||
typedef enum { | |||||
HCCL_REDUCE_SUM = 0, /**< sum */ | |||||
HCCL_REDUCE_PROD = 1, /**< prod */ | |||||
HCCL_REDUCE_MAX = 2, /**< max */ | |||||
HCCL_REDUCE_MIN = 3, /**< min */ | |||||
HCCL_REDUCE_RESERVED /**< reserved */ | |||||
} HcclReduceOp; | |||||
/** | |||||
* @brief HCCL data type | |||||
*/ | |||||
typedef enum { | |||||
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ | |||||
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ | |||||
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ | |||||
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ | |||||
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ | |||||
HCCL_DATA_TYPE_RESERVED /**< reserved */ | |||||
} HcclDataType; | |||||
const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||||
/** | |||||
* @brief HCCL root info | |||||
*/ | |||||
typedef struct HcclRootInfoDef { | |||||
char internal[HCCL_ROOT_INFO_BYTES]; | |||||
} HcclRootInfo; | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif // __cplusplus | |||||
#endif // HCCL_TYPES_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -426,13 +426,13 @@ RTS_API rtError_t rtProfilerConfig(uint16_t type); | |||||
* @ingroup profiling_base | * @ingroup profiling_base | ||||
* @brief start rts profiler. | * @brief start rts profiler. | ||||
*/ | */ | ||||
RTS_API rtError_t rtProfilerStart(void); | |||||
RTS_API rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList); | |||||
/** | /** | ||||
* @ingroup profiling_base | * @ingroup profiling_base | ||||
* @brief stop rts profiler. | * @brief stop rts profiler. | ||||
*/ | */ | ||||
RTS_API rtError_t rtProfilerStop(void); | |||||
RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList); | |||||
/** | /** | ||||
* @ingroup profiling_base | * @ingroup profiling_base | ||||
@@ -0,0 +1,155 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef MSPROF_ENGINE_PROF_ACL_API_H_ | |||||
#define MSPROF_ENGINE_PROF_ACL_API_H_ | |||||
#define MSVP_MAX_DEV_NUM 64 | |||||
#define MSVP_PROF_API __attribute__((visibility("default"))) | |||||
// DataTypeConfig | |||||
#define PROF_ACL_API 0x0001 | |||||
#define PROF_TASK_TIME 0x0002 | |||||
#define PROF_AICORE_METRICS 0x0004 | |||||
#define PROF_AICPU_TRACE 0x0008 | |||||
#define PROF_MODEL_EXECUTE 0x0010 | |||||
#define PROF_RUNTIME_API 0x0020 | |||||
#define PROF_RUNTIME_TRACE 0x0040 | |||||
#define PROF_SCHEDULE_TIMELINE 0x0080 | |||||
#define PROF_SCHEDULE_TRACE 0x0100 | |||||
#define PROF_AIVECTORCORE_METRICS 0x0200 | |||||
#define PROF_SUBTASK_TIME 0x0400 | |||||
#define PROF_TRAINING_TRACE 0x0800 | |||||
#define PROF_HCCL_TRACE 0x1000 | |||||
#define PROF_DATA_PROCESS 0x2000 | |||||
#define PROF_TASK_TRACE 0x3842 | |||||
#define PROF_MODEL_LOAD 0x8000000000000000 | |||||
// DataTypeConfig MASK | |||||
#define PROF_ACL_API_MASK 0x0001 | |||||
#define PROF_TASK_TIME_MASK 0x0002 | |||||
#define PROF_AICORE_METRICS_MASK 0x0004 | |||||
#define PROF_AICPU_TRACE_MASK 0x0008 | |||||
#define PROF_MODEL_EXECUTE_MASK 0x0010 | |||||
#define PROF_RUNTIME_API_MASK 0x0020 | |||||
#define PROF_RUNTIME_TRACE_MASK 0x0040 | |||||
#define PROF_SCHEDULE_TIMELINE_MASK 0x0080 | |||||
#define PROF_SCHEDULE_TRACE_MASK 0x0100 | |||||
#define PROF_AIVECTORCORE_METRICS_MASK 0x0200 | |||||
#define PROF_SUBTASK_TIME_MASK 0x0400 | |||||
#define PROF_TRAINING_TRACE_MASK 0x0800 | |||||
#define PROF_HCCL_TRACE_MASK 0x1000 | |||||
#define PROF_DATA_PROCESS_MASK 0x2000 | |||||
#define PROF_MODEL_LOAD_MASK 0x8000000000000000 | |||||
#include <cstdint> | |||||
#include <string> | |||||
/** | |||||
* @name ProrErrorCode | |||||
* @brief error code enum of prof_acl_apis | |||||
*/ | |||||
enum ProfErrorCode { | |||||
PROF_ERROR_NONE = 0, // ok | |||||
PROF_ERROR_PARAM_INVALID, // param invalid, for example nullptr | |||||
PROF_ERROR_REPEAT_INIT, // profiling has already been inited | |||||
PROF_ERROR_CONFIG_INVALID, // config invalid, for example invalid json string | |||||
PROF_ERROR_DIR_NO_ACCESS, // dir is not accessable | |||||
PROF_ERROR_FAILURE, // failed to init or start profiling | |||||
PROF_ERROR_NOT_INITED, // profiling has not been inited | |||||
PROF_ERROR_DEVICE_INVALID, // device id invalid | |||||
PROF_ERROR_UNSUPPORTED, // unsupported data type or ai core metrics | |||||
PROF_ERROR_REPEAT_START, // profiilng has already been started | |||||
PROF_ERROR_NOT_STARTED, // profiling has not been started | |||||
}; | |||||
/** | |||||
* @brief transfer profiling config in acl.json to sample config | |||||
* @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...} | |||||
* @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]} | |||||
* @return ProfErrorCode | |||||
*/ | |||||
MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg); | |||||
/** | |||||
* @name ProfInit | |||||
* @brief init profiling | |||||
* @param profInitCfg [IN] config of init profiling of json format | |||||
* @return ProfErrorCode | |||||
*/ | |||||
MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg); | |||||
/** | |||||
* @name ProfAicoreMetrics | |||||
* @brief aicore metrics enum | |||||
*/ | |||||
enum ProfAicoreMetrics { | |||||
PROF_AICORE_ARITHMATIC_THROUGHPUT = 0, | |||||
PROF_AICORE_PIPELINE = 1, | |||||
PROF_AICORE_SYNCHRONIZATION = 2, | |||||
PROF_AICORE_MEMORY = 3, | |||||
PROF_AICORE_INTERNAL_MEMORY = 4, | |||||
PROF_AICORE_STALL = 5, | |||||
PROF_AICORE_EVENT = 255 | |||||
}; | |||||
/** | |||||
* @name ProfConfig | |||||
* @brief struct of ProfStart | |||||
*/ | |||||
struct ProfConfig { | |||||
uint32_t devNums; // length of device id list | |||||
uint32_t devIdList[MSVP_MAX_DEV_NUM]; // physical device id list | |||||
ProfAicoreMetrics aicoreMetrics; // aicore metric | |||||
uint64_t dataTypeConfig; // data type to start profiling | |||||
}; | |||||
/** | |||||
* @name ProfStartProfiling | |||||
* @brief start profiling | |||||
* @param profStartCfg [IN] config to start profiling | |||||
* @return ProfErrorCode | |||||
*/ | |||||
MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg); | |||||
/** | |||||
* @name ProfStopConfig | |||||
* @brief struct of ProfStop | |||||
*/ | |||||
struct ProfStopConfig { | |||||
uint64_t padding; | |||||
}; | |||||
/** | |||||
* @name ProfStopProfiling | |||||
* @brief stop profiling | |||||
* @param profStopCfg [IN] config to stop profiling | |||||
* @return ProfErrorCode | |||||
*/ | |||||
MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg); | |||||
/** | |||||
* @name ProfFinalize | |||||
* @brief finalize profiling task | |||||
* @return ProfErrorCode | |||||
*/ | |||||
MSVP_PROF_API int32_t ProfFinalize(); | |||||
#endif // MSPROF_ENGINE_PROF_ACL_API_H_ |