@@ -93,6 +93,7 @@ struct OutputData { | |||
struct Command { | |||
std::string cmd_type; // Command type | |||
std::vector<std::string> cmd_params; // Command params | |||
uint64_t module_index; // prof module | |||
}; | |||
// The definition of I/O shape description | |||
@@ -34,6 +34,11 @@ const char *const kName = "name"; | |||
const char *const kTraceID = "traceId"; | |||
const char *const kProfDir = "resultPath"; | |||
const size_t kReportMaxLen = 2048; | |||
const int32_t kMaxDeviceNum = 256; | |||
const std::string kConfigNumsdev = "devNums"; | |||
const std::string kConfigDevIdList = "devIdList"; | |||
const std::string kProfStart = "prof_start"; | |||
const std::string kProfStop = "prof_stop"; | |||
} // namespace | |||
namespace ge { | |||
@@ -64,7 +69,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||
return ret; | |||
} | |||
if (is_profiling_) { | |||
if (is_load_profiling_) { | |||
// register Framework to profiling | |||
int result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_); | |||
if (result != 0) { | |||
@@ -92,7 +97,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||
const std::string &config) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
try { | |||
is_profiling_ = false; | |||
is_load_profiling_ = false; | |||
is_execute_profiling_ = false; | |||
profiling_opts_.clear(); | |||
op_trace_conf_.clear(); | |||
Json start_prof_conf = Json::parse(config); | |||
@@ -114,7 +120,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||
} | |||
device_id_.push_back(std::stoi(device_id_str)); | |||
} | |||
if (is_all == true) { | |||
if (is_all) { | |||
int32_t count = 0; | |||
rtError_t rt_err = rtGetDeviceCount(&count); | |||
if (rt_err != RT_ERROR_NONE) { | |||
@@ -133,7 +139,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||
GELOGE(FAILED, "Parse feature from acl cfg failed."); | |||
return FAILED; | |||
} | |||
is_profiling_ = true; | |||
is_load_profiling_ = true; | |||
is_execute_profiling_ = true; | |||
} catch (...) { | |||
GELOGE(FAILED, "Json conf is not invalid !"); | |||
return ge::PARAM_INVALID; | |||
@@ -200,21 +207,25 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||
const char *profiling_mode = std::getenv("PROFILING_MODE"); | |||
const char *prof_options = std::getenv("PROFILING_OPTIONS"); | |||
if ((profiling_mode == nullptr) || (strcmp("true", profiling_mode) != 0) || (prof_options == nullptr)) { | |||
is_profiling_ = false; | |||
is_load_profiling_ = false; | |||
is_execute_profiling_ = false; | |||
} else { | |||
std::string prof_options_str = std::string(prof_options); | |||
profiling_opts_ = StringUtils::Split(prof_options_str, ':'); | |||
is_profiling_ = true; | |||
is_load_profiling_ = true; | |||
is_execute_profiling_ = true; | |||
GELOGI("The profiling in env is %s, %s", profiling_mode, prof_options); | |||
} | |||
if (!is_profiling_) { | |||
if (!is_load_profiling_) { | |||
const std::string enable_profiling = "1"; | |||
if (options.profiling_mode != enable_profiling || options.profiling_options.empty()) { | |||
is_profiling_ = false; | |||
is_load_profiling_ = false; | |||
is_execute_profiling_ = false; | |||
return SUCCESS; | |||
} else { | |||
profiling_opts_ = StringUtils::Split(options.profiling_options, ':'); | |||
is_profiling_ = true; | |||
is_load_profiling_ = true; | |||
is_execute_profiling_ = true; | |||
GELOGI("The profiling in options is %s, %s", options.profiling_mode.c_str(), options.profiling_options.c_str()); | |||
} | |||
} | |||
@@ -310,7 +321,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St | |||
} | |||
// runtime startup for profiling | |||
GE_CHK_RT_RET(rtProfilerStart()); | |||
uint64_t module = GetProfilingModule(); | |||
int32_t device_num = 1; | |||
uint32_t device_id_rt = static_cast<uint32_t>(device_id); | |||
GE_CHK_RT_RET(rtProfilerStart(module, device_num, &device_id_rt)); | |||
// call profiling startup API | |||
ProfMgrCfg prof_cfg = {send_profiling_config_}; | |||
@@ -333,11 +347,22 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf | |||
int ret = reporter->Flush(); | |||
GELOGI("Report data end, ret is %d", ret); | |||
} | |||
rtError_t rt_ret = rtProfilerStop(); | |||
uint64_t module = GetProfilingModule(); | |||
int32_t device_num = static_cast<int32_t>(device_id_.size()); | |||
uint32_t *device_id_ptr = new (std::nothrow) uint32_t[device_num]; | |||
if (device_id_ptr == nullptr) { | |||
GELOGE(FAILED, "Stop profiling device id ptr is null."); | |||
return; | |||
} | |||
for (int32_t i = 0; i < device_num; i++) { | |||
device_id_ptr[i] = static_cast<uint32_t>(device_id_[i]); | |||
} | |||
rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGI("Call rtProfilerStop ret:%d", rt_ret); | |||
GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret); | |||
} | |||
delete[] device_id_ptr; | |||
device_id_ptr = nullptr; | |||
for (size_t i = 0; i < prof_handle_vec_.size(); ++i) { | |||
int result = ProfMgrStop(prof_handle_vec_[i]); | |||
@@ -526,13 +551,13 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr | |||
return; | |||
} | |||
GELOGI("current phy_device_id:%d", phy_device_id); | |||
auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id); | |||
if (ret == device_id_.end()) { | |||
GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); | |||
return; | |||
if (!is_acl_api_mode_) { | |||
auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id); | |||
if (ret == device_id_.end()) { | |||
GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); | |||
return; | |||
} | |||
} | |||
GELOGI("start ProfilingTaskDescInfo."); | |||
ProfilingTaskDescInfo(task_desc_info, phy_device_id); | |||
GELOGI("start ProfilingGraphDescInfo."); | |||
@@ -546,6 +571,305 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::SetProfi | |||
recv_profiling_config_ = profiling_cfg; | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t ProfilingManager::GetProfilingModule() { | |||
uint64_t module = PROF_MODEL_EXECUTE_MASK | PROF_RUNTIME_API_MASK | PROF_RUNTIME_TRACE_MASK | | |||
PROF_SCHEDULE_TIMELINE_MASK | PROF_SCHEDULE_TRACE_MASK | PROF_TASK_TIME_MASK | | |||
PROF_SUBTASK_TIME_MASK | PROF_AICPU_TRACE_MASK | PROF_AICORE_METRICS_MASK | | |||
PROF_AIVECTORCORE_METRICS_MASK | PROF_MODEL_LOAD_MASK; | |||
return module; | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfInit(uint64_t module) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
std::lock_guard<std::mutex> lock(mutex_); | |||
uint64_t model_load_mask = module & PROF_MODEL_LOAD_MASK; | |||
if (model_load_mask == PROF_MODEL_LOAD_MASK) { | |||
// register Framework to profiling | |||
int32_t result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_); | |||
if (result != SUCCESS) { | |||
GELOGE(FAILED, "Register profiling engine failed."); | |||
return FAILED; | |||
} | |||
int32_t device_num = -1; | |||
rtError_t rt_ret = rtProfilerStart(model_load_mask, device_num, nullptr); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(FAILED, "Runtime profiler start failed."); | |||
return FAILED; | |||
} | |||
is_load_profiling_ = true; | |||
GELOGI("Prof init: model load profiling on."); | |||
} | |||
uint64_t training_trace_mask = module & PROF_TRAINING_TRACE_MASK; | |||
if (training_trace_mask == PROF_TRAINING_TRACE_MASK) { | |||
is_training_trace_ = true; | |||
} | |||
is_acl_api_mode_ = true; | |||
GELOGI("Prof init success."); | |||
#endif | |||
return SUCCESS; | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFinalize() { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
std::lock_guard<std::mutex> lock(mutex_); | |||
is_load_profiling_ = false; | |||
is_training_trace_ = false; | |||
is_acl_api_mode_ = false; | |||
int32_t ret = Msprof::Engine::UnInit(GE_PROFILING_MODULE); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Profiling plugin uninit failed, ret:%d", ret); | |||
} | |||
int32_t dev_num = -1; | |||
rtError_t rt_ret = rtProfilerStop(PROF_MODEL_LOAD_MASK, dev_num, nullptr); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(FAILED, "Runtime profiler stop failed."); | |||
return FAILED; | |||
} | |||
for (auto device_id_module : device_id_module_map_) { | |||
if (device_id_module.second != 0) { | |||
uint32_t device_id = static_cast<uint32_t>(device_id_module.first); | |||
GELOGI("Prof finalize: device_id: %u, module: 0x%llx.", device_id, device_id_module.second); | |||
rt_ret = rtProfilerStop(device_id_module.second, 1, &device_id); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(FAILED, "Runtime profiler stop failed."); | |||
return FAILED; | |||
} | |||
} | |||
} | |||
device_id_module_map_.clear(); | |||
device_id_.clear(); | |||
GELOGI("Prof finalize success."); | |||
#endif | |||
return SUCCESS; | |||
} | |||
Status ProfilingManager::ProfParseDeviceId(const std::map<std::string, std::string> &config_para, | |||
vector<int32_t> &device_list) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
auto iter = config_para.find(kConfigDevIdList); | |||
if (iter != config_para.end()) { | |||
std::string device_id_list = iter->second; | |||
std::string temp; | |||
vector<std::string> decvice_id; | |||
for (uint32_t i = 0; i < device_id_list.size(); i++) { | |||
if (isdigit(device_id_list[i])) { | |||
temp.append(1, device_id_list[i]); | |||
} else { | |||
if (!temp.empty()) { | |||
decvice_id.emplace_back(temp); | |||
} | |||
temp.clear(); | |||
} | |||
} | |||
if (!temp.empty()) { | |||
decvice_id.emplace_back(temp); | |||
} | |||
for (uint32_t i = 0; i < decvice_id.size(); i++) { | |||
try { | |||
int32_t dev_id = std::stoi(decvice_id[i]); | |||
device_list.push_back(dev_id); | |||
} catch (std::invalid_argument &) { | |||
GELOGE(FAILED, "Device id: %s is invalid.", decvice_id[i].c_str()); | |||
return FAILED; | |||
} catch (std::out_of_range &) { | |||
GELOGE(FAILED, "Device id: %s is out of range.", decvice_id[i].c_str()); | |||
} catch (...) { | |||
GELOGE(FAILED, "Device id: %s cannot change to int.", decvice_id[i].c_str()); | |||
return FAILED; | |||
} | |||
} | |||
} else { | |||
GELOGE(FAILED, "Config para not contain device id list."); | |||
return FAILED; | |||
} | |||
#endif | |||
return SUCCESS; | |||
} | |||
Status ProfilingManager::ProfParseParam(const std::map<std::string, std::string> &config_para, int32_t &device_num, | |||
vector<int32_t> &device_list) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
// device num | |||
auto iter = config_para.find(kConfigNumsdev); | |||
if (iter != config_para.end()) { | |||
try { | |||
device_num = std::stoi(iter->second); | |||
} catch (std::invalid_argument &) { | |||
GELOGE(FAILED, "Device nun: %s is invalid.", iter->second.c_str()); | |||
return FAILED; | |||
} catch (std::out_of_range &) { | |||
GELOGE(FAILED, "Device num: %s is out of range.", iter->second.c_str()); | |||
} catch (...) { | |||
GELOGE(FAILED, "Device num: %s cannot change to int.", iter->second.c_str()); | |||
return FAILED; | |||
} | |||
} else { | |||
GELOGE(FAILED, "Config para not contain device num."); | |||
return FAILED; | |||
} | |||
// device id | |||
if (ProfParseDeviceId(config_para, device_list) != SUCCESS) { | |||
GELOGE(FAILED, "Parse config para device id failed."); | |||
return FAILED; | |||
} | |||
if (device_num == 0 || device_num > kMaxDeviceNum || device_num != static_cast<int32_t>(device_list.size())) { | |||
GELOGE(FAILED, "Config para device num: %d not equal to device list size: %d.", device_num, device_list.size()); | |||
return FAILED; | |||
} | |||
#endif | |||
return SUCCESS; | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status | |||
ProfilingManager::ProfStartProfiling(uint64_t module, const std::map<std::string, std::string> &config_para) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
std::lock_guard<std::mutex> lock(mutex_); | |||
int32_t device_num = 0; | |||
vector<int32_t> device_list; | |||
if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) { | |||
GELOGE(FAILED, "Prof start parse param failed."); | |||
return FAILED; | |||
} | |||
auto *device_id = new (std::nothrow) uint32_t[device_num]; | |||
if (device_id == nullptr) { | |||
GELOGE(FAILED, "Prof start parse param failed."); | |||
return FAILED; | |||
} | |||
for (int32_t i = 0; i < device_num; i++) { | |||
device_id[i] = static_cast<uint32_t>(device_list[i]); | |||
} | |||
GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num); | |||
rtError_t rt_ret = rtProfilerStart(module, device_num, device_id); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
delete[] device_id; | |||
GELOGE(FAILED, "Runtime profiler config proc failed."); | |||
return FAILED; | |||
} | |||
delete[] device_id; | |||
device_id = nullptr; | |||
if ((module & PROF_MODEL_EXECUTE_MASK) == PROF_MODEL_EXECUTE_MASK) { | |||
for (int32_t i = 0; i < device_num; i++) { | |||
if (std::find(device_id_.begin(), device_id_.end(), device_list[i]) == device_id_.end()) { | |||
device_id_.push_back(device_list[i]); | |||
} | |||
} | |||
GELOGI("Prof start: ge execute model start profiling."); | |||
} | |||
if ((module & PROF_MODEL_LOAD_MASK) == PROF_MODEL_LOAD_MASK) { | |||
GELOGW("Prof start: load model module is invalid."); | |||
} | |||
UpdateDeviceIdModuleMap(kProfStart, module, device_list); | |||
GELOGI("Prof start profiling success."); | |||
#endif | |||
return SUCCESS; | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status | |||
ProfilingManager::ProfStopProfiling(uint64_t module, const std::map<std::string, std::string> &config_para) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
std::lock_guard<std::mutex> lock(mutex_); | |||
int32_t device_num = 0; | |||
vector<int32_t> device_list; | |||
if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) { | |||
GELOGE(FAILED, "Prof stop parse param failed."); | |||
return FAILED; | |||
} | |||
auto *device_id = new (std::nothrow) uint32_t[device_num]; | |||
if (device_id == nullptr) { | |||
GELOGE(FAILED, "Prof stop parse param failed."); | |||
return FAILED; | |||
} | |||
for (int32_t i = 0; i < device_num; i++) { | |||
device_id[i] = static_cast<uint32_t>(device_list[i]); | |||
} | |||
GELOGI("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num); | |||
rtError_t rt_ret = rtProfilerStop(module, device_num, device_id); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
delete[] device_id; | |||
GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); | |||
return FAILED; | |||
} | |||
delete[] device_id; | |||
device_id = nullptr; | |||
uint64_t execute_model_mask = module & PROF_MODEL_EXECUTE_MASK; | |||
if (execute_model_mask == PROF_MODEL_EXECUTE_MASK) { | |||
for (int32_t i = 0; i < device_num; i++) { | |||
auto iter = std::find(device_id_.begin(), device_id_.end(), device_list[i]); | |||
if (iter != device_id_.end()) { | |||
device_id_.erase(iter); | |||
} | |||
} | |||
GELOGI("Prof stop: ge execute model stop profiling."); | |||
} | |||
if ((module & PROF_MODEL_LOAD_MASK) == PROF_MODEL_LOAD_MASK) { | |||
GELOGW("Prof stop: load model module is invalid."); | |||
} | |||
UpdateDeviceIdModuleMap(kProfStop, module, device_list); | |||
GELOGI("Prof stop profiling success."); | |||
#endif | |||
return SUCCESS; | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::UpdateDeviceIdModuleMap( | |||
string prof_type, uint64_t module, const vector<int32_t> &device_list) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
if (prof_type == kProfStart) { | |||
for (uint32_t i = 0; i < device_list.size(); i++) { | |||
auto iter = device_id_module_map_.find(device_list[i]); | |||
if (iter != device_id_module_map_.end()) { | |||
uint64_t prof_on_module = device_id_module_map_[device_list[i]]; | |||
// save all profiling on module of device | |||
device_id_module_map_[device_list[i]] = prof_on_module | module; | |||
} else { | |||
device_id_module_map_[device_list[i]] = module; | |||
} | |||
} | |||
} else if (prof_type == kProfStop) { | |||
for (uint32_t i = 0; i < device_list.size(); i++) { | |||
auto iter = device_id_module_map_.find(device_list[i]); | |||
if (iter != device_id_module_map_.end()) { | |||
uint64_t prof_on_module = device_id_module_map_[device_list[i]]; | |||
uint64_t prof_off_module = prof_on_module & module; | |||
uint64_t prof_on_left_module = prof_on_module & (~prof_off_module); | |||
// stop profiling on module of device | |||
device_id_module_map_[device_list[i]] = prof_on_left_module; | |||
} | |||
} | |||
} else { | |||
GELOGI("No need to update device_id module map."); | |||
} | |||
#endif | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::ProfilingModelExecuteOn() const { | |||
int32_t logic_device_id = 0; | |||
rtError_t rt_ret = rtGetDevice(&logic_device_id); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "Runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id); | |||
} | |||
GELOGI("Current logic_device_id:%d", logic_device_id); | |||
uint32_t phy_device_id = 0; | |||
rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); | |||
} | |||
GELOGI("Current phy_device_id:%d", phy_device_id); | |||
bool execute_model_prof_on = false; | |||
auto iter = std::find(device_id_.begin(), device_id_.end(), phy_device_id); | |||
if (iter != device_id_.end()) { | |||
execute_model_prof_on = true; | |||
} | |||
GELOGI("Flag is_execute_profiling: %d, execute_model_prof_on: %d", is_execute_profiling_, execute_model_prof_on); | |||
return is_execute_profiling_ || execute_model_prof_on; | |||
} | |||
/** | |||
* @brief Profiling PluginImpl | |||
*/ | |||
@@ -18,6 +18,7 @@ | |||
#define GE_COMMON_PROFILING_PROFILING_MANAGER_H_ | |||
#include <nlohmann/json.hpp> | |||
#include <mutex> | |||
#include <map> | |||
#include <string> | |||
#include <vector> | |||
@@ -27,14 +28,17 @@ | |||
#include "external/register/register_types.h" | |||
#include "toolchain/prof_engine.h" | |||
#include "toolchain/prof_mgr_core.h" | |||
#include "toolchain/prof_acl_api.h" | |||
using std::map; | |||
using std::string; | |||
using std::vector; | |||
using Json = nlohmann::json; | |||
namespace ge { | |||
namespace { | |||
const std::string GE_PROFILING_MODULE = "Framework"; | |||
} // namespace | |||
namespace ge { | |||
// register Plugin | |||
class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY PluginImpl : public Msprof::Engine::PluginIntf { | |||
public: | |||
@@ -69,10 +73,17 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
ge::Status InitFromOptions(const Options &options); | |||
ge::Status InitFromAclCfg(const std::string &config); | |||
ge::Status StartProfiling(int32_t iter, int32_t device_id); | |||
ge::Status ProfInit(uint64_t module); | |||
ge::Status ProfFinalize(); | |||
ge::Status ProfStartProfiling(uint64_t module, const std::map<std::string, std::string> &config_para); | |||
ge::Status ProfStopProfiling(uint64_t module, const std::map<std::string, std::string> &config_para); | |||
void StopProfiling(); | |||
bool ProfilingOpTraceOn() const { return is_op_trace_; } | |||
bool ProfilingLoadFlag() const { return is_load_; } | |||
bool ProfilingOn() const { return is_profiling_; } | |||
bool ProfilingTrainingTraceOn() const { return is_training_trace_; } | |||
bool ProfilingModelLoadOn() const { return is_load_profiling_; } | |||
bool ProfilingModelExecuteOn() const; | |||
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // only used by command pattern | |||
int32_t GetOpTraceIterNum() const { return op_trace_iter_num_; } | |||
void ReportProfilingData(const std::vector<TaskDescInfo> &task_desc_info, | |||
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info); | |||
@@ -87,9 +98,17 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
private: | |||
ge::Status ParseFeaturesFromAclCfg(const Json &feature); | |||
bool is_profiling_ = false; | |||
ge::Status ProfParseParam(const std::map<std::string, std::string> &config_para, int32_t &device_num, | |||
vector<int32_t> &device_list); | |||
ge::Status ProfParseDeviceId(const std::map<std::string, std::string> &config_para, vector<int32_t> &device_list); | |||
uint64_t GetProfilingModule(); | |||
void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector<int32_t> &device_list); | |||
bool is_load_profiling_ = false; | |||
bool is_execute_profiling_ = false; | |||
bool is_op_trace_ = false; | |||
bool is_load_ = false; | |||
bool is_training_trace_ = false; | |||
bool is_acl_api_mode_ = false; | |||
int32_t op_trace_iter_num_ = 0; | |||
string job_id_; | |||
string prof_dir_; | |||
@@ -102,6 +121,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
string system_trace_conf_; | |||
string task_trace_conf_; | |||
const ProfilingEngineImpl engine_; | |||
map<int32_t, uint64_t> device_id_module_map_; // key: device_id, value: profiling on module | |||
std::mutex mutex_; | |||
}; | |||
} // namespace ge | |||
#endif // GE_COMMON_PROFILING_PROFILING_MANAGER_H_ |
@@ -806,7 +806,8 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi | |||
GELOGI("Start FindProfilingTaskIndex."); | |||
GE_CHECK_NOTNULL(graph); | |||
const char *profiling_mode = std::getenv(kProfilingMode); | |||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn(); | |||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | |||
ProfilingManager::Instance().ProfilingTrainingTraceOn(); | |||
if (!is_profiling) { | |||
GELOGW("Profiling is not open."); | |||
return SUCCESS; | |||
@@ -853,7 +854,8 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const | |||
vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | |||
vector<domi::TaskDef> &task_def_list) { | |||
const char *profiling_mode = std::getenv(kProfilingMode); | |||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn(); | |||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | |||
ProfilingManager::Instance().ProfilingTrainingTraceOn(); | |||
if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || | |||
(profiling_point.end_index == 0)) { | |||
return SUCCESS; | |||
@@ -909,7 +911,8 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P | |||
vector<domi::TaskDef> &task_def_list) { | |||
GE_CHECK_NOTNULL(op_desc); | |||
const char *profiling_mode = std::getenv(kProfilingMode); | |||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn(); | |||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | |||
ProfilingManager::Instance().ProfilingTrainingTraceOn(); | |||
if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || | |||
(profiling_point.end_index == 0)) { | |||
return SUCCESS; | |||
@@ -662,7 +662,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
(void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_); | |||
// collect profiling for ge | |||
if (ProfilingManager::Instance().ProfilingOn()) { | |||
if (ProfilingManager::Instance().ProfilingModelLoadOn()) { | |||
std::vector<ComputeGraphDescInfo> compute_graph_desc_info; | |||
Status ret1 = GetComputeGraphInfo(compute_graph, compute_graph_desc_info); | |||
if (ret1 != SUCCESS) { | |||
@@ -2384,14 +2384,15 @@ void *DavinciModel::Run(DavinciModel *model) { | |||
GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(Model_SyncVarData, "Model Run SyncVarData")); | |||
GELOGI("Copy input data, model id:%u", model_id); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_PRE_PROC_START)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), | |||
model->SetProfileTime(MODEL_PRE_PROC_START)); | |||
ret = model->CopyInputData(current_data, false); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); | |||
CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||
continue, "Copy input data to model failed."); // [No need to check value] | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_INFER_START)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_START)); | |||
if (ProfilingManager::Instance().ProfilingOpTraceOn()) { | |||
GELOGI("GetOpTraceIterNum:%d", ProfilingManager::Instance().GetOpTraceIterNum()); | |||
for (int32_t i = 0; i < ProfilingManager::Instance().GetOpTraceIterNum(); i++) { | |||
@@ -2444,10 +2445,11 @@ void *DavinciModel::Run(DavinciModel *model) { | |||
GELOGI("rtStreamSynchronize end."); | |||
GE_IF_BOOL_EXEC(model->is_first_execute_, | |||
GE_TIMESTAMP_EVENT_END(rtStreamSynchronize, "GraphExcute::Wait for rtStreamSynchronize")); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_INFER_END)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_END)); | |||
} | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_AFTER_PROC_START)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), | |||
model->SetProfileTime(MODEL_AFTER_PROC_START)); | |||
GE_TIMESTAMP_START(ReturnResult3); | |||
// copy output data from device to host | |||
GE_IF_BOOL_EXEC(!model->output_op_list_.empty(), | |||
@@ -2456,8 +2458,9 @@ void *DavinciModel::Run(DavinciModel *model) { | |||
GE_IF_BOOL_EXEC(model->output_op_list_.empty(), (void)model->ReturnNoOutput(current_data.index)); | |||
GE_IF_BOOL_EXEC(model->is_first_execute_, | |||
GE_TIMESTAMP_EVENT_END(ReturnResult3, "GraphExcute::CopyDataFromDeviceToHost")); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_AFTER_PROC_END)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), (void)model->SinkTimeProfile(current_data)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), | |||
model->SetProfileTime(MODEL_AFTER_PROC_END)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)model->SinkTimeProfile(current_data)); | |||
model->iterator_count_++; | |||
model->is_first_execute_ = false; | |||
@@ -3402,32 +3405,32 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||
zero_copy_batch_label_addrs_.clear(); | |||
} | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_PRE_PROC_START)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_START)); | |||
Status ret = CopyModelData(input_data, output_data, is_dynamic_); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy input data to model failed. model id: %u", | |||
model_id_); | |||
GELOGI("current_data.index=%u", input_data.index); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_PRE_PROC_END)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END)); | |||
if (!task_list_.empty()) { | |||
GELOGD("rtModelExecute do"); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_INFER_START)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_START)); | |||
rtError_t rt_ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0); | |||
GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_INFER_END)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END)); | |||
GELOGI("rtModelExecute end"); | |||
} | |||
if (!is_async_mode_) { | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_AFTER_PROC_START)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_START)); | |||
ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy Output data to user failed."); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_AFTER_PROC_END)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_END)); | |||
} | |||
// report model time data | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), (void)SinkTimeProfile(input_data)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)SinkTimeProfile(input_data)); | |||
GELOGI("Model run end, model id:%u", model_id_); | |||
return SUCCESS; | |||
} | |||
@@ -35,6 +35,14 @@ thread_local uint32_t device_count = 0; | |||
namespace { | |||
const int kCmdParSize = 2; | |||
const int kDumpCmdPairSize = 2; | |||
const int kProfStartCmdParaSize = 2; | |||
const std::string kCmdTypeProfile = "profile"; | |||
const std::string kCmdTypeDump = "dump"; | |||
const std::string kCmdTypeProfiling = "profiling"; | |||
const std::string kCmdTypeProfInit = "prof_init"; | |||
const std::string kCmdTypeProfFinalize = "prof_finalize"; | |||
const std::string kCmdTypeProfStart = "prof_start"; | |||
const std::string kCmdTypeProfStop = "prof_stop"; | |||
} // namespace | |||
DumpProperties ModelManager::dump_properties_; | |||
@@ -303,7 +311,7 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge | |||
GELOGI("Parse model %u success.", model_id); | |||
if (ProfilingManager::Instance().ProfilingOn()) { | |||
if (ProfilingManager::Instance().ProfilingModelLoadOn()) { | |||
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + | |||
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond | |||
davinci_model->SetProfileTime(MODEL_LOAD_END); | |||
@@ -531,7 +539,10 @@ Status ModelManager::Stop(uint32_t model_id) { | |||
/// | |||
Status ModelManager::HandleCommand(const Command &command) { | |||
static const std::map<std::string, std::function<uint32_t(const Command &)>> cmds = { | |||
{"profile", HandleProfileCommand}, {"dump", HandleDumpCommand}, {"profiling", HandleAclProfilingCommand}}; | |||
{kCmdTypeProfile, HandleProfileCommand}, {kCmdTypeDump, HandleDumpCommand}, | |||
{kCmdTypeProfiling, HandleAclProfilingCommand}, {kCmdTypeProfInit, HandleProfInitCommand}, | |||
{kCmdTypeProfFinalize, HandleProfFinalizeCommand}, {kCmdTypeProfStart, HandleProfStartCommand}, | |||
{kCmdTypeProfStop, HandleProfStopCommand}}; | |||
auto iter = cmds.find(command.cmd_type); | |||
if (iter == cmds.end()) { | |||
@@ -557,6 +568,71 @@ Status ModelManager::HandleAclProfilingCommand(const Command &command) { | |||
return SUCCESS; | |||
} | |||
Status ModelManager::HandleProfInitCommand(const Command &command) { | |||
uint64_t module_index = command.module_index; | |||
if (ProfilingManager::Instance().ProfInit(module_index) != SUCCESS) { | |||
GELOGE(FAILED, "Handle prof init failed."); | |||
return FAILED; | |||
} | |||
return SUCCESS; | |||
} | |||
Status ModelManager::HandleProfFinalizeCommand(const Command &command) { | |||
if (ProfilingManager::Instance().ProfFinalize() != SUCCESS) { | |||
GELOGE(FAILED, "Handle prof finalize failed."); | |||
return FAILED; | |||
} | |||
return SUCCESS; | |||
} | |||
/* | |||
* cmd para when prof start | |||
* "devNums:2" | |||
* "devIdList:1,2" | |||
* "profilingOption:PROF_OP_TRACE" | |||
* "aicoreMetrics:AICORE_ARITHMATIC_THROUGHPUT" | |||
*/ | |||
Status ModelManager::HandleProfStartCommand(const Command &command) { | |||
if (command.cmd_params.size() < kProfStartCmdParaSize) { | |||
GELOGE(PARAM_INVALID, "When the cmd_type is 'profile start', the size of cmd_params must larger than 2."); | |||
return PARAM_INVALID; | |||
} | |||
std::map<std::string, std::string> cmd_params_map; | |||
uint32_t step = 2; | |||
for (uint32_t i = 0; i < command.cmd_params.size(); i += step) { | |||
if (i + 1 >= command.cmd_params.size()) { | |||
continue; | |||
} | |||
cmd_params_map[command.cmd_params[i]] = command.cmd_params[i + 1]; | |||
} | |||
uint64_t module_index = command.module_index; | |||
if (ProfilingManager::Instance().ProfStartProfiling(module_index, cmd_params_map) != SUCCESS) { | |||
GELOGE(FAILED, "Handle prof start failed."); | |||
return FAILED; | |||
} | |||
return SUCCESS; | |||
} | |||
Status ModelManager::HandleProfStopCommand(const Command &command) { | |||
if (command.cmd_params.size() < kProfStartCmdParaSize) { | |||
GELOGE(PARAM_INVALID, "When the cmd_type is 'profile stop', the size of cmd_params must larger than 2."); | |||
return PARAM_INVALID; | |||
} | |||
std::map<std::string, std::string> cmd_params_map; | |||
uint32_t step = 2; | |||
for (uint32_t i = 0; i < command.cmd_params.size(); i += step) { | |||
if (i + 1 >= command.cmd_params.size()) { | |||
continue; | |||
} | |||
cmd_params_map[command.cmd_params[i]] = command.cmd_params[i + 1]; | |||
} | |||
uint64_t module_index = command.module_index; | |||
if (ProfilingManager::Instance().ProfStopProfiling(module_index, cmd_params_map) != SUCCESS) { | |||
GELOGE(FAILED, "Handle prof finalize failed."); | |||
return FAILED; | |||
} | |||
return SUCCESS; | |||
} | |||
Status ModelManager::HandleProfileCommand(const Command &command) { | |||
if (command.cmd_params.size() < kCmdParSize) { | |||
GELOGE(PARAM_INVALID, "When the cmd_type is 'profile', the size of cmd_params must larger than 2."); | |||
@@ -577,15 +653,6 @@ Status ModelManager::HandleProfileCommand(const Command &command) { | |||
if ((map_key == PROFILER_JOBCTX || map_key == PROFILER_TARGET_PATH || map_key == RTS_PROFILE_PATH)) { | |||
PropertiesManager::Instance().SetPropertyValue(map_key, value); | |||
} | |||
if ((map_key == PROFILE_STOP_KEY) && (value == PROFILE_STOP_VALUE)) { | |||
rtError_t rt_ret = rtProfilerStop(); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(PARAM_INVALID, "Call rtProfilerStop ret:%d", rt_ret); | |||
return PARAM_INVALID; | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -875,7 +942,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||
GELOGI("Parse model %u success.", model_id); | |||
if (ProfilingManager::Instance().ProfilingOn()) { | |||
if (ProfilingManager::Instance().ProfilingModelLoadOn()) { | |||
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + | |||
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond | |||
davinci_model->SetProfileTime(MODEL_LOAD_END); | |||
@@ -158,6 +158,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
static ge::Status HandleAclProfilingCommand(const Command &command); | |||
static ge::Status HandleProfileCommand(const Command &command); | |||
static ge::Status HandleDumpCommand(const Command &command); | |||
static ge::Status HandleProfInitCommand(const Command &command); | |||
static ge::Status HandleProfFinalizeCommand(const Command &command); | |||
static ge::Status HandleProfStartCommand(const Command &command); | |||
static ge::Status HandleProfStopCommand(const Command &command); | |||
/// | |||
/// @ingroup domi_ome | |||
/// @brief get model memory usage | |||
@@ -565,10 +565,12 @@ Status GraphManager::PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, | |||
} | |||
GM_RUN_AND_DUMP_PERF("Optimize1", OptimizeStage1, compute_graph); | |||
GM_RUN_AND_DUMP_PERF("InferShape2", compute_graph->InferShapeInNeed); | |||
PassManager graph_pass; | |||
GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass)) | |||
GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); | |||
const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); | |||
if (unknown_shape_skip != nullptr) { | |||
PassManager graph_pass; | |||
GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass)) | |||
GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); | |||
} | |||
GE_CHK_STATUS_RET(graph_optimize_.IdentifyReference(compute_graph), "Identify reference failed."); | |||
GELOGI("PreRun:PreRunOptimizeOriginalGraph success."); | |||
@@ -1951,9 +1953,9 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||
names_to_passes.emplace_back("MergePass", &merge_pass); | |||
names_to_passes.emplace_back("CastRemovePass", &cast_remove_pass); | |||
names_to_passes.emplace_back("TransposeTransDataPass", &transpose_transdata_pass); | |||
names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); | |||
names_to_passes.emplace_back("TransOpSymmetryEliminationPass", &symmetry_elimination_pass); | |||
names_to_passes.emplace_back("TransOpNearbyAllreduceFusionPass", &trans_op_nearby_allreduce_fusion_pass); | |||
names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); | |||
names_to_passes.emplace_back("DimensionComputePass", &dimension_compute_pass); | |||
names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); | |||
names_to_passes.emplace_back("DimensionAdjustPass", &dimension_adjust_pass); | |||
@@ -43,13 +43,18 @@ | |||
#define REQUIRE_SUCCESS(cond, ...) REQUIRE(((cond) == SUCCESS), __VA_ARGS__) | |||
#define REQUIRE_GRAPH_SUCCESS(cond, ...) REQUIRE(((cond) == GRAPH_SUCCESS), __VA_ARGS__) | |||
bool IsExperimental() { | |||
const static bool kIsExperimental = (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") != nullptr); | |||
return kIsExperimental; | |||
} | |||
namespace ge { | |||
using Cluster = DynamicShapePartitioner::Cluster; | |||
using ClusterPtr = std::shared_ptr<Cluster>; | |||
Status DynamicShapePartitioner::Partition() { | |||
REQUIRE_NOT_NULL(root_graph_, "Graph is nullptr."); | |||
if (!GraphUtils::IsUnknownShapeGraph(root_graph_)) { | |||
if (!IsExperimental()) { | |||
GELOGD("Skip dynamic shape partition as not in experimental mode."); | |||
REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, false), | |||
"Failed set dynamic shape partitioned flag on root graph."); | |||
@@ -20,7 +20,6 @@ | |||
#include "framework/common/ge_inner_error_codes.h" | |||
#include "framework/common/util.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
namespace ge { | |||
/* Pass Explaination: | |||
@@ -43,12 +42,6 @@ Status CtrlEdgeTransferPass::Run(ge::ComputeGraphPtr graph) { | |||
GELOGD("CtrlEdgeTransferPass start running"); | |||
GE_CHECK_NOTNULL(graph); | |||
bool is_dynamic_shape = false; | |||
(void)AttrUtils::GetBool(graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape); | |||
if (!is_dynamic_shape) { | |||
return SUCCESS; | |||
} | |||
for (ge::NodePtr &n : graph->GetDirectNode()) { | |||
auto op_desc = n->GetOpDesc(); | |||
if (op_desc == nullptr) { | |||
@@ -1,99 +1,99 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/** | |||
* @file hccl_types.h | |||
* @brief HCCL data type definition | |||
* | |||
*/ | |||
#ifndef HCCL_TYPES_H_ | |||
#define HCCL_TYPES_H_ | |||
#include <stdint.h> | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif // __cplusplus | |||
/** | |||
* @brief HCCL functions return value definition | |||
*/ | |||
typedef enum { | |||
HCCL_SUCCESS = 0, /**< success */ | |||
HCCL_E_PARA = 1, /**< parameter error */ | |||
HCCL_E_PTR = 2, /**< empty pointer */ | |||
HCCL_E_MEMORY = 3, /**< memory error */ | |||
HCCL_E_INTERNAL = 4, /**< internal error */ | |||
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ | |||
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ | |||
HCCL_E_UNAVAIL = 7, /**< resource unavailable */ | |||
HCCL_E_SYSCALL = 8, /**< call system interface error */ | |||
HCCL_E_TIMEOUT = 9, /**< timeout */ | |||
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ | |||
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ | |||
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ | |||
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ | |||
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ | |||
HCCL_E_RUNTIME = 15, /**< call runtime api fail */ | |||
HCCL_E_DRV = 16, /**< call driver api fail */ | |||
HCCL_E_PROFILING = 17, /**< call profiling api fail */ | |||
HCCL_E_CCE = 18, /**< call cce api fail */ | |||
HCCL_E_NETWORK = 19, /**< call network api fail */ | |||
HCCL_E_RESERVED /**< reserved */ | |||
} HcclResult; | |||
/** | |||
* @brief handle to HCCL communicator | |||
*/ | |||
typedef void *HcclComm; | |||
/** | |||
* @brief HCCL Reduction opperation | |||
*/ | |||
typedef enum { | |||
HCCL_REDUCE_SUM = 0, /**< sum */ | |||
HCCL_REDUCE_PROD = 1, /**< prod */ | |||
HCCL_REDUCE_MAX = 2, /**< max */ | |||
HCCL_REDUCE_MIN = 3, /**< min */ | |||
HCCL_REDUCE_RESERVED /**< reserved */ | |||
} HcclReduceOp; | |||
/** | |||
* @brief HCCL data type | |||
*/ | |||
typedef enum { | |||
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ | |||
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ | |||
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ | |||
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ | |||
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ | |||
HCCL_DATA_TYPE_RESERVED /**< reserved */ | |||
} HcclDataType; | |||
const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||
/** | |||
* @brief HCCL root info | |||
*/ | |||
typedef struct HcclRootInfoDef { | |||
char internal[HCCL_ROOT_INFO_BYTES]; | |||
} HcclRootInfo; | |||
#ifdef __cplusplus | |||
} | |||
#endif // __cplusplus | |||
#endif // HCCL_TYPES_H_ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/** | |||
* @file hccl_types.h | |||
* @brief HCCL data type definition | |||
* | |||
*/ | |||
#ifndef HCCL_TYPES_H_ | |||
#define HCCL_TYPES_H_ | |||
#include <stdint.h> | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif // __cplusplus | |||
/** | |||
* @brief HCCL functions return value definition | |||
*/ | |||
typedef enum { | |||
HCCL_SUCCESS = 0, /**< success */ | |||
HCCL_E_PARA = 1, /**< parameter error */ | |||
HCCL_E_PTR = 2, /**< empty pointer */ | |||
HCCL_E_MEMORY = 3, /**< memory error */ | |||
HCCL_E_INTERNAL = 4, /**< internal error */ | |||
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ | |||
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ | |||
HCCL_E_UNAVAIL = 7, /**< resource unavailable */ | |||
HCCL_E_SYSCALL = 8, /**< call system interface error */ | |||
HCCL_E_TIMEOUT = 9, /**< timeout */ | |||
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ | |||
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ | |||
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ | |||
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ | |||
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ | |||
HCCL_E_RUNTIME = 15, /**< call runtime api fail */ | |||
HCCL_E_DRV = 16, /**< call driver api fail */ | |||
HCCL_E_PROFILING = 17, /**< call profiling api fail */ | |||
HCCL_E_CCE = 18, /**< call cce api fail */ | |||
HCCL_E_NETWORK = 19, /**< call network api fail */ | |||
HCCL_E_RESERVED /**< reserved */ | |||
} HcclResult; | |||
/** | |||
* @brief handle to HCCL communicator | |||
*/ | |||
typedef void *HcclComm; | |||
/** | |||
* @brief HCCL Reduction opperation | |||
*/ | |||
typedef enum { | |||
HCCL_REDUCE_SUM = 0, /**< sum */ | |||
HCCL_REDUCE_PROD = 1, /**< prod */ | |||
HCCL_REDUCE_MAX = 2, /**< max */ | |||
HCCL_REDUCE_MIN = 3, /**< min */ | |||
HCCL_REDUCE_RESERVED /**< reserved */ | |||
} HcclReduceOp; | |||
/** | |||
* @brief HCCL data type | |||
*/ | |||
typedef enum { | |||
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ | |||
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ | |||
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ | |||
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ | |||
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ | |||
HCCL_DATA_TYPE_RESERVED /**< reserved */ | |||
} HcclDataType; | |||
const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||
/** | |||
* @brief HCCL root info | |||
*/ | |||
typedef struct HcclRootInfoDef { | |||
char internal[HCCL_ROOT_INFO_BYTES]; | |||
} HcclRootInfo; | |||
#ifdef __cplusplus | |||
} | |||
#endif // __cplusplus | |||
#endif // HCCL_TYPES_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -426,13 +426,13 @@ RTS_API rtError_t rtProfilerConfig(uint16_t type); | |||
* @ingroup profiling_base | |||
* @brief start rts profiler. | |||
*/ | |||
RTS_API rtError_t rtProfilerStart(void); | |||
RTS_API rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList); | |||
/** | |||
* @ingroup profiling_base | |||
* @brief stop rts profiler. | |||
*/ | |||
RTS_API rtError_t rtProfilerStop(void); | |||
RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList); | |||
/** | |||
* @ingroup profiling_base | |||
@@ -0,0 +1,155 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef MSPROF_ENGINE_PROF_ACL_API_H_ | |||
#define MSPROF_ENGINE_PROF_ACL_API_H_ | |||
#define MSVP_MAX_DEV_NUM 64 | |||
#define MSVP_PROF_API __attribute__((visibility("default"))) | |||
// DataTypeConfig | |||
#define PROF_ACL_API 0x0001 | |||
#define PROF_TASK_TIME 0x0002 | |||
#define PROF_AICORE_METRICS 0x0004 | |||
#define PROF_AICPU_TRACE 0x0008 | |||
#define PROF_MODEL_EXECUTE 0x0010 | |||
#define PROF_RUNTIME_API 0x0020 | |||
#define PROF_RUNTIME_TRACE 0x0040 | |||
#define PROF_SCHEDULE_TIMELINE 0x0080 | |||
#define PROF_SCHEDULE_TRACE 0x0100 | |||
#define PROF_AIVECTORCORE_METRICS 0x0200 | |||
#define PROF_SUBTASK_TIME 0x0400 | |||
#define PROF_TRAINING_TRACE 0x0800 | |||
#define PROF_HCCL_TRACE 0x1000 | |||
#define PROF_DATA_PROCESS 0x2000 | |||
#define PROF_TASK_TRACE 0x3842 | |||
#define PROF_MODEL_LOAD 0x8000000000000000 | |||
// DataTypeConfig MASK | |||
#define PROF_ACL_API_MASK 0x0001 | |||
#define PROF_TASK_TIME_MASK 0x0002 | |||
#define PROF_AICORE_METRICS_MASK 0x0004 | |||
#define PROF_AICPU_TRACE_MASK 0x0008 | |||
#define PROF_MODEL_EXECUTE_MASK 0x0010 | |||
#define PROF_RUNTIME_API_MASK 0x0020 | |||
#define PROF_RUNTIME_TRACE_MASK 0x0040 | |||
#define PROF_SCHEDULE_TIMELINE_MASK 0x0080 | |||
#define PROF_SCHEDULE_TRACE_MASK 0x0100 | |||
#define PROF_AIVECTORCORE_METRICS_MASK 0x0200 | |||
#define PROF_SUBTASK_TIME_MASK 0x0400 | |||
#define PROF_TRAINING_TRACE_MASK 0x0800 | |||
#define PROF_HCCL_TRACE_MASK 0x1000 | |||
#define PROF_DATA_PROCESS_MASK 0x2000 | |||
#define PROF_MODEL_LOAD_MASK 0x8000000000000000 | |||
#include <cstdint> | |||
#include <string> | |||
/** | |||
* @name ProrErrorCode | |||
* @brief error code enum of prof_acl_apis | |||
*/ | |||
enum ProfErrorCode { | |||
PROF_ERROR_NONE = 0, // ok | |||
PROF_ERROR_PARAM_INVALID, // param invalid, for example nullptr | |||
PROF_ERROR_REPEAT_INIT, // profiling has already been inited | |||
PROF_ERROR_CONFIG_INVALID, // config invalid, for example invalid json string | |||
PROF_ERROR_DIR_NO_ACCESS, // dir is not accessable | |||
PROF_ERROR_FAILURE, // failed to init or start profiling | |||
PROF_ERROR_NOT_INITED, // profiling has not been inited | |||
PROF_ERROR_DEVICE_INVALID, // device id invalid | |||
PROF_ERROR_UNSUPPORTED, // unsupported data type or ai core metrics | |||
PROF_ERROR_REPEAT_START, // profiilng has already been started | |||
PROF_ERROR_NOT_STARTED, // profiling has not been started | |||
}; | |||
/** | |||
* @brief transfer profiling config in acl.json to sample config | |||
* @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...} | |||
* @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]} | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg); | |||
/** | |||
* @name ProfInit | |||
* @brief init profiling | |||
* @param profInitCfg [IN] config of init profiling of json format | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg); | |||
/** | |||
* @name ProfAicoreMetrics | |||
* @brief aicore metrics enum | |||
*/ | |||
enum ProfAicoreMetrics { | |||
PROF_AICORE_ARITHMATIC_THROUGHPUT = 0, | |||
PROF_AICORE_PIPELINE = 1, | |||
PROF_AICORE_SYNCHRONIZATION = 2, | |||
PROF_AICORE_MEMORY = 3, | |||
PROF_AICORE_INTERNAL_MEMORY = 4, | |||
PROF_AICORE_STALL = 5, | |||
PROF_AICORE_EVENT = 255 | |||
}; | |||
/** | |||
* @name ProfConfig | |||
* @brief struct of ProfStart | |||
*/ | |||
struct ProfConfig { | |||
uint32_t devNums; // length of device id list | |||
uint32_t devIdList[MSVP_MAX_DEV_NUM]; // physical device id list | |||
ProfAicoreMetrics aicoreMetrics; // aicore metric | |||
uint64_t dataTypeConfig; // data type to start profiling | |||
}; | |||
/** | |||
* @name ProfStartProfiling | |||
* @brief start profiling | |||
* @param profStartCfg [IN] config to start profiling | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg); | |||
/** | |||
* @name ProfStopConfig | |||
* @brief struct of ProfStop | |||
*/ | |||
struct ProfStopConfig { | |||
uint64_t padding; | |||
}; | |||
/** | |||
* @name ProfStopProfiling | |||
* @brief stop profiling | |||
* @param profStopCfg [IN] config to stop profiling | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg); | |||
/** | |||
* @name ProfFinalize | |||
* @brief finalize profiling task | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfFinalize(); | |||
#endif // MSPROF_ENGINE_PROF_ACL_API_H_ |