From d9194b5c3ca965e084f17b332ed36bf43a492d62 Mon Sep 17 00:00:00 2001 From: wuweikang Date: Thu, 17 Sep 2020 23:25:10 +0800 Subject: [PATCH] sync-from-trunk-to-blue-zone-0917 --- inc/framework/common/ge_types.h | 1 + src/ge/common/profiling/profiling_manager.cc | 362 +++++++++++++++++- src/ge/common/profiling/profiling_manager.h | 27 +- src/ge/graph/build/task_generator.cc | 9 +- .../load/new_model_manager/davinci_model.cc | 33 +- .../load/new_model_manager/model_manager.cc | 91 ++++- .../load/new_model_manager/model_manager.h | 4 + src/ge/graph/manager/graph_manager.cc | 12 +- .../partition/dynamic_shape_partition.cc | 7 +- .../graph/passes/ctrl_edge_transfer_pass.cc | 7 - third_party/fwkacllib/inc/hccl/hccl_types.h | 198 +++++----- .../fwkacllib/inc/register/host_cpu_context.h | 2 +- third_party/fwkacllib/inc/runtime/base.h | 4 +- .../fwkacllib/inc/toolchain/prof_acl_api.h | 155 ++++++++ 14 files changed, 745 insertions(+), 167 deletions(-) mode change 100755 => 100644 third_party/fwkacllib/inc/hccl/hccl_types.h create mode 100644 third_party/fwkacllib/inc/toolchain/prof_acl_api.h diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index 36c1a0bf..9a4fd1f9 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -93,6 +93,7 @@ struct OutputData { struct Command { std::string cmd_type; // Command type std::vector cmd_params; // Command params + uint64_t module_index; // prof module }; // The definition of I/O shape description diff --git a/src/ge/common/profiling/profiling_manager.cc b/src/ge/common/profiling/profiling_manager.cc index 503d52a1..f147db21 100644 --- a/src/ge/common/profiling/profiling_manager.cc +++ b/src/ge/common/profiling/profiling_manager.cc @@ -34,6 +34,11 @@ const char *const kName = "name"; const char *const kTraceID = "traceId"; const char *const kProfDir = "resultPath"; const size_t kReportMaxLen = 2048; +const int32_t kMaxDeviceNum = 256; +const std::string kConfigNumsdev = "devNums"; +const std::string kConfigDevIdList = "devIdList"; +const std::string kProfStart = "prof_start"; +const std::string kProfStop = "prof_stop"; } // namespace namespace ge { @@ -64,7 +69,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In return ret; } - if (is_profiling_) { + if (is_load_profiling_) { // register Framework to profiling int result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_); if (result != 0) { @@ -92,7 +97,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In const std::string &config) { #ifdef DAVINCI_SUPPORT_PROFILING try { - is_profiling_ = false; + is_load_profiling_ = false; + is_execute_profiling_ = false; profiling_opts_.clear(); op_trace_conf_.clear(); Json start_prof_conf = Json::parse(config); @@ -114,7 +120,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In } device_id_.push_back(std::stoi(device_id_str)); } - if (is_all == true) { + if (is_all) { int32_t count = 0; rtError_t rt_err = rtGetDeviceCount(&count); if (rt_err != RT_ERROR_NONE) { @@ -133,7 +139,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In GELOGE(FAILED, "Parse feature from acl cfg failed."); return FAILED; } - is_profiling_ = true; + is_load_profiling_ = true; + is_execute_profiling_ = true; } catch (...) { GELOGE(FAILED, "Json conf is not invalid !"); return ge::PARAM_INVALID; @@ -200,21 +207,25 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In const char *profiling_mode = std::getenv("PROFILING_MODE"); const char *prof_options = std::getenv("PROFILING_OPTIONS"); if ((profiling_mode == nullptr) || (strcmp("true", profiling_mode) != 0) || (prof_options == nullptr)) { - is_profiling_ = false; + is_load_profiling_ = false; + is_execute_profiling_ = false; } else { std::string prof_options_str = std::string(prof_options); profiling_opts_ = StringUtils::Split(prof_options_str, ':'); - is_profiling_ = true; + is_load_profiling_ = true; + is_execute_profiling_ = true; GELOGI("The profiling in env is %s, %s", profiling_mode, prof_options); } - if (!is_profiling_) { + if (!is_load_profiling_) { const std::string enable_profiling = "1"; if (options.profiling_mode != enable_profiling || options.profiling_options.empty()) { - is_profiling_ = false; + is_load_profiling_ = false; + is_execute_profiling_ = false; return SUCCESS; } else { profiling_opts_ = StringUtils::Split(options.profiling_options, ':'); - is_profiling_ = true; + is_load_profiling_ = true; + is_execute_profiling_ = true; GELOGI("The profiling in options is %s, %s", options.profiling_mode.c_str(), options.profiling_options.c_str()); } } @@ -310,7 +321,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St } // runtime startup for profiling - GE_CHK_RT_RET(rtProfilerStart()); + uint64_t module = GetProfilingModule(); + int32_t device_num = 1; + uint32_t device_id_rt = static_cast(device_id); + GE_CHK_RT_RET(rtProfilerStart(module, device_num, &device_id_rt)); // call profiling startup API ProfMgrCfg prof_cfg = {send_profiling_config_}; @@ -333,11 +347,22 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf int ret = reporter->Flush(); GELOGI("Report data end, ret is %d", ret); } - - rtError_t rt_ret = rtProfilerStop(); + uint64_t module = GetProfilingModule(); + int32_t device_num = static_cast(device_id_.size()); + uint32_t *device_id_ptr = new (std::nothrow) uint32_t[device_num]; + if (device_id_ptr == nullptr) { + GELOGE(FAILED, "Stop profiling device id ptr is null."); + return; + } + for (int32_t i = 0; i < device_num; i++) { + device_id_ptr[i] = static_cast(device_id_[i]); + } + rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr); if (rt_ret != RT_ERROR_NONE) { - GELOGI("Call rtProfilerStop ret:%d", rt_ret); + GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret); } + delete[] device_id_ptr; + device_id_ptr = nullptr; for (size_t i = 0; i < prof_handle_vec_.size(); ++i) { int result = ProfMgrStop(prof_handle_vec_[i]); @@ -526,13 +551,13 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr return; } GELOGI("current phy_device_id:%d", phy_device_id); - - auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id); - if (ret == device_id_.end()) { - GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); - return; + if (!is_acl_api_mode_) { + auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id); + if (ret == device_id_.end()) { + GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); + return; + } } - GELOGI("start ProfilingTaskDescInfo."); ProfilingTaskDescInfo(task_desc_info, phy_device_id); GELOGI("start ProfilingGraphDescInfo."); @@ -546,6 +571,305 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::SetProfi recv_profiling_config_ = profiling_cfg; } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t ProfilingManager::GetProfilingModule() { + uint64_t module = PROF_MODEL_EXECUTE_MASK | PROF_RUNTIME_API_MASK | PROF_RUNTIME_TRACE_MASK | + PROF_SCHEDULE_TIMELINE_MASK | PROF_SCHEDULE_TRACE_MASK | PROF_TASK_TIME_MASK | + PROF_SUBTASK_TIME_MASK | PROF_AICPU_TRACE_MASK | PROF_AICORE_METRICS_MASK | + PROF_AIVECTORCORE_METRICS_MASK | PROF_MODEL_LOAD_MASK; + return module; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfInit(uint64_t module) { +#ifdef DAVINCI_SUPPORT_PROFILING + std::lock_guard lock(mutex_); + uint64_t model_load_mask = module & PROF_MODEL_LOAD_MASK; + + if (model_load_mask == PROF_MODEL_LOAD_MASK) { + // register Framework to profiling + int32_t result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_); + if (result != SUCCESS) { + GELOGE(FAILED, "Register profiling engine failed."); + return FAILED; + } + int32_t device_num = -1; + rtError_t rt_ret = rtProfilerStart(model_load_mask, device_num, nullptr); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(FAILED, "Runtime profiler start failed."); + return FAILED; + } + is_load_profiling_ = true; + GELOGI("Prof init: model load profiling on."); + } + + uint64_t training_trace_mask = module & PROF_TRAINING_TRACE_MASK; + if (training_trace_mask == PROF_TRAINING_TRACE_MASK) { + is_training_trace_ = true; + } + is_acl_api_mode_ = true; + GELOGI("Prof init success."); +#endif + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFinalize() { +#ifdef DAVINCI_SUPPORT_PROFILING + std::lock_guard lock(mutex_); + is_load_profiling_ = false; + is_training_trace_ = false; + is_acl_api_mode_ = false; + + int32_t ret = Msprof::Engine::UnInit(GE_PROFILING_MODULE); + if (ret != SUCCESS) { + GELOGE(ret, "Profiling plugin uninit failed, ret:%d", ret); + } + int32_t dev_num = -1; + rtError_t rt_ret = rtProfilerStop(PROF_MODEL_LOAD_MASK, dev_num, nullptr); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(FAILED, "Runtime profiler stop failed."); + return FAILED; + } + + for (auto device_id_module : device_id_module_map_) { + if (device_id_module.second != 0) { + uint32_t device_id = static_cast(device_id_module.first); + GELOGI("Prof finalize: device_id: %u, module: 0x%llx.", device_id, device_id_module.second); + rt_ret = rtProfilerStop(device_id_module.second, 1, &device_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(FAILED, "Runtime profiler stop failed."); + return FAILED; + } + } + } + device_id_module_map_.clear(); + device_id_.clear(); + GELOGI("Prof finalize success."); +#endif + return SUCCESS; +} + +Status ProfilingManager::ProfParseDeviceId(const std::map &config_para, + vector &device_list) { +#ifdef DAVINCI_SUPPORT_PROFILING + auto iter = config_para.find(kConfigDevIdList); + if (iter != config_para.end()) { + std::string device_id_list = iter->second; + std::string temp; + vector decvice_id; + for (uint32_t i = 0; i < device_id_list.size(); i++) { + if (isdigit(device_id_list[i])) { + temp.append(1, device_id_list[i]); + } else { + if (!temp.empty()) { + decvice_id.emplace_back(temp); + } + temp.clear(); + } + } + if (!temp.empty()) { + decvice_id.emplace_back(temp); + } + + for (uint32_t i = 0; i < decvice_id.size(); i++) { + try { + int32_t dev_id = std::stoi(decvice_id[i]); + device_list.push_back(dev_id); + } catch (std::invalid_argument &) { + GELOGE(FAILED, "Device id: %s is invalid.", decvice_id[i].c_str()); + return FAILED; + } catch (std::out_of_range &) { + GELOGE(FAILED, "Device id: %s is out of range.", decvice_id[i].c_str()); + } catch (...) { + GELOGE(FAILED, "Device id: %s cannot change to int.", decvice_id[i].c_str()); + return FAILED; + } + } + } else { + GELOGE(FAILED, "Config para not contain device id list."); + return FAILED; + } +#endif + return SUCCESS; +} + +Status ProfilingManager::ProfParseParam(const std::map &config_para, int32_t &device_num, + vector &device_list) { +#ifdef DAVINCI_SUPPORT_PROFILING + // device num + auto iter = config_para.find(kConfigNumsdev); + if (iter != config_para.end()) { + try { + device_num = std::stoi(iter->second); + } catch (std::invalid_argument &) { + GELOGE(FAILED, "Device nun: %s is invalid.", iter->second.c_str()); + return FAILED; + } catch (std::out_of_range &) { + GELOGE(FAILED, "Device num: %s is out of range.", iter->second.c_str()); + } catch (...) { + GELOGE(FAILED, "Device num: %s cannot change to int.", iter->second.c_str()); + return FAILED; + } + } else { + GELOGE(FAILED, "Config para not contain device num."); + return FAILED; + } + // device id + if (ProfParseDeviceId(config_para, device_list) != SUCCESS) { + GELOGE(FAILED, "Parse config para device id failed."); + return FAILED; + } + + if (device_num == 0 || device_num > kMaxDeviceNum || device_num != static_cast(device_list.size())) { + GELOGE(FAILED, "Config para device num: %d not equal to device list size: %d.", device_num, device_list.size()); + return FAILED; + } +#endif + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status +ProfilingManager::ProfStartProfiling(uint64_t module, const std::map &config_para) { +#ifdef DAVINCI_SUPPORT_PROFILING + std::lock_guard lock(mutex_); + int32_t device_num = 0; + vector device_list; + if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) { + GELOGE(FAILED, "Prof start parse param failed."); + return FAILED; + } + auto *device_id = new (std::nothrow) uint32_t[device_num]; + if (device_id == nullptr) { + GELOGE(FAILED, "Prof start parse param failed."); + return FAILED; + } + for (int32_t i = 0; i < device_num; i++) { + device_id[i] = static_cast(device_list[i]); + } + GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num); + rtError_t rt_ret = rtProfilerStart(module, device_num, device_id); + if (rt_ret != RT_ERROR_NONE) { + delete[] device_id; + GELOGE(FAILED, "Runtime profiler config proc failed."); + return FAILED; + } + delete[] device_id; + device_id = nullptr; + if ((module & PROF_MODEL_EXECUTE_MASK) == PROF_MODEL_EXECUTE_MASK) { + for (int32_t i = 0; i < device_num; i++) { + if (std::find(device_id_.begin(), device_id_.end(), device_list[i]) == device_id_.end()) { + device_id_.push_back(device_list[i]); + } + } + GELOGI("Prof start: ge execute model start profiling."); + } + if ((module & PROF_MODEL_LOAD_MASK) == PROF_MODEL_LOAD_MASK) { + GELOGW("Prof start: load model module is invalid."); + } + UpdateDeviceIdModuleMap(kProfStart, module, device_list); + GELOGI("Prof start profiling success."); +#endif + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status +ProfilingManager::ProfStopProfiling(uint64_t module, const std::map &config_para) { +#ifdef DAVINCI_SUPPORT_PROFILING + std::lock_guard lock(mutex_); + int32_t device_num = 0; + vector device_list; + if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) { + GELOGE(FAILED, "Prof stop parse param failed."); + return FAILED; + } + auto *device_id = new (std::nothrow) uint32_t[device_num]; + if (device_id == nullptr) { + GELOGE(FAILED, "Prof stop parse param failed."); + return FAILED; + } + for (int32_t i = 0; i < device_num; i++) { + device_id[i] = static_cast(device_list[i]); + } + GELOGI("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num); + rtError_t rt_ret = rtProfilerStop(module, device_num, device_id); + if (rt_ret != RT_ERROR_NONE) { + delete[] device_id; + GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); + return FAILED; + } + delete[] device_id; + device_id = nullptr; + uint64_t execute_model_mask = module & PROF_MODEL_EXECUTE_MASK; + if (execute_model_mask == PROF_MODEL_EXECUTE_MASK) { + for (int32_t i = 0; i < device_num; i++) { + auto iter = std::find(device_id_.begin(), device_id_.end(), device_list[i]); + if (iter != device_id_.end()) { + device_id_.erase(iter); + } + } + GELOGI("Prof stop: ge execute model stop profiling."); + } + if ((module & PROF_MODEL_LOAD_MASK) == PROF_MODEL_LOAD_MASK) { + GELOGW("Prof stop: load model module is invalid."); + } + UpdateDeviceIdModuleMap(kProfStop, module, device_list); + GELOGI("Prof stop profiling success."); +#endif + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::UpdateDeviceIdModuleMap( + string prof_type, uint64_t module, const vector &device_list) { +#ifdef DAVINCI_SUPPORT_PROFILING + if (prof_type == kProfStart) { + for (uint32_t i = 0; i < device_list.size(); i++) { + auto iter = device_id_module_map_.find(device_list[i]); + if (iter != device_id_module_map_.end()) { + uint64_t prof_on_module = device_id_module_map_[device_list[i]]; + // save all profiling on module of device + device_id_module_map_[device_list[i]] = prof_on_module | module; + } else { + device_id_module_map_[device_list[i]] = module; + } + } + } else if (prof_type == kProfStop) { + for (uint32_t i = 0; i < device_list.size(); i++) { + auto iter = device_id_module_map_.find(device_list[i]); + if (iter != device_id_module_map_.end()) { + uint64_t prof_on_module = device_id_module_map_[device_list[i]]; + uint64_t prof_off_module = prof_on_module & module; + uint64_t prof_on_left_module = prof_on_module & (~prof_off_module); + // stop profiling on module of device + device_id_module_map_[device_list[i]] = prof_on_left_module; + } + } + } else { + GELOGI("No need to update device_id module map."); + } +#endif +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::ProfilingModelExecuteOn() const { + int32_t logic_device_id = 0; + rtError_t rt_ret = rtGetDevice(&logic_device_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "Runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id); + } + GELOGI("Current logic_device_id:%d", logic_device_id); + + uint32_t phy_device_id = 0; + rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); + } + GELOGI("Current phy_device_id:%d", phy_device_id); + bool execute_model_prof_on = false; + auto iter = std::find(device_id_.begin(), device_id_.end(), phy_device_id); + if (iter != device_id_.end()) { + execute_model_prof_on = true; + } + GELOGI("Flag is_execute_profiling: %d, execute_model_prof_on: %d", is_execute_profiling_, execute_model_prof_on); + return is_execute_profiling_ || execute_model_prof_on; +} + /** * @brief Profiling PluginImpl */ diff --git a/src/ge/common/profiling/profiling_manager.h b/src/ge/common/profiling/profiling_manager.h index 26ee84ca..a030efd3 100644 --- a/src/ge/common/profiling/profiling_manager.h +++ b/src/ge/common/profiling/profiling_manager.h @@ -18,6 +18,7 @@ #define GE_COMMON_PROFILING_PROFILING_MANAGER_H_ #include +#include #include #include #include @@ -27,14 +28,17 @@ #include "external/register/register_types.h" #include "toolchain/prof_engine.h" #include "toolchain/prof_mgr_core.h" +#include "toolchain/prof_acl_api.h" using std::map; using std::string; using std::vector; using Json = nlohmann::json; -namespace ge { +namespace { const std::string GE_PROFILING_MODULE = "Framework"; +} // namespace +namespace ge { // register Plugin class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY PluginImpl : public Msprof::Engine::PluginIntf { public: @@ -69,10 +73,17 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { ge::Status InitFromOptions(const Options &options); ge::Status InitFromAclCfg(const std::string &config); ge::Status StartProfiling(int32_t iter, int32_t device_id); + ge::Status ProfInit(uint64_t module); + ge::Status ProfFinalize(); + ge::Status ProfStartProfiling(uint64_t module, const std::map &config_para); + ge::Status ProfStopProfiling(uint64_t module, const std::map &config_para); void StopProfiling(); bool ProfilingOpTraceOn() const { return is_op_trace_; } bool ProfilingLoadFlag() const { return is_load_; } - bool ProfilingOn() const { return is_profiling_; } + bool ProfilingTrainingTraceOn() const { return is_training_trace_; } + bool ProfilingModelLoadOn() const { return is_load_profiling_; } + bool ProfilingModelExecuteOn() const; + bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // only used by command pattern int32_t GetOpTraceIterNum() const { return op_trace_iter_num_; } void ReportProfilingData(const std::vector &task_desc_info, const std::vector &compute_graph_desc_info); @@ -87,9 +98,17 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { private: ge::Status ParseFeaturesFromAclCfg(const Json &feature); - bool is_profiling_ = false; + ge::Status ProfParseParam(const std::map &config_para, int32_t &device_num, + vector &device_list); + ge::Status ProfParseDeviceId(const std::map &config_para, vector &device_list); + uint64_t GetProfilingModule(); + void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector &device_list); + bool is_load_profiling_ = false; + bool is_execute_profiling_ = false; bool is_op_trace_ = false; bool is_load_ = false; + bool is_training_trace_ = false; + bool is_acl_api_mode_ = false; int32_t op_trace_iter_num_ = 0; string job_id_; string prof_dir_; @@ -102,6 +121,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { string system_trace_conf_; string task_trace_conf_; const ProfilingEngineImpl engine_; + map device_id_module_map_; // key: device_id, value: profiling on module + std::mutex mutex_; }; } // namespace ge #endif // GE_COMMON_PROFILING_PROFILING_MANAGER_H_ diff --git a/src/ge/graph/build/task_generator.cc b/src/ge/graph/build/task_generator.cc index cf6b7a0d..8f8f28b3 100644 --- a/src/ge/graph/build/task_generator.cc +++ b/src/ge/graph/build/task_generator.cc @@ -806,7 +806,8 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi GELOGI("Start FindProfilingTaskIndex."); GE_CHECK_NOTNULL(graph); const char *profiling_mode = std::getenv(kProfilingMode); - bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn(); + bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || + ProfilingManager::Instance().ProfilingTrainingTraceOn(); if (!is_profiling) { GELOGW("Profiling is not open."); return SUCCESS; @@ -853,7 +854,8 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const vector &all_reduce_nodes, uint32_t node_index, vector &task_def_list) { const char *profiling_mode = std::getenv(kProfilingMode); - bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn(); + bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || + ProfilingManager::Instance().ProfilingTrainingTraceOn(); if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || (profiling_point.end_index == 0)) { return SUCCESS; @@ -909,7 +911,8 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P vector &task_def_list) { GE_CHECK_NOTNULL(op_desc); const char *profiling_mode = std::getenv(kProfilingMode); - bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn(); + bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || + ProfilingManager::Instance().ProfilingTrainingTraceOn(); if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || (profiling_point.end_index == 0)) { return SUCCESS; diff --git a/src/ge/graph/load/new_model_manager/davinci_model.cc b/src/ge/graph/load/new_model_manager/davinci_model.cc index 45cec2cf..7eddde8e 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.cc +++ b/src/ge/graph/load/new_model_manager/davinci_model.cc @@ -662,7 +662,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_); // collect profiling for ge - if (ProfilingManager::Instance().ProfilingOn()) { + if (ProfilingManager::Instance().ProfilingModelLoadOn()) { std::vector compute_graph_desc_info; Status ret1 = GetComputeGraphInfo(compute_graph, compute_graph_desc_info); if (ret1 != SUCCESS) { @@ -2384,14 +2384,15 @@ void *DavinciModel::Run(DavinciModel *model) { GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(Model_SyncVarData, "Model Run SyncVarData")); GELOGI("Copy input data, model id:%u", model_id); - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_PRE_PROC_START)); + GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), + model->SetProfileTime(MODEL_PRE_PROC_START)); ret = model->CopyInputData(current_data, false); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); continue, "Copy input data to model failed."); // [No need to check value] - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_INFER_START)); + GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); + GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_START)); if (ProfilingManager::Instance().ProfilingOpTraceOn()) { GELOGI("GetOpTraceIterNum:%d", ProfilingManager::Instance().GetOpTraceIterNum()); for (int32_t i = 0; i < ProfilingManager::Instance().GetOpTraceIterNum(); i++) { @@ -2444,10 +2445,11 @@ void *DavinciModel::Run(DavinciModel *model) { GELOGI("rtStreamSynchronize end."); GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(rtStreamSynchronize, "GraphExcute::Wait for rtStreamSynchronize")); - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_INFER_END)); + GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_END)); } - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_AFTER_PROC_START)); + GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), + model->SetProfileTime(MODEL_AFTER_PROC_START)); GE_TIMESTAMP_START(ReturnResult3); // copy output data from device to host GE_IF_BOOL_EXEC(!model->output_op_list_.empty(), @@ -2456,8 +2458,9 @@ void *DavinciModel::Run(DavinciModel *model) { GE_IF_BOOL_EXEC(model->output_op_list_.empty(), (void)model->ReturnNoOutput(current_data.index)); GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(ReturnResult3, "GraphExcute::CopyDataFromDeviceToHost")); - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_AFTER_PROC_END)); - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), (void)model->SinkTimeProfile(current_data)); + GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), + model->SetProfileTime(MODEL_AFTER_PROC_END)); + GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)model->SinkTimeProfile(current_data)); model->iterator_count_++; model->is_first_execute_ = false; @@ -3402,32 +3405,32 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa zero_copy_batch_label_addrs_.clear(); } - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_PRE_PROC_START)); + GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_START)); Status ret = CopyModelData(input_data, output_data, is_dynamic_); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy input data to model failed. model id: %u", model_id_); GELOGI("current_data.index=%u", input_data.index); - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_PRE_PROC_END)); + GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END)); if (!task_list_.empty()) { GELOGD("rtModelExecute do"); - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_INFER_START)); + GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_START)); rtError_t rt_ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0); GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret)); - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_INFER_END)); + GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END)); GELOGI("rtModelExecute end"); } if (!is_async_mode_) { - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_AFTER_PROC_START)); + GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_START)); ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy Output data to user failed."); - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_AFTER_PROC_END)); + GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_END)); } // report model time data - GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), (void)SinkTimeProfile(input_data)); + GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)SinkTimeProfile(input_data)); GELOGI("Model run end, model id:%u", model_id_); return SUCCESS; } diff --git a/src/ge/graph/load/new_model_manager/model_manager.cc b/src/ge/graph/load/new_model_manager/model_manager.cc index 9f0b114b..cc8c8539 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.cc +++ b/src/ge/graph/load/new_model_manager/model_manager.cc @@ -35,6 +35,14 @@ thread_local uint32_t device_count = 0; namespace { const int kCmdParSize = 2; const int kDumpCmdPairSize = 2; +const int kProfStartCmdParaSize = 2; +const std::string kCmdTypeProfile = "profile"; +const std::string kCmdTypeDump = "dump"; +const std::string kCmdTypeProfiling = "profiling"; +const std::string kCmdTypeProfInit = "prof_init"; +const std::string kCmdTypeProfFinalize = "prof_finalize"; +const std::string kCmdTypeProfStart = "prof_start"; +const std::string kCmdTypeProfStop = "prof_stop"; } // namespace DumpProperties ModelManager::dump_properties_; @@ -303,7 +311,7 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptrSetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond davinci_model->SetProfileTime(MODEL_LOAD_END); @@ -531,7 +539,10 @@ Status ModelManager::Stop(uint32_t model_id) { /// Status ModelManager::HandleCommand(const Command &command) { static const std::map> cmds = { - {"profile", HandleProfileCommand}, {"dump", HandleDumpCommand}, {"profiling", HandleAclProfilingCommand}}; + {kCmdTypeProfile, HandleProfileCommand}, {kCmdTypeDump, HandleDumpCommand}, + {kCmdTypeProfiling, HandleAclProfilingCommand}, {kCmdTypeProfInit, HandleProfInitCommand}, + {kCmdTypeProfFinalize, HandleProfFinalizeCommand}, {kCmdTypeProfStart, HandleProfStartCommand}, + {kCmdTypeProfStop, HandleProfStopCommand}}; auto iter = cmds.find(command.cmd_type); if (iter == cmds.end()) { @@ -557,6 +568,71 @@ Status ModelManager::HandleAclProfilingCommand(const Command &command) { return SUCCESS; } +Status ModelManager::HandleProfInitCommand(const Command &command) { + uint64_t module_index = command.module_index; + if (ProfilingManager::Instance().ProfInit(module_index) != SUCCESS) { + GELOGE(FAILED, "Handle prof init failed."); + return FAILED; + } + return SUCCESS; +} + +Status ModelManager::HandleProfFinalizeCommand(const Command &command) { + if (ProfilingManager::Instance().ProfFinalize() != SUCCESS) { + GELOGE(FAILED, "Handle prof finalize failed."); + return FAILED; + } + return SUCCESS; +} +/* + * cmd para when prof start + * "devNums:2" + * "devIdList:1,2" + * "profilingOption:PROF_OP_TRACE" + * "aicoreMetrics:AICORE_ARITHMATIC_THROUGHPUT" + */ +Status ModelManager::HandleProfStartCommand(const Command &command) { + if (command.cmd_params.size() < kProfStartCmdParaSize) { + GELOGE(PARAM_INVALID, "When the cmd_type is 'profile start', the size of cmd_params must larger than 2."); + return PARAM_INVALID; + } + std::map cmd_params_map; + uint32_t step = 2; + for (uint32_t i = 0; i < command.cmd_params.size(); i += step) { + if (i + 1 >= command.cmd_params.size()) { + continue; + } + cmd_params_map[command.cmd_params[i]] = command.cmd_params[i + 1]; + } + uint64_t module_index = command.module_index; + if (ProfilingManager::Instance().ProfStartProfiling(module_index, cmd_params_map) != SUCCESS) { + GELOGE(FAILED, "Handle prof start failed."); + return FAILED; + } + return SUCCESS; +} + +Status ModelManager::HandleProfStopCommand(const Command &command) { + if (command.cmd_params.size() < kProfStartCmdParaSize) { + GELOGE(PARAM_INVALID, "When the cmd_type is 'profile stop', the size of cmd_params must larger than 2."); + return PARAM_INVALID; + } + std::map cmd_params_map; + uint32_t step = 2; + for (uint32_t i = 0; i < command.cmd_params.size(); i += step) { + if (i + 1 >= command.cmd_params.size()) { + continue; + } + cmd_params_map[command.cmd_params[i]] = command.cmd_params[i + 1]; + } + uint64_t module_index = command.module_index; + if (ProfilingManager::Instance().ProfStopProfiling(module_index, cmd_params_map) != SUCCESS) { + GELOGE(FAILED, "Handle prof finalize failed."); + return FAILED; + } + return SUCCESS; +} + Status ModelManager::HandleProfileCommand(const Command &command) { if (command.cmd_params.size() < kCmdParSize) { GELOGE(PARAM_INVALID, "When the cmd_type is 'profile', the size of cmd_params must larger than 2."); @@ -577,15 +653,6 @@ Status ModelManager::HandleProfileCommand(const Command &command) { if ((map_key == PROFILER_JOBCTX || map_key == PROFILER_TARGET_PATH || map_key == RTS_PROFILE_PATH)) { PropertiesManager::Instance().SetPropertyValue(map_key, value); } - - if ((map_key == PROFILE_STOP_KEY) && (value == PROFILE_STOP_VALUE)) { - rtError_t rt_ret = rtProfilerStop(); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(PARAM_INVALID, "Call rtProfilerStop ret:%d", rt_ret); - return PARAM_INVALID; - } - } - return SUCCESS; } @@ -875,7 +942,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model GELOGI("Parse model %u success.", model_id); - if (ProfilingManager::Instance().ProfilingOn()) { + if (ProfilingManager::Instance().ProfilingModelLoadOn()) { davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond davinci_model->SetProfileTime(MODEL_LOAD_END); diff --git a/src/ge/graph/load/new_model_manager/model_manager.h b/src/ge/graph/load/new_model_manager/model_manager.h index 2c650c82..0eaab1db 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.h +++ b/src/ge/graph/load/new_model_manager/model_manager.h @@ -158,6 +158,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { static ge::Status HandleAclProfilingCommand(const Command &command); static ge::Status HandleProfileCommand(const Command &command); static ge::Status HandleDumpCommand(const Command &command); + static ge::Status HandleProfInitCommand(const Command &command); + static ge::Status HandleProfFinalizeCommand(const Command &command); + static ge::Status HandleProfStartCommand(const Command &command); + static ge::Status HandleProfStopCommand(const Command &command); /// /// @ingroup domi_ome /// @brief get model memory usage diff --git a/src/ge/graph/manager/graph_manager.cc b/src/ge/graph/manager/graph_manager.cc index 08f7ec9e..9a4e39f3 100644 --- a/src/ge/graph/manager/graph_manager.cc +++ b/src/ge/graph/manager/graph_manager.cc @@ -565,10 +565,12 @@ Status GraphManager::PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, } GM_RUN_AND_DUMP_PERF("Optimize1", OptimizeStage1, compute_graph); GM_RUN_AND_DUMP_PERF("InferShape2", compute_graph->InferShapeInNeed); - - PassManager graph_pass; - GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass)) - GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); + const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); + if (unknown_shape_skip != nullptr) { + PassManager graph_pass; + GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass)) + GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); + } GE_CHK_STATUS_RET(graph_optimize_.IdentifyReference(compute_graph), "Identify reference failed."); GELOGI("PreRun:PreRunOptimizeOriginalGraph success."); @@ -1951,9 +1953,9 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { names_to_passes.emplace_back("MergePass", &merge_pass); names_to_passes.emplace_back("CastRemovePass", &cast_remove_pass); names_to_passes.emplace_back("TransposeTransDataPass", &transpose_transdata_pass); - names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); names_to_passes.emplace_back("TransOpSymmetryEliminationPass", &symmetry_elimination_pass); names_to_passes.emplace_back("TransOpNearbyAllreduceFusionPass", &trans_op_nearby_allreduce_fusion_pass); + names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); names_to_passes.emplace_back("DimensionComputePass", &dimension_compute_pass); names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); names_to_passes.emplace_back("DimensionAdjustPass", &dimension_adjust_pass); diff --git a/src/ge/graph/partition/dynamic_shape_partition.cc b/src/ge/graph/partition/dynamic_shape_partition.cc index 9cc7d0f4..d1b00f12 100644 --- a/src/ge/graph/partition/dynamic_shape_partition.cc +++ b/src/ge/graph/partition/dynamic_shape_partition.cc @@ -43,13 +43,18 @@ #define REQUIRE_SUCCESS(cond, ...) REQUIRE(((cond) == SUCCESS), __VA_ARGS__) #define REQUIRE_GRAPH_SUCCESS(cond, ...) REQUIRE(((cond) == GRAPH_SUCCESS), __VA_ARGS__) +bool IsExperimental() { + const static bool kIsExperimental = (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") != nullptr); + return kIsExperimental; +} + namespace ge { using Cluster = DynamicShapePartitioner::Cluster; using ClusterPtr = std::shared_ptr; Status DynamicShapePartitioner::Partition() { REQUIRE_NOT_NULL(root_graph_, "Graph is nullptr."); - if (!GraphUtils::IsUnknownShapeGraph(root_graph_)) { + if (!IsExperimental()) { GELOGD("Skip dynamic shape partition as not in experimental mode."); REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, false), "Failed set dynamic shape partitioned flag on root graph."); diff --git a/src/ge/graph/passes/ctrl_edge_transfer_pass.cc b/src/ge/graph/passes/ctrl_edge_transfer_pass.cc index 6c426e95..9454c00d 100644 --- a/src/ge/graph/passes/ctrl_edge_transfer_pass.cc +++ b/src/ge/graph/passes/ctrl_edge_transfer_pass.cc @@ -20,7 +20,6 @@ #include "framework/common/ge_inner_error_codes.h" #include "framework/common/util.h" #include "graph/utils/graph_utils.h" -#include "graph/debug/ge_attr_define.h" namespace ge { /* Pass Explaination: @@ -43,12 +42,6 @@ Status CtrlEdgeTransferPass::Run(ge::ComputeGraphPtr graph) { GELOGD("CtrlEdgeTransferPass start running"); GE_CHECK_NOTNULL(graph); - bool is_dynamic_shape = false; - (void)AttrUtils::GetBool(graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape); - if (!is_dynamic_shape) { - return SUCCESS; - } - for (ge::NodePtr &n : graph->GetDirectNode()) { auto op_desc = n->GetOpDesc(); if (op_desc == nullptr) { diff --git a/third_party/fwkacllib/inc/hccl/hccl_types.h b/third_party/fwkacllib/inc/hccl/hccl_types.h old mode 100755 new mode 100644 index 03f43649..276516e7 --- a/third_party/fwkacllib/inc/hccl/hccl_types.h +++ b/third_party/fwkacllib/inc/hccl/hccl_types.h @@ -1,99 +1,99 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file hccl_types.h - * @brief HCCL data type definition - * - */ - -#ifndef HCCL_TYPES_H_ -#define HCCL_TYPES_H_ - -#include - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -/** - * @brief HCCL functions return value definition - */ -typedef enum { - HCCL_SUCCESS = 0, /**< success */ - HCCL_E_PARA = 1, /**< parameter error */ - HCCL_E_PTR = 2, /**< empty pointer */ - HCCL_E_MEMORY = 3, /**< memory error */ - HCCL_E_INTERNAL = 4, /**< internal error */ - HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ - HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ - HCCL_E_UNAVAIL = 7, /**< resource unavailable */ - HCCL_E_SYSCALL = 8, /**< call system interface error */ - HCCL_E_TIMEOUT = 9, /**< timeout */ - HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ - HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ - HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ - HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ - HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ - HCCL_E_RUNTIME = 15, /**< call runtime api fail */ - HCCL_E_DRV = 16, /**< call driver api fail */ - HCCL_E_PROFILING = 17, /**< call profiling api fail */ - HCCL_E_CCE = 18, /**< call cce api fail */ - HCCL_E_NETWORK = 19, /**< call network api fail */ - HCCL_E_RESERVED /**< reserved */ -} HcclResult; - -/** - * @brief handle to HCCL communicator - */ -typedef void *HcclComm; - -/** - * @brief HCCL Reduction opperation - */ -typedef enum { - HCCL_REDUCE_SUM = 0, /**< sum */ - HCCL_REDUCE_PROD = 1, /**< prod */ - HCCL_REDUCE_MAX = 2, /**< max */ - HCCL_REDUCE_MIN = 3, /**< min */ - HCCL_REDUCE_RESERVED /**< reserved */ -} HcclReduceOp; - -/** - * @brief HCCL data type - */ -typedef enum { - HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ - HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ - HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ - HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ - HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ - HCCL_DATA_TYPE_RESERVED /**< reserved */ -} HcclDataType; - -const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length - -/** - * @brief HCCL root info - */ -typedef struct HcclRootInfoDef { - char internal[HCCL_ROOT_INFO_BYTES]; -} HcclRootInfo; - -#ifdef __cplusplus -} -#endif // __cplusplus -#endif // HCCL_TYPES_H_ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hccl_types.h + * @brief HCCL data type definition + * + */ + +#ifndef HCCL_TYPES_H_ +#define HCCL_TYPES_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * @brief HCCL functions return value definition + */ +typedef enum { + HCCL_SUCCESS = 0, /**< success */ + HCCL_E_PARA = 1, /**< parameter error */ + HCCL_E_PTR = 2, /**< empty pointer */ + HCCL_E_MEMORY = 3, /**< memory error */ + HCCL_E_INTERNAL = 4, /**< internal error */ + HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ + HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ + HCCL_E_UNAVAIL = 7, /**< resource unavailable */ + HCCL_E_SYSCALL = 8, /**< call system interface error */ + HCCL_E_TIMEOUT = 9, /**< timeout */ + HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ + HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ + HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ + HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ + HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ + HCCL_E_RUNTIME = 15, /**< call runtime api fail */ + HCCL_E_DRV = 16, /**< call driver api fail */ + HCCL_E_PROFILING = 17, /**< call profiling api fail */ + HCCL_E_CCE = 18, /**< call cce api fail */ + HCCL_E_NETWORK = 19, /**< call network api fail */ + HCCL_E_RESERVED /**< reserved */ +} HcclResult; + +/** + * @brief handle to HCCL communicator + */ +typedef void *HcclComm; + +/** + * @brief HCCL Reduction opperation + */ +typedef enum { + HCCL_REDUCE_SUM = 0, /**< sum */ + HCCL_REDUCE_PROD = 1, /**< prod */ + HCCL_REDUCE_MAX = 2, /**< max */ + HCCL_REDUCE_MIN = 3, /**< min */ + HCCL_REDUCE_RESERVED /**< reserved */ +} HcclReduceOp; + +/** + * @brief HCCL data type + */ +typedef enum { + HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ + HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ + HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ + HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ + HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ + HCCL_DATA_TYPE_RESERVED /**< reserved */ +} HcclDataType; + +const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length + +/** + * @brief HCCL root info + */ +typedef struct HcclRootInfoDef { + char internal[HCCL_ROOT_INFO_BYTES]; +} HcclRootInfo; + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCCL_TYPES_H_ diff --git a/third_party/fwkacllib/inc/register/host_cpu_context.h b/third_party/fwkacllib/inc/register/host_cpu_context.h index f7d4f52f..4d6d5855 100644 --- a/third_party/fwkacllib/inc/register/host_cpu_context.h +++ b/third_party/fwkacllib/inc/register/host_cpu_context.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 2ab522fa..ba9503cc 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -426,13 +426,13 @@ RTS_API rtError_t rtProfilerConfig(uint16_t type); * @ingroup profiling_base * @brief start rts profiler. */ -RTS_API rtError_t rtProfilerStart(void); +RTS_API rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList); /** * @ingroup profiling_base * @brief stop rts profiler. */ -RTS_API rtError_t rtProfilerStop(void); +RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList); /** * @ingroup profiling_base diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h new file mode 100644 index 00000000..4f216239 --- /dev/null +++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h @@ -0,0 +1,155 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MSPROF_ENGINE_PROF_ACL_API_H_ +#define MSPROF_ENGINE_PROF_ACL_API_H_ + +#define MSVP_MAX_DEV_NUM 64 +#define MSVP_PROF_API __attribute__((visibility("default"))) + +// DataTypeConfig +#define PROF_ACL_API 0x0001 +#define PROF_TASK_TIME 0x0002 +#define PROF_AICORE_METRICS 0x0004 +#define PROF_AICPU_TRACE 0x0008 +#define PROF_MODEL_EXECUTE 0x0010 +#define PROF_RUNTIME_API 0x0020 +#define PROF_RUNTIME_TRACE 0x0040 +#define PROF_SCHEDULE_TIMELINE 0x0080 +#define PROF_SCHEDULE_TRACE 0x0100 +#define PROF_AIVECTORCORE_METRICS 0x0200 +#define PROF_SUBTASK_TIME 0x0400 + +#define PROF_TRAINING_TRACE 0x0800 +#define PROF_HCCL_TRACE 0x1000 +#define PROF_DATA_PROCESS 0x2000 +#define PROF_TASK_TRACE 0x3842 + +#define PROF_MODEL_LOAD 0x8000000000000000 + +// DataTypeConfig MASK +#define PROF_ACL_API_MASK 0x0001 +#define PROF_TASK_TIME_MASK 0x0002 +#define PROF_AICORE_METRICS_MASK 0x0004 +#define PROF_AICPU_TRACE_MASK 0x0008 +#define PROF_MODEL_EXECUTE_MASK 0x0010 +#define PROF_RUNTIME_API_MASK 0x0020 +#define PROF_RUNTIME_TRACE_MASK 0x0040 +#define PROF_SCHEDULE_TIMELINE_MASK 0x0080 +#define PROF_SCHEDULE_TRACE_MASK 0x0100 +#define PROF_AIVECTORCORE_METRICS_MASK 0x0200 +#define PROF_SUBTASK_TIME_MASK 0x0400 + +#define PROF_TRAINING_TRACE_MASK 0x0800 +#define PROF_HCCL_TRACE_MASK 0x1000 +#define PROF_DATA_PROCESS_MASK 0x2000 + +#define PROF_MODEL_LOAD_MASK 0x8000000000000000 + +#include +#include + +/** + * @name ProrErrorCode + * @brief error code enum of prof_acl_apis + */ +enum ProfErrorCode { + PROF_ERROR_NONE = 0, // ok + PROF_ERROR_PARAM_INVALID, // param invalid, for example nullptr + PROF_ERROR_REPEAT_INIT, // profiling has already been inited + PROF_ERROR_CONFIG_INVALID, // config invalid, for example invalid json string + PROF_ERROR_DIR_NO_ACCESS, // dir is not accessable + PROF_ERROR_FAILURE, // failed to init or start profiling + PROF_ERROR_NOT_INITED, // profiling has not been inited + PROF_ERROR_DEVICE_INVALID, // device id invalid + PROF_ERROR_UNSUPPORTED, // unsupported data type or ai core metrics + PROF_ERROR_REPEAT_START, // profiilng has already been started + PROF_ERROR_NOT_STARTED, // profiling has not been started +}; + +/** + * @brief transfer profiling config in acl.json to sample config + * @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...} + * @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]} + * @return ProfErrorCode + */ +MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg); + +/** + * @name ProfInit + * @brief init profiling + * @param profInitCfg [IN] config of init profiling of json format + * @return ProfErrorCode + */ +MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg); + +/** + * @name ProfAicoreMetrics + * @brief aicore metrics enum + */ +enum ProfAicoreMetrics { + PROF_AICORE_ARITHMATIC_THROUGHPUT = 0, + PROF_AICORE_PIPELINE = 1, + PROF_AICORE_SYNCHRONIZATION = 2, + PROF_AICORE_MEMORY = 3, + PROF_AICORE_INTERNAL_MEMORY = 4, + PROF_AICORE_STALL = 5, + PROF_AICORE_EVENT = 255 +}; + +/** + * @name ProfConfig + * @brief struct of ProfStart + */ +struct ProfConfig { + uint32_t devNums; // length of device id list + uint32_t devIdList[MSVP_MAX_DEV_NUM]; // physical device id list + ProfAicoreMetrics aicoreMetrics; // aicore metric + uint64_t dataTypeConfig; // data type to start profiling +}; + +/** + * @name ProfStartProfiling + * @brief start profiling + * @param profStartCfg [IN] config to start profiling + * @return ProfErrorCode + */ +MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg); + +/** + * @name ProfStopConfig + * @brief struct of ProfStop + */ +struct ProfStopConfig { + uint64_t padding; +}; + +/** + * @name ProfStopProfiling + * @brief stop profiling + * @param profStopCfg [IN] config to stop profiling + * @return ProfErrorCode + */ +MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg); + +/** + * @name ProfFinalize + * @brief finalize profiling task + * @return ProfErrorCode + */ +MSVP_PROF_API int32_t ProfFinalize(); + +#endif // MSPROF_ENGINE_PROF_ACL_API_H_