Browse Source

sync-from-trunk-to-blue-zone-0917

tags/v1.0.0
wuweikang 4 years ago
parent
commit
d9194b5c3c
14 changed files with 745 additions and 167 deletions
  1. +1
    -0
      inc/framework/common/ge_types.h
  2. +343
    -19
      src/ge/common/profiling/profiling_manager.cc
  3. +24
    -3
      src/ge/common/profiling/profiling_manager.h
  4. +6
    -3
      src/ge/graph/build/task_generator.cc
  5. +18
    -15
      src/ge/graph/load/new_model_manager/davinci_model.cc
  6. +79
    -12
      src/ge/graph/load/new_model_manager/model_manager.cc
  7. +4
    -0
      src/ge/graph/load/new_model_manager/model_manager.h
  8. +7
    -5
      src/ge/graph/manager/graph_manager.cc
  9. +6
    -1
      src/ge/graph/partition/dynamic_shape_partition.cc
  10. +0
    -7
      src/ge/graph/passes/ctrl_edge_transfer_pass.cc
  11. +99
    -99
      third_party/fwkacllib/inc/hccl/hccl_types.h
  12. +1
    -1
      third_party/fwkacllib/inc/register/host_cpu_context.h
  13. +2
    -2
      third_party/fwkacllib/inc/runtime/base.h
  14. +155
    -0
      third_party/fwkacllib/inc/toolchain/prof_acl_api.h

+ 1
- 0
inc/framework/common/ge_types.h View File

@@ -93,6 +93,7 @@ struct OutputData {
struct Command { struct Command {
std::string cmd_type; // Command type std::string cmd_type; // Command type
std::vector<std::string> cmd_params; // Command params std::vector<std::string> cmd_params; // Command params
uint64_t module_index; // prof module
}; };


// The definition of I/O shape description // The definition of I/O shape description


+ 343
- 19
src/ge/common/profiling/profiling_manager.cc View File

@@ -34,6 +34,11 @@ const char *const kName = "name";
const char *const kTraceID = "traceId"; const char *const kTraceID = "traceId";
const char *const kProfDir = "resultPath"; const char *const kProfDir = "resultPath";
const size_t kReportMaxLen = 2048; const size_t kReportMaxLen = 2048;
const int32_t kMaxDeviceNum = 256;
const std::string kConfigNumsdev = "devNums";
const std::string kConfigDevIdList = "devIdList";
const std::string kProfStart = "prof_start";
const std::string kProfStop = "prof_stop";
} // namespace } // namespace


namespace ge { namespace ge {
@@ -64,7 +69,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
return ret; return ret;
} }


if (is_profiling_) {
if (is_load_profiling_) {
// register Framework to profiling // register Framework to profiling
int result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_); int result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_);
if (result != 0) { if (result != 0) {
@@ -92,7 +97,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
const std::string &config) { const std::string &config) {
#ifdef DAVINCI_SUPPORT_PROFILING #ifdef DAVINCI_SUPPORT_PROFILING
try { try {
is_profiling_ = false;
is_load_profiling_ = false;
is_execute_profiling_ = false;
profiling_opts_.clear(); profiling_opts_.clear();
op_trace_conf_.clear(); op_trace_conf_.clear();
Json start_prof_conf = Json::parse(config); Json start_prof_conf = Json::parse(config);
@@ -114,7 +120,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
} }
device_id_.push_back(std::stoi(device_id_str)); device_id_.push_back(std::stoi(device_id_str));
} }
if (is_all == true) {
if (is_all) {
int32_t count = 0; int32_t count = 0;
rtError_t rt_err = rtGetDeviceCount(&count); rtError_t rt_err = rtGetDeviceCount(&count);
if (rt_err != RT_ERROR_NONE) { if (rt_err != RT_ERROR_NONE) {
@@ -133,7 +139,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
GELOGE(FAILED, "Parse feature from acl cfg failed."); GELOGE(FAILED, "Parse feature from acl cfg failed.");
return FAILED; return FAILED;
} }
is_profiling_ = true;
is_load_profiling_ = true;
is_execute_profiling_ = true;
} catch (...) { } catch (...) {
GELOGE(FAILED, "Json conf is not invalid !"); GELOGE(FAILED, "Json conf is not invalid !");
return ge::PARAM_INVALID; return ge::PARAM_INVALID;
@@ -200,21 +207,25 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
const char *profiling_mode = std::getenv("PROFILING_MODE"); const char *profiling_mode = std::getenv("PROFILING_MODE");
const char *prof_options = std::getenv("PROFILING_OPTIONS"); const char *prof_options = std::getenv("PROFILING_OPTIONS");
if ((profiling_mode == nullptr) || (strcmp("true", profiling_mode) != 0) || (prof_options == nullptr)) { if ((profiling_mode == nullptr) || (strcmp("true", profiling_mode) != 0) || (prof_options == nullptr)) {
is_profiling_ = false;
is_load_profiling_ = false;
is_execute_profiling_ = false;
} else { } else {
std::string prof_options_str = std::string(prof_options); std::string prof_options_str = std::string(prof_options);
profiling_opts_ = StringUtils::Split(prof_options_str, ':'); profiling_opts_ = StringUtils::Split(prof_options_str, ':');
is_profiling_ = true;
is_load_profiling_ = true;
is_execute_profiling_ = true;
GELOGI("The profiling in env is %s, %s", profiling_mode, prof_options); GELOGI("The profiling in env is %s, %s", profiling_mode, prof_options);
} }
if (!is_profiling_) {
if (!is_load_profiling_) {
const std::string enable_profiling = "1"; const std::string enable_profiling = "1";
if (options.profiling_mode != enable_profiling || options.profiling_options.empty()) { if (options.profiling_mode != enable_profiling || options.profiling_options.empty()) {
is_profiling_ = false;
is_load_profiling_ = false;
is_execute_profiling_ = false;
return SUCCESS; return SUCCESS;
} else { } else {
profiling_opts_ = StringUtils::Split(options.profiling_options, ':'); profiling_opts_ = StringUtils::Split(options.profiling_options, ':');
is_profiling_ = true;
is_load_profiling_ = true;
is_execute_profiling_ = true;
GELOGI("The profiling in options is %s, %s", options.profiling_mode.c_str(), options.profiling_options.c_str()); GELOGI("The profiling in options is %s, %s", options.profiling_mode.c_str(), options.profiling_options.c_str());
} }
} }
@@ -310,7 +321,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St
} }


// runtime startup for profiling // runtime startup for profiling
GE_CHK_RT_RET(rtProfilerStart());
uint64_t module = GetProfilingModule();
int32_t device_num = 1;
uint32_t device_id_rt = static_cast<uint32_t>(device_id);
GE_CHK_RT_RET(rtProfilerStart(module, device_num, &device_id_rt));


// call profiling startup API // call profiling startup API
ProfMgrCfg prof_cfg = {send_profiling_config_}; ProfMgrCfg prof_cfg = {send_profiling_config_};
@@ -333,11 +347,22 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf
int ret = reporter->Flush(); int ret = reporter->Flush();
GELOGI("Report data end, ret is %d", ret); GELOGI("Report data end, ret is %d", ret);
} }

rtError_t rt_ret = rtProfilerStop();
uint64_t module = GetProfilingModule();
int32_t device_num = static_cast<int32_t>(device_id_.size());
uint32_t *device_id_ptr = new (std::nothrow) uint32_t[device_num];
if (device_id_ptr == nullptr) {
GELOGE(FAILED, "Stop profiling device id ptr is null.");
return;
}
for (int32_t i = 0; i < device_num; i++) {
device_id_ptr[i] = static_cast<uint32_t>(device_id_[i]);
}
rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr);
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {
GELOGI("Call rtProfilerStop ret:%d", rt_ret);
GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret);
} }
delete[] device_id_ptr;
device_id_ptr = nullptr;


for (size_t i = 0; i < prof_handle_vec_.size(); ++i) { for (size_t i = 0; i < prof_handle_vec_.size(); ++i) {
int result = ProfMgrStop(prof_handle_vec_[i]); int result = ProfMgrStop(prof_handle_vec_[i]);
@@ -526,13 +551,13 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr
return; return;
} }
GELOGI("current phy_device_id:%d", phy_device_id); GELOGI("current phy_device_id:%d", phy_device_id);

auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id);
if (ret == device_id_.end()) {
GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed.");
return;
if (!is_acl_api_mode_) {
auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id);
if (ret == device_id_.end()) {
GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed.");
return;
}
} }

GELOGI("start ProfilingTaskDescInfo."); GELOGI("start ProfilingTaskDescInfo.");
ProfilingTaskDescInfo(task_desc_info, phy_device_id); ProfilingTaskDescInfo(task_desc_info, phy_device_id);
GELOGI("start ProfilingGraphDescInfo."); GELOGI("start ProfilingGraphDescInfo.");
@@ -546,6 +571,305 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::SetProfi
recv_profiling_config_ = profiling_cfg; recv_profiling_config_ = profiling_cfg;
} }


FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t ProfilingManager::GetProfilingModule() {
uint64_t module = PROF_MODEL_EXECUTE_MASK | PROF_RUNTIME_API_MASK | PROF_RUNTIME_TRACE_MASK |
PROF_SCHEDULE_TIMELINE_MASK | PROF_SCHEDULE_TRACE_MASK | PROF_TASK_TIME_MASK |
PROF_SUBTASK_TIME_MASK | PROF_AICPU_TRACE_MASK | PROF_AICORE_METRICS_MASK |
PROF_AIVECTORCORE_METRICS_MASK | PROF_MODEL_LOAD_MASK;
return module;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfInit(uint64_t module) {
#ifdef DAVINCI_SUPPORT_PROFILING
std::lock_guard<std::mutex> lock(mutex_);
uint64_t model_load_mask = module & PROF_MODEL_LOAD_MASK;

if (model_load_mask == PROF_MODEL_LOAD_MASK) {
// register Framework to profiling
int32_t result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_);
if (result != SUCCESS) {
GELOGE(FAILED, "Register profiling engine failed.");
return FAILED;
}
int32_t device_num = -1;
rtError_t rt_ret = rtProfilerStart(model_load_mask, device_num, nullptr);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(FAILED, "Runtime profiler start failed.");
return FAILED;
}
is_load_profiling_ = true;
GELOGI("Prof init: model load profiling on.");
}

uint64_t training_trace_mask = module & PROF_TRAINING_TRACE_MASK;
if (training_trace_mask == PROF_TRAINING_TRACE_MASK) {
is_training_trace_ = true;
}
is_acl_api_mode_ = true;
GELOGI("Prof init success.");
#endif
return SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFinalize() {
#ifdef DAVINCI_SUPPORT_PROFILING
std::lock_guard<std::mutex> lock(mutex_);
is_load_profiling_ = false;
is_training_trace_ = false;
is_acl_api_mode_ = false;

int32_t ret = Msprof::Engine::UnInit(GE_PROFILING_MODULE);
if (ret != SUCCESS) {
GELOGE(ret, "Profiling plugin uninit failed, ret:%d", ret);
}
int32_t dev_num = -1;
rtError_t rt_ret = rtProfilerStop(PROF_MODEL_LOAD_MASK, dev_num, nullptr);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(FAILED, "Runtime profiler stop failed.");
return FAILED;
}

for (auto device_id_module : device_id_module_map_) {
if (device_id_module.second != 0) {
uint32_t device_id = static_cast<uint32_t>(device_id_module.first);
GELOGI("Prof finalize: device_id: %u, module: 0x%llx.", device_id, device_id_module.second);
rt_ret = rtProfilerStop(device_id_module.second, 1, &device_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(FAILED, "Runtime profiler stop failed.");
return FAILED;
}
}
}
device_id_module_map_.clear();
device_id_.clear();
GELOGI("Prof finalize success.");
#endif
return SUCCESS;
}

Status ProfilingManager::ProfParseDeviceId(const std::map<std::string, std::string> &config_para,
vector<int32_t> &device_list) {
#ifdef DAVINCI_SUPPORT_PROFILING
auto iter = config_para.find(kConfigDevIdList);
if (iter != config_para.end()) {
std::string device_id_list = iter->second;
std::string temp;
vector<std::string> decvice_id;
for (uint32_t i = 0; i < device_id_list.size(); i++) {
if (isdigit(device_id_list[i])) {
temp.append(1, device_id_list[i]);
} else {
if (!temp.empty()) {
decvice_id.emplace_back(temp);
}
temp.clear();
}
}
if (!temp.empty()) {
decvice_id.emplace_back(temp);
}

for (uint32_t i = 0; i < decvice_id.size(); i++) {
try {
int32_t dev_id = std::stoi(decvice_id[i]);
device_list.push_back(dev_id);
} catch (std::invalid_argument &) {
GELOGE(FAILED, "Device id: %s is invalid.", decvice_id[i].c_str());
return FAILED;
} catch (std::out_of_range &) {
GELOGE(FAILED, "Device id: %s is out of range.", decvice_id[i].c_str());
} catch (...) {
GELOGE(FAILED, "Device id: %s cannot change to int.", decvice_id[i].c_str());
return FAILED;
}
}
} else {
GELOGE(FAILED, "Config para not contain device id list.");
return FAILED;
}
#endif
return SUCCESS;
}

Status ProfilingManager::ProfParseParam(const std::map<std::string, std::string> &config_para, int32_t &device_num,
vector<int32_t> &device_list) {
#ifdef DAVINCI_SUPPORT_PROFILING
// device num
auto iter = config_para.find(kConfigNumsdev);
if (iter != config_para.end()) {
try {
device_num = std::stoi(iter->second);
} catch (std::invalid_argument &) {
GELOGE(FAILED, "Device nun: %s is invalid.", iter->second.c_str());
return FAILED;
} catch (std::out_of_range &) {
GELOGE(FAILED, "Device num: %s is out of range.", iter->second.c_str());
} catch (...) {
GELOGE(FAILED, "Device num: %s cannot change to int.", iter->second.c_str());
return FAILED;
}
} else {
GELOGE(FAILED, "Config para not contain device num.");
return FAILED;
}
// device id
if (ProfParseDeviceId(config_para, device_list) != SUCCESS) {
GELOGE(FAILED, "Parse config para device id failed.");
return FAILED;
}

if (device_num == 0 || device_num > kMaxDeviceNum || device_num != static_cast<int32_t>(device_list.size())) {
GELOGE(FAILED, "Config para device num: %d not equal to device list size: %d.", device_num, device_list.size());
return FAILED;
}
#endif
return SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status
ProfilingManager::ProfStartProfiling(uint64_t module, const std::map<std::string, std::string> &config_para) {
#ifdef DAVINCI_SUPPORT_PROFILING
std::lock_guard<std::mutex> lock(mutex_);
int32_t device_num = 0;
vector<int32_t> device_list;
if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) {
GELOGE(FAILED, "Prof start parse param failed.");
return FAILED;
}
auto *device_id = new (std::nothrow) uint32_t[device_num];
if (device_id == nullptr) {
GELOGE(FAILED, "Prof start parse param failed.");
return FAILED;
}
for (int32_t i = 0; i < device_num; i++) {
device_id[i] = static_cast<uint32_t>(device_list[i]);
}
GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num);
rtError_t rt_ret = rtProfilerStart(module, device_num, device_id);
if (rt_ret != RT_ERROR_NONE) {
delete[] device_id;
GELOGE(FAILED, "Runtime profiler config proc failed.");
return FAILED;
}
delete[] device_id;
device_id = nullptr;
if ((module & PROF_MODEL_EXECUTE_MASK) == PROF_MODEL_EXECUTE_MASK) {
for (int32_t i = 0; i < device_num; i++) {
if (std::find(device_id_.begin(), device_id_.end(), device_list[i]) == device_id_.end()) {
device_id_.push_back(device_list[i]);
}
}
GELOGI("Prof start: ge execute model start profiling.");
}
if ((module & PROF_MODEL_LOAD_MASK) == PROF_MODEL_LOAD_MASK) {
GELOGW("Prof start: load model module is invalid.");
}
UpdateDeviceIdModuleMap(kProfStart, module, device_list);
GELOGI("Prof start profiling success.");
#endif
return SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status
ProfilingManager::ProfStopProfiling(uint64_t module, const std::map<std::string, std::string> &config_para) {
#ifdef DAVINCI_SUPPORT_PROFILING
std::lock_guard<std::mutex> lock(mutex_);
int32_t device_num = 0;
vector<int32_t> device_list;
if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) {
GELOGE(FAILED, "Prof stop parse param failed.");
return FAILED;
}
auto *device_id = new (std::nothrow) uint32_t[device_num];
if (device_id == nullptr) {
GELOGE(FAILED, "Prof stop parse param failed.");
return FAILED;
}
for (int32_t i = 0; i < device_num; i++) {
device_id[i] = static_cast<uint32_t>(device_list[i]);
}
GELOGI("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num);
rtError_t rt_ret = rtProfilerStop(module, device_num, device_id);
if (rt_ret != RT_ERROR_NONE) {
delete[] device_id;
GELOGE(FAILED, "Prof stop: runtime profiler config proc failed.");
return FAILED;
}
delete[] device_id;
device_id = nullptr;
uint64_t execute_model_mask = module & PROF_MODEL_EXECUTE_MASK;
if (execute_model_mask == PROF_MODEL_EXECUTE_MASK) {
for (int32_t i = 0; i < device_num; i++) {
auto iter = std::find(device_id_.begin(), device_id_.end(), device_list[i]);
if (iter != device_id_.end()) {
device_id_.erase(iter);
}
}
GELOGI("Prof stop: ge execute model stop profiling.");
}
if ((module & PROF_MODEL_LOAD_MASK) == PROF_MODEL_LOAD_MASK) {
GELOGW("Prof stop: load model module is invalid.");
}
UpdateDeviceIdModuleMap(kProfStop, module, device_list);
GELOGI("Prof stop profiling success.");
#endif
return SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::UpdateDeviceIdModuleMap(
string prof_type, uint64_t module, const vector<int32_t> &device_list) {
#ifdef DAVINCI_SUPPORT_PROFILING
if (prof_type == kProfStart) {
for (uint32_t i = 0; i < device_list.size(); i++) {
auto iter = device_id_module_map_.find(device_list[i]);
if (iter != device_id_module_map_.end()) {
uint64_t prof_on_module = device_id_module_map_[device_list[i]];
// save all profiling on module of device
device_id_module_map_[device_list[i]] = prof_on_module | module;
} else {
device_id_module_map_[device_list[i]] = module;
}
}
} else if (prof_type == kProfStop) {
for (uint32_t i = 0; i < device_list.size(); i++) {
auto iter = device_id_module_map_.find(device_list[i]);
if (iter != device_id_module_map_.end()) {
uint64_t prof_on_module = device_id_module_map_[device_list[i]];
uint64_t prof_off_module = prof_on_module & module;
uint64_t prof_on_left_module = prof_on_module & (~prof_off_module);
// stop profiling on module of device
device_id_module_map_[device_list[i]] = prof_on_left_module;
}
}
} else {
GELOGI("No need to update device_id module map.");
}
#endif
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::ProfilingModelExecuteOn() const {
int32_t logic_device_id = 0;
rtError_t rt_ret = rtGetDevice(&logic_device_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id);
}
GELOGI("Current logic_device_id:%d", logic_device_id);

uint32_t phy_device_id = 0;
rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id);
}
GELOGI("Current phy_device_id:%d", phy_device_id);
bool execute_model_prof_on = false;
auto iter = std::find(device_id_.begin(), device_id_.end(), phy_device_id);
if (iter != device_id_.end()) {
execute_model_prof_on = true;
}
GELOGI("Flag is_execute_profiling: %d, execute_model_prof_on: %d", is_execute_profiling_, execute_model_prof_on);
return is_execute_profiling_ || execute_model_prof_on;
}

/** /**
* @brief Profiling PluginImpl * @brief Profiling PluginImpl
*/ */


+ 24
- 3
src/ge/common/profiling/profiling_manager.h View File

@@ -18,6 +18,7 @@
#define GE_COMMON_PROFILING_PROFILING_MANAGER_H_ #define GE_COMMON_PROFILING_PROFILING_MANAGER_H_


#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include <mutex>
#include <map> #include <map>
#include <string> #include <string>
#include <vector> #include <vector>
@@ -27,14 +28,17 @@
#include "external/register/register_types.h" #include "external/register/register_types.h"
#include "toolchain/prof_engine.h" #include "toolchain/prof_engine.h"
#include "toolchain/prof_mgr_core.h" #include "toolchain/prof_mgr_core.h"
#include "toolchain/prof_acl_api.h"


using std::map; using std::map;
using std::string; using std::string;
using std::vector; using std::vector;
using Json = nlohmann::json; using Json = nlohmann::json;


namespace ge {
namespace {
const std::string GE_PROFILING_MODULE = "Framework"; const std::string GE_PROFILING_MODULE = "Framework";
} // namespace
namespace ge {
// register Plugin // register Plugin
class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY PluginImpl : public Msprof::Engine::PluginIntf { class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY PluginImpl : public Msprof::Engine::PluginIntf {
public: public:
@@ -69,10 +73,17 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
ge::Status InitFromOptions(const Options &options); ge::Status InitFromOptions(const Options &options);
ge::Status InitFromAclCfg(const std::string &config); ge::Status InitFromAclCfg(const std::string &config);
ge::Status StartProfiling(int32_t iter, int32_t device_id); ge::Status StartProfiling(int32_t iter, int32_t device_id);
ge::Status ProfInit(uint64_t module);
ge::Status ProfFinalize();
ge::Status ProfStartProfiling(uint64_t module, const std::map<std::string, std::string> &config_para);
ge::Status ProfStopProfiling(uint64_t module, const std::map<std::string, std::string> &config_para);
void StopProfiling(); void StopProfiling();
bool ProfilingOpTraceOn() const { return is_op_trace_; } bool ProfilingOpTraceOn() const { return is_op_trace_; }
bool ProfilingLoadFlag() const { return is_load_; } bool ProfilingLoadFlag() const { return is_load_; }
bool ProfilingOn() const { return is_profiling_; }
bool ProfilingTrainingTraceOn() const { return is_training_trace_; }
bool ProfilingModelLoadOn() const { return is_load_profiling_; }
bool ProfilingModelExecuteOn() const;
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // only used by command pattern
int32_t GetOpTraceIterNum() const { return op_trace_iter_num_; } int32_t GetOpTraceIterNum() const { return op_trace_iter_num_; }
void ReportProfilingData(const std::vector<TaskDescInfo> &task_desc_info, void ReportProfilingData(const std::vector<TaskDescInfo> &task_desc_info,
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info); const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info);
@@ -87,9 +98,17 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {


private: private:
ge::Status ParseFeaturesFromAclCfg(const Json &feature); ge::Status ParseFeaturesFromAclCfg(const Json &feature);
bool is_profiling_ = false;
ge::Status ProfParseParam(const std::map<std::string, std::string> &config_para, int32_t &device_num,
vector<int32_t> &device_list);
ge::Status ProfParseDeviceId(const std::map<std::string, std::string> &config_para, vector<int32_t> &device_list);
uint64_t GetProfilingModule();
void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector<int32_t> &device_list);
bool is_load_profiling_ = false;
bool is_execute_profiling_ = false;
bool is_op_trace_ = false; bool is_op_trace_ = false;
bool is_load_ = false; bool is_load_ = false;
bool is_training_trace_ = false;
bool is_acl_api_mode_ = false;
int32_t op_trace_iter_num_ = 0; int32_t op_trace_iter_num_ = 0;
string job_id_; string job_id_;
string prof_dir_; string prof_dir_;
@@ -102,6 +121,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
string system_trace_conf_; string system_trace_conf_;
string task_trace_conf_; string task_trace_conf_;
const ProfilingEngineImpl engine_; const ProfilingEngineImpl engine_;
map<int32_t, uint64_t> device_id_module_map_; // key: device_id, value: profiling on module
std::mutex mutex_;
}; };
} // namespace ge } // namespace ge
#endif // GE_COMMON_PROFILING_PROFILING_MANAGER_H_ #endif // GE_COMMON_PROFILING_PROFILING_MANAGER_H_

+ 6
- 3
src/ge/graph/build/task_generator.cc View File

@@ -806,7 +806,8 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi
GELOGI("Start FindProfilingTaskIndex."); GELOGI("Start FindProfilingTaskIndex.");
GE_CHECK_NOTNULL(graph); GE_CHECK_NOTNULL(graph);
const char *profiling_mode = std::getenv(kProfilingMode); const char *profiling_mode = std::getenv(kProfilingMode);
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn();
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() ||
ProfilingManager::Instance().ProfilingTrainingTraceOn();
if (!is_profiling) { if (!is_profiling) {
GELOGW("Profiling is not open."); GELOGW("Profiling is not open.");
return SUCCESS; return SUCCESS;
@@ -853,7 +854,8 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const
vector<uint32_t> &all_reduce_nodes, uint32_t node_index, vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
vector<domi::TaskDef> &task_def_list) { vector<domi::TaskDef> &task_def_list) {
const char *profiling_mode = std::getenv(kProfilingMode); const char *profiling_mode = std::getenv(kProfilingMode);
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn();
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() ||
ProfilingManager::Instance().ProfilingTrainingTraceOn();
if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) ||
(profiling_point.end_index == 0)) { (profiling_point.end_index == 0)) {
return SUCCESS; return SUCCESS;
@@ -909,7 +911,8 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P
vector<domi::TaskDef> &task_def_list) { vector<domi::TaskDef> &task_def_list) {
GE_CHECK_NOTNULL(op_desc); GE_CHECK_NOTNULL(op_desc);
const char *profiling_mode = std::getenv(kProfilingMode); const char *profiling_mode = std::getenv(kProfilingMode);
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn();
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() ||
ProfilingManager::Instance().ProfilingTrainingTraceOn();
if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) ||
(profiling_point.end_index == 0)) { (profiling_point.end_index == 0)) {
return SUCCESS; return SUCCESS;


+ 18
- 15
src/ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -662,7 +662,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
(void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_); (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_);


// collect profiling for ge // collect profiling for ge
if (ProfilingManager::Instance().ProfilingOn()) {
if (ProfilingManager::Instance().ProfilingModelLoadOn()) {
std::vector<ComputeGraphDescInfo> compute_graph_desc_info; std::vector<ComputeGraphDescInfo> compute_graph_desc_info;
Status ret1 = GetComputeGraphInfo(compute_graph, compute_graph_desc_info); Status ret1 = GetComputeGraphInfo(compute_graph, compute_graph_desc_info);
if (ret1 != SUCCESS) { if (ret1 != SUCCESS) {
@@ -2384,14 +2384,15 @@ void *DavinciModel::Run(DavinciModel *model) {
GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(Model_SyncVarData, "Model Run SyncVarData")); GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(Model_SyncVarData, "Model Run SyncVarData"));


GELOGI("Copy input data, model id:%u", model_id); GELOGI("Copy input data, model id:%u", model_id);
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_PRE_PROC_START));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(),
model->SetProfileTime(MODEL_PRE_PROC_START));
ret = model->CopyInputData(current_data, false); ret = model->CopyInputData(current_data, false);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput());
CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC);
continue, "Copy input data to model failed."); // [No need to check value] continue, "Copy input data to model failed."); // [No need to check value]
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_PRE_PROC_END));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_INFER_START));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_START));
if (ProfilingManager::Instance().ProfilingOpTraceOn()) { if (ProfilingManager::Instance().ProfilingOpTraceOn()) {
GELOGI("GetOpTraceIterNum:%d", ProfilingManager::Instance().GetOpTraceIterNum()); GELOGI("GetOpTraceIterNum:%d", ProfilingManager::Instance().GetOpTraceIterNum());
for (int32_t i = 0; i < ProfilingManager::Instance().GetOpTraceIterNum(); i++) { for (int32_t i = 0; i < ProfilingManager::Instance().GetOpTraceIterNum(); i++) {
@@ -2444,10 +2445,11 @@ void *DavinciModel::Run(DavinciModel *model) {
GELOGI("rtStreamSynchronize end."); GELOGI("rtStreamSynchronize end.");
GE_IF_BOOL_EXEC(model->is_first_execute_, GE_IF_BOOL_EXEC(model->is_first_execute_,
GE_TIMESTAMP_EVENT_END(rtStreamSynchronize, "GraphExcute::Wait for rtStreamSynchronize")); GE_TIMESTAMP_EVENT_END(rtStreamSynchronize, "GraphExcute::Wait for rtStreamSynchronize"));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_INFER_END));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_END));
} }


GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_AFTER_PROC_START));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(),
model->SetProfileTime(MODEL_AFTER_PROC_START));
GE_TIMESTAMP_START(ReturnResult3); GE_TIMESTAMP_START(ReturnResult3);
// copy output data from device to host // copy output data from device to host
GE_IF_BOOL_EXEC(!model->output_op_list_.empty(), GE_IF_BOOL_EXEC(!model->output_op_list_.empty(),
@@ -2456,8 +2458,9 @@ void *DavinciModel::Run(DavinciModel *model) {
GE_IF_BOOL_EXEC(model->output_op_list_.empty(), (void)model->ReturnNoOutput(current_data.index)); GE_IF_BOOL_EXEC(model->output_op_list_.empty(), (void)model->ReturnNoOutput(current_data.index));
GE_IF_BOOL_EXEC(model->is_first_execute_, GE_IF_BOOL_EXEC(model->is_first_execute_,
GE_TIMESTAMP_EVENT_END(ReturnResult3, "GraphExcute::CopyDataFromDeviceToHost")); GE_TIMESTAMP_EVENT_END(ReturnResult3, "GraphExcute::CopyDataFromDeviceToHost"));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), model->SetProfileTime(MODEL_AFTER_PROC_END));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), (void)model->SinkTimeProfile(current_data));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(),
model->SetProfileTime(MODEL_AFTER_PROC_END));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)model->SinkTimeProfile(current_data));


model->iterator_count_++; model->iterator_count_++;
model->is_first_execute_ = false; model->is_first_execute_ = false;
@@ -3402,32 +3405,32 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
zero_copy_batch_label_addrs_.clear(); zero_copy_batch_label_addrs_.clear();
} }


GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_PRE_PROC_START));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_START));
Status ret = CopyModelData(input_data, output_data, is_dynamic_); Status ret = CopyModelData(input_data, output_data, is_dynamic_);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy input data to model failed. model id: %u", GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy input data to model failed. model id: %u",
model_id_); model_id_);


GELOGI("current_data.index=%u", input_data.index); GELOGI("current_data.index=%u", input_data.index);
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_PRE_PROC_END));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END));


if (!task_list_.empty()) { if (!task_list_.empty()) {
GELOGD("rtModelExecute do"); GELOGD("rtModelExecute do");
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_INFER_START));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_START));
rtError_t rt_ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0); rtError_t rt_ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0);
GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret)); GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_INFER_END));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END));
GELOGI("rtModelExecute end"); GELOGI("rtModelExecute end");
} }


if (!is_async_mode_) { if (!is_async_mode_) {
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_AFTER_PROC_START));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_START));
ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE); ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy Output data to user failed."); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy Output data to user failed.");
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_AFTER_PROC_END));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_END));
} }


// report model time data // report model time data
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), (void)SinkTimeProfile(input_data));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)SinkTimeProfile(input_data));
GELOGI("Model run end, model id:%u", model_id_); GELOGI("Model run end, model id:%u", model_id_);
return SUCCESS; return SUCCESS;
} }


+ 79
- 12
src/ge/graph/load/new_model_manager/model_manager.cc View File

@@ -35,6 +35,14 @@ thread_local uint32_t device_count = 0;
namespace { namespace {
const int kCmdParSize = 2; const int kCmdParSize = 2;
const int kDumpCmdPairSize = 2; const int kDumpCmdPairSize = 2;
const int kProfStartCmdParaSize = 2;
const std::string kCmdTypeProfile = "profile";
const std::string kCmdTypeDump = "dump";
const std::string kCmdTypeProfiling = "profiling";
const std::string kCmdTypeProfInit = "prof_init";
const std::string kCmdTypeProfFinalize = "prof_finalize";
const std::string kCmdTypeProfStart = "prof_start";
const std::string kCmdTypeProfStop = "prof_stop";
} // namespace } // namespace


DumpProperties ModelManager::dump_properties_; DumpProperties ModelManager::dump_properties_;
@@ -303,7 +311,7 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge


GELOGI("Parse model %u success.", model_id); GELOGI("Parse model %u success.", model_id);


if (ProfilingManager::Instance().ProfilingOn()) {
if (ProfilingManager::Instance().ProfilingModelLoadOn()) {
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 +
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond
davinci_model->SetProfileTime(MODEL_LOAD_END); davinci_model->SetProfileTime(MODEL_LOAD_END);
@@ -531,7 +539,10 @@ Status ModelManager::Stop(uint32_t model_id) {
/// ///
Status ModelManager::HandleCommand(const Command &command) { Status ModelManager::HandleCommand(const Command &command) {
static const std::map<std::string, std::function<uint32_t(const Command &)>> cmds = { static const std::map<std::string, std::function<uint32_t(const Command &)>> cmds = {
{"profile", HandleProfileCommand}, {"dump", HandleDumpCommand}, {"profiling", HandleAclProfilingCommand}};
{kCmdTypeProfile, HandleProfileCommand}, {kCmdTypeDump, HandleDumpCommand},
{kCmdTypeProfiling, HandleAclProfilingCommand}, {kCmdTypeProfInit, HandleProfInitCommand},
{kCmdTypeProfFinalize, HandleProfFinalizeCommand}, {kCmdTypeProfStart, HandleProfStartCommand},
{kCmdTypeProfStop, HandleProfStopCommand}};


auto iter = cmds.find(command.cmd_type); auto iter = cmds.find(command.cmd_type);
if (iter == cmds.end()) { if (iter == cmds.end()) {
@@ -557,6 +568,71 @@ Status ModelManager::HandleAclProfilingCommand(const Command &command) {
return SUCCESS; return SUCCESS;
} }


Status ModelManager::HandleProfInitCommand(const Command &command) {
uint64_t module_index = command.module_index;
if (ProfilingManager::Instance().ProfInit(module_index) != SUCCESS) {
GELOGE(FAILED, "Handle prof init failed.");
return FAILED;
}
return SUCCESS;
}

Status ModelManager::HandleProfFinalizeCommand(const Command &command) {
if (ProfilingManager::Instance().ProfFinalize() != SUCCESS) {
GELOGE(FAILED, "Handle prof finalize failed.");
return FAILED;
}
return SUCCESS;
}
/*
* cmd para when prof start
* "devNums:2"
* "devIdList:1,2"
* "profilingOption:PROF_OP_TRACE"
* "aicoreMetrics:AICORE_ARITHMATIC_THROUGHPUT"
*/
Status ModelManager::HandleProfStartCommand(const Command &command) {
if (command.cmd_params.size() < kProfStartCmdParaSize) {
GELOGE(PARAM_INVALID, "When the cmd_type is 'profile start', the size of cmd_params must larger than 2.");
return PARAM_INVALID;
}
std::map<std::string, std::string> cmd_params_map;
uint32_t step = 2;
for (uint32_t i = 0; i < command.cmd_params.size(); i += step) {
if (i + 1 >= command.cmd_params.size()) {
continue;
}
cmd_params_map[command.cmd_params[i]] = command.cmd_params[i + 1];
}
uint64_t module_index = command.module_index;
if (ProfilingManager::Instance().ProfStartProfiling(module_index, cmd_params_map) != SUCCESS) {
GELOGE(FAILED, "Handle prof start failed.");
return FAILED;
}
return SUCCESS;
}

Status ModelManager::HandleProfStopCommand(const Command &command) {
if (command.cmd_params.size() < kProfStartCmdParaSize) {
GELOGE(PARAM_INVALID, "When the cmd_type is 'profile stop', the size of cmd_params must larger than 2.");
return PARAM_INVALID;
}
std::map<std::string, std::string> cmd_params_map;
uint32_t step = 2;
for (uint32_t i = 0; i < command.cmd_params.size(); i += step) {
if (i + 1 >= command.cmd_params.size()) {
continue;
}
cmd_params_map[command.cmd_params[i]] = command.cmd_params[i + 1];
}
uint64_t module_index = command.module_index;
if (ProfilingManager::Instance().ProfStopProfiling(module_index, cmd_params_map) != SUCCESS) {
GELOGE(FAILED, "Handle prof finalize failed.");
return FAILED;
}
return SUCCESS;
}

Status ModelManager::HandleProfileCommand(const Command &command) { Status ModelManager::HandleProfileCommand(const Command &command) {
if (command.cmd_params.size() < kCmdParSize) { if (command.cmd_params.size() < kCmdParSize) {
GELOGE(PARAM_INVALID, "When the cmd_type is 'profile', the size of cmd_params must larger than 2."); GELOGE(PARAM_INVALID, "When the cmd_type is 'profile', the size of cmd_params must larger than 2.");
@@ -577,15 +653,6 @@ Status ModelManager::HandleProfileCommand(const Command &command) {
if ((map_key == PROFILER_JOBCTX || map_key == PROFILER_TARGET_PATH || map_key == RTS_PROFILE_PATH)) { if ((map_key == PROFILER_JOBCTX || map_key == PROFILER_TARGET_PATH || map_key == RTS_PROFILE_PATH)) {
PropertiesManager::Instance().SetPropertyValue(map_key, value); PropertiesManager::Instance().SetPropertyValue(map_key, value);
} }

if ((map_key == PROFILE_STOP_KEY) && (value == PROFILE_STOP_VALUE)) {
rtError_t rt_ret = rtProfilerStop();
if (rt_ret != RT_ERROR_NONE) {
GELOGE(PARAM_INVALID, "Call rtProfilerStop ret:%d", rt_ret);
return PARAM_INVALID;
}
}

return SUCCESS; return SUCCESS;
} }


@@ -875,7 +942,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model


GELOGI("Parse model %u success.", model_id); GELOGI("Parse model %u success.", model_id);


if (ProfilingManager::Instance().ProfilingOn()) {
if (ProfilingManager::Instance().ProfilingModelLoadOn()) {
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 +
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond
davinci_model->SetProfileTime(MODEL_LOAD_END); davinci_model->SetProfileTime(MODEL_LOAD_END);


+ 4
- 0
src/ge/graph/load/new_model_manager/model_manager.h View File

@@ -158,6 +158,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
static ge::Status HandleAclProfilingCommand(const Command &command); static ge::Status HandleAclProfilingCommand(const Command &command);
static ge::Status HandleProfileCommand(const Command &command); static ge::Status HandleProfileCommand(const Command &command);
static ge::Status HandleDumpCommand(const Command &command); static ge::Status HandleDumpCommand(const Command &command);
static ge::Status HandleProfInitCommand(const Command &command);
static ge::Status HandleProfFinalizeCommand(const Command &command);
static ge::Status HandleProfStartCommand(const Command &command);
static ge::Status HandleProfStopCommand(const Command &command);
/// ///
/// @ingroup domi_ome /// @ingroup domi_ome
/// @brief get model memory usage /// @brief get model memory usage


+ 7
- 5
src/ge/graph/manager/graph_manager.cc View File

@@ -565,10 +565,12 @@ Status GraphManager::PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node,
} }
GM_RUN_AND_DUMP_PERF("Optimize1", OptimizeStage1, compute_graph); GM_RUN_AND_DUMP_PERF("Optimize1", OptimizeStage1, compute_graph);
GM_RUN_AND_DUMP_PERF("InferShape2", compute_graph->InferShapeInNeed); GM_RUN_AND_DUMP_PERF("InferShape2", compute_graph->InferShapeInNeed);

PassManager graph_pass;
GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass))
GE_CHK_STATUS_RET(graph_pass.Run(compute_graph));
const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION");
if (unknown_shape_skip != nullptr) {
PassManager graph_pass;
GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass))
GE_CHK_STATUS_RET(graph_pass.Run(compute_graph));
}


GE_CHK_STATUS_RET(graph_optimize_.IdentifyReference(compute_graph), "Identify reference failed."); GE_CHK_STATUS_RET(graph_optimize_.IdentifyReference(compute_graph), "Identify reference failed.");
GELOGI("PreRun:PreRunOptimizeOriginalGraph success."); GELOGI("PreRun:PreRunOptimizeOriginalGraph success.");
@@ -1951,9 +1953,9 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
names_to_passes.emplace_back("MergePass", &merge_pass); names_to_passes.emplace_back("MergePass", &merge_pass);
names_to_passes.emplace_back("CastRemovePass", &cast_remove_pass); names_to_passes.emplace_back("CastRemovePass", &cast_remove_pass);
names_to_passes.emplace_back("TransposeTransDataPass", &transpose_transdata_pass); names_to_passes.emplace_back("TransposeTransDataPass", &transpose_transdata_pass);
names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass);
names_to_passes.emplace_back("TransOpSymmetryEliminationPass", &symmetry_elimination_pass); names_to_passes.emplace_back("TransOpSymmetryEliminationPass", &symmetry_elimination_pass);
names_to_passes.emplace_back("TransOpNearbyAllreduceFusionPass", &trans_op_nearby_allreduce_fusion_pass); names_to_passes.emplace_back("TransOpNearbyAllreduceFusionPass", &trans_op_nearby_allreduce_fusion_pass);
names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass);
names_to_passes.emplace_back("DimensionComputePass", &dimension_compute_pass); names_to_passes.emplace_back("DimensionComputePass", &dimension_compute_pass);
names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass);
names_to_passes.emplace_back("DimensionAdjustPass", &dimension_adjust_pass); names_to_passes.emplace_back("DimensionAdjustPass", &dimension_adjust_pass);


+ 6
- 1
src/ge/graph/partition/dynamic_shape_partition.cc View File

@@ -43,13 +43,18 @@
#define REQUIRE_SUCCESS(cond, ...) REQUIRE(((cond) == SUCCESS), __VA_ARGS__) #define REQUIRE_SUCCESS(cond, ...) REQUIRE(((cond) == SUCCESS), __VA_ARGS__)
#define REQUIRE_GRAPH_SUCCESS(cond, ...) REQUIRE(((cond) == GRAPH_SUCCESS), __VA_ARGS__) #define REQUIRE_GRAPH_SUCCESS(cond, ...) REQUIRE(((cond) == GRAPH_SUCCESS), __VA_ARGS__)


bool IsExperimental() {
const static bool kIsExperimental = (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") != nullptr);
return kIsExperimental;
}

namespace ge { namespace ge {
using Cluster = DynamicShapePartitioner::Cluster; using Cluster = DynamicShapePartitioner::Cluster;
using ClusterPtr = std::shared_ptr<Cluster>; using ClusterPtr = std::shared_ptr<Cluster>;


Status DynamicShapePartitioner::Partition() { Status DynamicShapePartitioner::Partition() {
REQUIRE_NOT_NULL(root_graph_, "Graph is nullptr."); REQUIRE_NOT_NULL(root_graph_, "Graph is nullptr.");
if (!GraphUtils::IsUnknownShapeGraph(root_graph_)) {
if (!IsExperimental()) {
GELOGD("Skip dynamic shape partition as not in experimental mode."); GELOGD("Skip dynamic shape partition as not in experimental mode.");
REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, false), REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, false),
"Failed set dynamic shape partitioned flag on root graph."); "Failed set dynamic shape partitioned flag on root graph.");


+ 0
- 7
src/ge/graph/passes/ctrl_edge_transfer_pass.cc View File

@@ -20,7 +20,6 @@
#include "framework/common/ge_inner_error_codes.h" #include "framework/common/ge_inner_error_codes.h"
#include "framework/common/util.h" #include "framework/common/util.h"
#include "graph/utils/graph_utils.h" #include "graph/utils/graph_utils.h"
#include "graph/debug/ge_attr_define.h"


namespace ge { namespace ge {
/* Pass Explaination: /* Pass Explaination:
@@ -43,12 +42,6 @@ Status CtrlEdgeTransferPass::Run(ge::ComputeGraphPtr graph) {
GELOGD("CtrlEdgeTransferPass start running"); GELOGD("CtrlEdgeTransferPass start running");
GE_CHECK_NOTNULL(graph); GE_CHECK_NOTNULL(graph);


bool is_dynamic_shape = false;
(void)AttrUtils::GetBool(graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape);
if (!is_dynamic_shape) {
return SUCCESS;
}

for (ge::NodePtr &n : graph->GetDirectNode()) { for (ge::NodePtr &n : graph->GetDirectNode()) {
auto op_desc = n->GetOpDesc(); auto op_desc = n->GetOpDesc();
if (op_desc == nullptr) { if (op_desc == nullptr) {


+ 99
- 99
third_party/fwkacllib/inc/hccl/hccl_types.h View File

@@ -1,99 +1,99 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @file hccl_types.h
* @brief HCCL data type definition
*
*/
#ifndef HCCL_TYPES_H_
#define HCCL_TYPES_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
/**
* @brief HCCL functions return value definition
*/
typedef enum {
HCCL_SUCCESS = 0, /**< success */
HCCL_E_PARA = 1, /**< parameter error */
HCCL_E_PTR = 2, /**< empty pointer */
HCCL_E_MEMORY = 3, /**< memory error */
HCCL_E_INTERNAL = 4, /**< internal error */
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */
HCCL_E_UNAVAIL = 7, /**< resource unavailable */
HCCL_E_SYSCALL = 8, /**< call system interface error */
HCCL_E_TIMEOUT = 9, /**< timeout */
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */
HCCL_E_RUNTIME = 15, /**< call runtime api fail */
HCCL_E_DRV = 16, /**< call driver api fail */
HCCL_E_PROFILING = 17, /**< call profiling api fail */
HCCL_E_CCE = 18, /**< call cce api fail */
HCCL_E_NETWORK = 19, /**< call network api fail */
HCCL_E_RESERVED /**< reserved */
} HcclResult;
/**
* @brief handle to HCCL communicator
*/
typedef void *HcclComm;
/**
* @brief HCCL Reduction opperation
*/
typedef enum {
HCCL_REDUCE_SUM = 0, /**< sum */
HCCL_REDUCE_PROD = 1, /**< prod */
HCCL_REDUCE_MAX = 2, /**< max */
HCCL_REDUCE_MIN = 3, /**< min */
HCCL_REDUCE_RESERVED /**< reserved */
} HcclReduceOp;
/**
* @brief HCCL data type
*/
typedef enum {
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */
HCCL_DATA_TYPE_RESERVED /**< reserved */
} HcclDataType;
const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length
/**
* @brief HCCL root info
*/
typedef struct HcclRootInfoDef {
char internal[HCCL_ROOT_INFO_BYTES];
} HcclRootInfo;
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // HCCL_TYPES_H_
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @file hccl_types.h
* @brief HCCL data type definition
*
*/
#ifndef HCCL_TYPES_H_
#define HCCL_TYPES_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
/**
* @brief HCCL functions return value definition
*/
typedef enum {
HCCL_SUCCESS = 0, /**< success */
HCCL_E_PARA = 1, /**< parameter error */
HCCL_E_PTR = 2, /**< empty pointer */
HCCL_E_MEMORY = 3, /**< memory error */
HCCL_E_INTERNAL = 4, /**< internal error */
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */
HCCL_E_UNAVAIL = 7, /**< resource unavailable */
HCCL_E_SYSCALL = 8, /**< call system interface error */
HCCL_E_TIMEOUT = 9, /**< timeout */
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */
HCCL_E_RUNTIME = 15, /**< call runtime api fail */
HCCL_E_DRV = 16, /**< call driver api fail */
HCCL_E_PROFILING = 17, /**< call profiling api fail */
HCCL_E_CCE = 18, /**< call cce api fail */
HCCL_E_NETWORK = 19, /**< call network api fail */
HCCL_E_RESERVED /**< reserved */
} HcclResult;
/**
* @brief handle to HCCL communicator
*/
typedef void *HcclComm;
/**
* @brief HCCL Reduction opperation
*/
typedef enum {
HCCL_REDUCE_SUM = 0, /**< sum */
HCCL_REDUCE_PROD = 1, /**< prod */
HCCL_REDUCE_MAX = 2, /**< max */
HCCL_REDUCE_MIN = 3, /**< min */
HCCL_REDUCE_RESERVED /**< reserved */
} HcclReduceOp;
/**
* @brief HCCL data type
*/
typedef enum {
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */
HCCL_DATA_TYPE_RESERVED /**< reserved */
} HcclDataType;
const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length
/**
* @brief HCCL root info
*/
typedef struct HcclRootInfoDef {
char internal[HCCL_ROOT_INFO_BYTES];
} HcclRootInfo;
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // HCCL_TYPES_H_

+ 1
- 1
third_party/fwkacllib/inc/register/host_cpu_context.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 2
- 2
third_party/fwkacllib/inc/runtime/base.h View File

@@ -426,13 +426,13 @@ RTS_API rtError_t rtProfilerConfig(uint16_t type);
* @ingroup profiling_base * @ingroup profiling_base
* @brief start rts profiler. * @brief start rts profiler.
*/ */
RTS_API rtError_t rtProfilerStart(void);
RTS_API rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList);


/** /**
* @ingroup profiling_base * @ingroup profiling_base
* @brief stop rts profiler. * @brief stop rts profiler.
*/ */
RTS_API rtError_t rtProfilerStop(void);
RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList);


/** /**
* @ingroup profiling_base * @ingroup profiling_base


+ 155
- 0
third_party/fwkacllib/inc/toolchain/prof_acl_api.h View File

@@ -0,0 +1,155 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MSPROF_ENGINE_PROF_ACL_API_H_
#define MSPROF_ENGINE_PROF_ACL_API_H_

#define MSVP_MAX_DEV_NUM 64
#define MSVP_PROF_API __attribute__((visibility("default")))

// DataTypeConfig
#define PROF_ACL_API 0x0001
#define PROF_TASK_TIME 0x0002
#define PROF_AICORE_METRICS 0x0004
#define PROF_AICPU_TRACE 0x0008
#define PROF_MODEL_EXECUTE 0x0010
#define PROF_RUNTIME_API 0x0020
#define PROF_RUNTIME_TRACE 0x0040
#define PROF_SCHEDULE_TIMELINE 0x0080
#define PROF_SCHEDULE_TRACE 0x0100
#define PROF_AIVECTORCORE_METRICS 0x0200
#define PROF_SUBTASK_TIME 0x0400

#define PROF_TRAINING_TRACE 0x0800
#define PROF_HCCL_TRACE 0x1000
#define PROF_DATA_PROCESS 0x2000
#define PROF_TASK_TRACE 0x3842

#define PROF_MODEL_LOAD 0x8000000000000000

// DataTypeConfig MASK
#define PROF_ACL_API_MASK 0x0001
#define PROF_TASK_TIME_MASK 0x0002
#define PROF_AICORE_METRICS_MASK 0x0004
#define PROF_AICPU_TRACE_MASK 0x0008
#define PROF_MODEL_EXECUTE_MASK 0x0010
#define PROF_RUNTIME_API_MASK 0x0020
#define PROF_RUNTIME_TRACE_MASK 0x0040
#define PROF_SCHEDULE_TIMELINE_MASK 0x0080
#define PROF_SCHEDULE_TRACE_MASK 0x0100
#define PROF_AIVECTORCORE_METRICS_MASK 0x0200
#define PROF_SUBTASK_TIME_MASK 0x0400

#define PROF_TRAINING_TRACE_MASK 0x0800
#define PROF_HCCL_TRACE_MASK 0x1000
#define PROF_DATA_PROCESS_MASK 0x2000

#define PROF_MODEL_LOAD_MASK 0x8000000000000000

#include <cstdint>
#include <string>

/**
* @name ProrErrorCode
* @brief error code enum of prof_acl_apis
*/
enum ProfErrorCode {
PROF_ERROR_NONE = 0, // ok
PROF_ERROR_PARAM_INVALID, // param invalid, for example nullptr
PROF_ERROR_REPEAT_INIT, // profiling has already been inited
PROF_ERROR_CONFIG_INVALID, // config invalid, for example invalid json string
PROF_ERROR_DIR_NO_ACCESS, // dir is not accessable
PROF_ERROR_FAILURE, // failed to init or start profiling
PROF_ERROR_NOT_INITED, // profiling has not been inited
PROF_ERROR_DEVICE_INVALID, // device id invalid
PROF_ERROR_UNSUPPORTED, // unsupported data type or ai core metrics
PROF_ERROR_REPEAT_START, // profiilng has already been started
PROF_ERROR_NOT_STARTED, // profiling has not been started
};

/**
* @brief transfer profiling config in acl.json to sample config
* @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...}
* @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]}
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg);

/**
* @name ProfInit
* @brief init profiling
* @param profInitCfg [IN] config of init profiling of json format
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg);

/**
* @name ProfAicoreMetrics
* @brief aicore metrics enum
*/
enum ProfAicoreMetrics {
PROF_AICORE_ARITHMATIC_THROUGHPUT = 0,
PROF_AICORE_PIPELINE = 1,
PROF_AICORE_SYNCHRONIZATION = 2,
PROF_AICORE_MEMORY = 3,
PROF_AICORE_INTERNAL_MEMORY = 4,
PROF_AICORE_STALL = 5,
PROF_AICORE_EVENT = 255
};

/**
* @name ProfConfig
* @brief struct of ProfStart
*/
struct ProfConfig {
uint32_t devNums; // length of device id list
uint32_t devIdList[MSVP_MAX_DEV_NUM]; // physical device id list
ProfAicoreMetrics aicoreMetrics; // aicore metric
uint64_t dataTypeConfig; // data type to start profiling
};

/**
* @name ProfStartProfiling
* @brief start profiling
* @param profStartCfg [IN] config to start profiling
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg);

/**
* @name ProfStopConfig
* @brief struct of ProfStop
*/
struct ProfStopConfig {
uint64_t padding;
};

/**
* @name ProfStopProfiling
* @brief stop profiling
* @param profStopCfg [IN] config to stop profiling
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg);

/**
* @name ProfFinalize
* @brief finalize profiling task
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfFinalize();

#endif // MSPROF_ENGINE_PROF_ACL_API_H_

Loading…
Cancel
Save