@@ -21,6 +21,7 @@ | |||
#include "framework/common/string_util.h" | |||
#include "graph/ge_context.h" | |||
#include "runtime/base.h" | |||
#include "graph/load/new_model_manager/davinci_model.h" | |||
namespace { | |||
const char *const kJobID = "jobID"; | |||
@@ -39,10 +40,12 @@ const std::string kConfigNumsdev = "devNums"; | |||
const std::string kConfigDevIdList = "devIdList"; | |||
const std::string kProfStart = "prof_start"; | |||
const std::string kProfStop = "prof_stop"; | |||
const std::string kProfModelSubscribe = "prof_model_subscribe"; | |||
const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; | |||
} // namespace | |||
namespace ge { | |||
ProfilingManager::ProfilingManager() {} | |||
ProfilingManager::ProfilingManager() : subscribe_count_(0) {} | |||
ProfilingManager::~ProfilingManager() {} | |||
@@ -54,6 +57,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager &ProfilingMana | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
vector<int32_t>().swap(device_id_); | |||
subscribe_count_ = 0; | |||
job_id_ = options.job_id; | |||
GELOGI("ProfilingManager::Init job_id:%s", job_id_.c_str()); | |||
@@ -382,7 +386,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo( | |||
const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) { | |||
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); | |||
if (reporter == nullptr) { | |||
@@ -401,7 +405,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin | |||
.append(op_name).append(" ") | |||
.append(std::to_string(block_dim).append(" ") | |||
.append(std::to_string(task_id)).append(" ") | |||
.append(std::to_string(stream_id)).append("\n")); | |||
.append(std::to_string(stream_id)).append(" ") | |||
.append(std::to_string(model_id)).append("\n")); | |||
Msprof::Engine::ReporterData reporter_data{}; | |||
reporter_data.deviceId = device_id; | |||
@@ -425,7 +430,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo( | |||
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, const int32_t &device_id) { | |||
uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, const int32_t &device_id) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); | |||
GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return;); | |||
@@ -483,6 +488,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin | |||
data.append("\""); | |||
} | |||
data.append(" model_id:").append(std::to_string(model_id)); | |||
data.append("\n"); | |||
Msprof::Engine::ReporterData reporter_data{}; | |||
@@ -537,7 +544,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUn | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData( | |||
const std::vector<TaskDescInfo> &task_desc_info, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) { | |||
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | |||
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, | |||
bool check_device) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
int32_t logic_device_id = 0; | |||
rtError_t rt_ret = rtGetDevice(&logic_device_id); | |||
@@ -546,7 +555,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr | |||
return; | |||
} | |||
GELOGI("current logic_device_id:%d", logic_device_id); | |||
if (!is_acl_api_mode_) { | |||
if (check_device) { | |||
auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id); | |||
if (ret == device_id_.end()) { | |||
GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); | |||
@@ -554,9 +563,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr | |||
} | |||
} | |||
GELOGI("start ProfilingTaskDescInfo."); | |||
ProfilingTaskDescInfo(task_desc_info, logic_device_id); | |||
ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id); | |||
GELOGI("start ProfilingGraphDescInfo."); | |||
ProfilingGraphDescInfo(compute_graph_desc_info, logic_device_id); | |||
ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id); | |||
GELOGI("Report profiling data for GE end."); | |||
#endif | |||
} | |||
@@ -581,6 +590,105 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t ProfilingManager::GetP | |||
return module; | |||
} | |||
// Maintain the per-device profiling subscription reference count.
// prof_type  : kProfModelSubscribe increments the count (creating the entry
//              with `module` on the first subscribe for the device);
//              kProfModelUnsubscribe decrements it, never below zero;
//              any other value is a no-op.
// device_id  : logical device the model is loaded on.
// module     : profiling module mask recorded on first subscribe.
void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type,
                                                      uint32_t device_id,
                                                      uint64_t module) {
#ifdef DAVINCI_SUPPORT_PROFILING
  if (prof_type == kProfModelSubscribe) {
    auto iter = subs_dev_module_.find(device_id);
    if (iter != subs_dev_module_.end()) {
      iter->second.subscribe_count++;
    } else {
      // First subscription on this device: remember the module mask so the
      // matching rtProfilerStop can be issued with the same mask later.
      DeviceSubsInfo dev_info;
      dev_info.module = module;
      dev_info.subscribe_count = 1;
      subs_dev_module_[device_id] = dev_info;
    }
  } else if (prof_type == kProfModelUnsubscribe) {
    auto iter = subs_dev_module_.find(device_id);
    // Guard against underflow if unsubscribe is called more often than subscribe.
    if (iter != subs_dev_module_.end() && iter->second.subscribe_count > 0) {
      iter->second.subscribe_count--;
    }
  } else {
    GELOGI("No need to update device_id module map.");
  }
#endif
}
// Subscribe a loaded model to profiling.
// module : profiling module mask (must contain PROF_MODEL_LOAD_MASK to
//          trigger engine registration on the first subscription).
// model  : opaque pointer to the DavinciModel being subscribed; must not be null.
// Returns SUCCESS, or FAILED/PARAM_INVALID on any step that fails.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfModelSubscribe(
    uint64_t module, void *model) {
#ifdef DAVINCI_SUPPORT_PROFILING
  std::lock_guard<std::mutex> lock(mutex_);
  GE_CHECK_NOTNULL(model);
  uint64_t model_load_mask = module & PROF_MODEL_LOAD_MASK;
  if ((subscribe_count_ == 0) && (model_load_mask == PROF_MODEL_LOAD_MASK)) {
    // register framework to profiling
    int32_t result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_);
    if (result != SUCCESS) {
      GELOGE(FAILED, "Register profiling engine failed.");
      return FAILED;
    }
    GELOGI("Prof subscribe: model load profiling on.");
  }
  auto davinci_model = static_cast<DavinciModel *>(model);
  int32_t device_num = 1;
  uint32_t device[1];
  device[0] = davinci_model->GetDeviceId();
  rtError_t rt_ret = rtProfilerStart(module, device_num, device);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(FAILED, "Runtime profiler start failed.");
    return FAILED;
  }
  // Count the subscription only after the runtime profiler actually started,
  // so a failed start cannot leave subscribe_count_ out of sync with
  // subs_dev_module_ (the original incremented before rtProfilerStart).
  subscribe_count_++;
  UpdateSubscribeDeviceModuleMap(kProfModelSubscribe, device[0], module);
  // Report profiling data
  Status p_ret = davinci_model->ReportProfilingData(false);
  if (p_ret != SUCCESS) {
    GELOGE(p_ret, "Report profiling data failed.");
    return p_ret;
  }
#endif
  return SUCCESS;
}
// Cancel a model's profiling subscription.
// model : opaque pointer to the DavinciModel being unsubscribed; must not be null.
// Stops the runtime profiler on the model's device when its last subscription
// goes away, and uninitializes the profiling engine when the global
// subscription count drops to zero.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfModelUnsubscribe(
    void *model) {
#ifdef DAVINCI_SUPPORT_PROFILING
  std::lock_guard<std::mutex> lock(mutex_);
  if (subscribe_count_ == 0) {
    GELOGW("The profiler has not been subscribed, you do not need to cancel the subscription.");
    return SUCCESS;
  }
  GE_CHECK_NOTNULL(model);
  auto davinci_model = static_cast<DavinciModel *>(model);
  int32_t dev_num = 1;
  uint32_t device[1];
  device[0] = davinci_model->GetDeviceId();
  auto iter = subs_dev_module_.find(device[0]);
  if (iter != subs_dev_module_.end()) {
    if (iter->second.subscribe_count == 1) {
      // Last subscriber on this device: stop the runtime profiler with the
      // module mask recorded at subscribe time.
      rtError_t rt_ret = rtProfilerStop(iter->second.module, dev_num, device);
      if (rt_ret != RT_ERROR_NONE) {
        GELOGE(FAILED, "Runtime profiler stop failed.");
        return FAILED;
      }
    }
    UpdateSubscribeDeviceModuleMap(kProfModelUnsubscribe, device[0], iter->second.module);
  }
  subscribe_count_--;
  if (subscribe_count_ == 0) {
    int32_t ret = Msprof::Engine::UnInit(GE_PROFILING_MODULE);
    if (ret != SUCCESS) {
      GELOGE(ret, "Profiling plugin uninit failed, ret:%d", ret);
      return ret;
    }
  }
#endif
  return SUCCESS;
}
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfInit(uint64_t module) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
std::lock_guard<std::mutex> lock(mutex_); | |||
@@ -748,6 +856,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt | |||
device_id_ptr[i] = static_cast<uint32_t>(device_list[i]); | |||
} | |||
GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num); | |||
rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get()); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(FAILED, "Runtime profiler config proc failed."); | |||
@@ -39,6 +39,10 @@ namespace { | |||
const std::string GE_PROFILING_MODULE = "Framework"; | |||
} // namespace | |||
namespace ge { | |||
// Per-device profiling subscription state (value type of subs_dev_module_).
struct DeviceSubsInfo {
  uint64_t module = 0;           // profiling module mask recorded on first subscribe
  uint32_t subscribe_count = 0;  // number of live model subscriptions on the device
};
// register Plugin | |||
class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY PluginImpl : public Msprof::Engine::PluginIntf { | |||
public: | |||
@@ -73,6 +77,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
ge::Status InitFromOptions(const Options &options); | |||
ge::Status InitFromAclCfg(const std::string &config); | |||
ge::Status StartProfiling(int32_t iter, int32_t device_id); | |||
void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module); | |||
ge::Status ProfModelSubscribe(uint64_t module, void *model); | |||
ge::Status ProfModelUnsubscribe(void *model); | |||
ge::Status ProfInit(uint64_t module); | |||
ge::Status ProfFinalize(); | |||
ge::Status ProfStartProfiling(uint64_t module, const std::map<std::string, std::string> &config_para); | |||
@@ -84,13 +91,16 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
bool ProfilingModelLoadOn() const { return is_load_profiling_; } | |||
bool ProfilingModelExecuteOn() const; | |||
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // only used by command pattern | |||
bool IsAclApiMode() const { return is_acl_api_mode_; } | |||
int32_t GetOpTraceIterNum() const { return op_trace_iter_num_; } | |||
void ReportProfilingData(const std::vector<TaskDescInfo> &task_desc_info, | |||
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info); | |||
void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | |||
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, | |||
bool check_device); | |||
void Report(const int32_t &device_id, const string &data, Msprof::Engine::Reporter &reporter, | |||
Msprof::Engine::ReporterData &reporter_data); | |||
void ProfilingTaskDescInfo(const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id); | |||
void ProfilingGraphDescInfo(const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, | |||
void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | |||
const int32_t &device_id); | |||
void ProfilingGraphDescInfo(uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, | |||
const int32_t &device_id); | |||
void SetProfilingConfig(const string &profiling_cfg); | |||
vector<int32_t> GetProfilingDeviceId() const { return device_id_; } | |||
@@ -122,6 +132,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
string task_trace_conf_; | |||
const ProfilingEngineImpl engine_; | |||
map<int32_t, uint64_t> device_id_module_map_; // key: device_id, value: profiling on module | |||
map<uint32_t, DeviceSubsInfo> subs_dev_module_; // key: device_id, value: profiling on module | |||
uint32_t subscribe_count_; | |||
std::mutex mutex_; | |||
}; | |||
} // namespace ge | |||
@@ -54,6 +54,7 @@ const std::map<std::string, std::string> PROFILE_COMPONENT_MAP{ | |||
{"runtime", RTS_PROFILE}, | |||
}; | |||
const std::string PROFILE_CONFIG = "config"; | |||
const std::string PROFILE_MODEL_ID = "modelId"; | |||
REGISTER_OPTYPE_DEFINE(DATA, "Data"); | |||
REGISTER_OPTYPE_DEFINE(AIPPDATA, "AippData"); | |||
@@ -1062,6 +1062,19 @@ Status GeExecutor::ReleaseSingleOpResource(void *stream) { | |||
return SingleOpManager::GetInstance().ReleaseResource(stream); | |||
} | |||
// Look up the logical device id a loaded model resides on.
// model_id  : id returned by LoadModel*; must refer to a currently loaded model.
// device_id : output, set only on SUCCESS.
// Returns FAILED when the model id is unknown or the model is not loaded.
Status GeExecutor::GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id) {
  auto model_manager = ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  auto davinci_model = model_manager->GetModel(model_id);
  if (davinci_model == nullptr) {
    // %u (not %d): model_id is unsigned; also fixes the "invaild" typo.
    GELOGE(FAILED, "Model id: %u is invalid or model is not loaded.", model_id);
    return FAILED;
  }
  device_id = davinci_model->GetDeviceId();
  return SUCCESS;
}
Status GeExecutor::GetBatchInfoSize(uint32_t model_id, size_t &shape_count) { | |||
std::vector<std::vector<int64_t>> batch_info; | |||
int32_t dynamic_type = static_cast<int32_t>(FIXED); | |||
@@ -86,6 +86,7 @@ class DataDumper { | |||
void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; } | |||
const DumpProperties &GetDumpProperties() const { return dump_properties_; } | |||
bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const; | |||
const std::vector<OpDescInfo> &GetAllOpDescInfo() const { return op_desc_info_; } | |||
// Dump exception info | |||
Status DumpExceptionInput(const OpDescInfo &op_desc_info, const string &dump_file); | |||
@@ -258,7 +258,6 @@ Status DavinciModel::Assign(const GeModelPtr &ge_model) { | |||
/// | |||
void DavinciModel::Shrink() { | |||
ge_model_.reset(); // delete object. | |||
op_list_.clear(); | |||
} | |||
Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { | |||
@@ -653,18 +652,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
GE_IF_BOOL_EXEC(IsBroadCastOpData(node), | |||
(void)ge::AttrUtils::SetStr(op_desc, VAR_ATTR_VAR_IS_BROADCAST, "var_is_restore");); | |||
} | |||
// for profiling | |||
op_name_map_ = compute_graph->GetGraphOpName(); | |||
vector<string> op_name; | |||
GE_IF_BOOL_EXEC(ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_TASK_INDEX_OP_NAME, op_name), | |||
GELOGI("get str of task_index_op_name")); | |||
if (op_name_map_.empty()) { | |||
for (size_t idx = 0; idx < op_name.size(); idx++) { | |||
op_name_map_[idx] = op_name[idx]; | |||
} | |||
GELOGI("Infer profiling: op_name_size(%zu)", op_name.size()); | |||
} | |||
GE_CHK_STATUS_RET(InitNodes(compute_graph), "Init nodes failed"); | |||
@@ -700,15 +687,13 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
} | |||
// collect profiling for ge | |||
if (ProfilingManager::Instance().ProfilingModelLoadOn()) { | |||
std::vector<ComputeGraphDescInfo> compute_graph_desc_info; | |||
Status ret1 = GetComputeGraphInfo(compute_graph, compute_graph_desc_info); | |||
if (ret1 != SUCCESS) { | |||
GELOGE(ret1, "GetComputeGraphInfo failed."); | |||
return ret1; | |||
auto &profiling_manager = ProfilingManager::Instance(); | |||
if (profiling_manager.ProfilingModelLoadOn()) { | |||
Status p_ret = ReportProfilingData(!profiling_manager.IsAclApiMode()); | |||
if (p_ret != SUCCESS) { | |||
GELOGE(p_ret, "Report profiling data failed."); | |||
return p_ret; | |||
} | |||
ProfilingManager::Instance().ReportProfilingData(GetTaskDescInfo(), compute_graph_desc_info); | |||
GE_CHK_STATUS(SinkModelProfile(), "Sink model profile failed."); | |||
} | |||
Shrink(); | |||
@@ -716,6 +701,20 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
return ret; | |||
} | |||
// Collect this model's task and graph descriptions and hand them to the
// profiling manager, then sink the model profile and release the op list.
// check_device : forwarded to ProfilingManager; when true the report is
//                restricted to devices known to the profiler.
Status DavinciModel::ReportProfilingData(bool check_device) {
  std::vector<ComputeGraphDescInfo> compute_graph_desc_info;
  const Status graph_ret = GetComputeGraphInfo(compute_graph_desc_info);
  if (graph_ret != SUCCESS) {
    GELOGE(graph_ret, "GetComputeGraphInfo failed.");
    return graph_ret;
  }
  auto &profiling_manager = ProfilingManager::Instance();
  profiling_manager.ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info, check_device);
  GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed.");
  // Task/op bookkeeping is no longer needed once the data has been reported.
  op_list_.clear();
  return SUCCESS;
}
/// | |||
/// @ingroup ge | |||
/// @brief Travel all nodes and determine if destruction is required. | |||
@@ -2909,34 +2908,25 @@ Status DavinciModel::DistributeTask() { | |||
SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); | |||
} | |||
} | |||
// get op_name by task_index | |||
if (task->GetCtx() != nullptr) { | |||
auto iter = op_name_map_.find(task_index); | |||
if (iter == op_name_map_.end()) { | |||
continue; | |||
} | |||
// else task index is found in op_name_map_ | |||
TaskDescInfo task_desc_info; | |||
string op_name = op_name_map_[task_index]; | |||
if (!om_name_.empty()) { | |||
task_desc_info.model_name = om_name_; | |||
} else { | |||
task_desc_info.model_name = name_; | |||
} | |||
task_desc_info.op_name = op_name; | |||
task_desc_info.block_dim = model_task_def->task(task_index).kernel().block_dim(); | |||
task_desc_info.task_id = task->GetTaskID(); | |||
task_desc_info.stream_id = task->GetStreamId(); | |||
task_desc_info_.emplace_back(task_desc_info); | |||
if (flag) { | |||
if (task->GetSktTaskID() != 0xFFFFFFFF) { | |||
TaskDescInfo task_desc_info; | |||
string op_name = "super_kernel_" + to_string(task_index); | |||
task_desc_info.op_name = op_name; | |||
task_desc_info.task_id = task->GetSktTaskID(); | |||
task_desc_info_.emplace_back(task_desc_info); | |||
} | |||
// Load task info for profiling | |||
TaskDescInfo task_desc_info; | |||
if (!om_name_.empty()) { | |||
task_desc_info.model_name = om_name_; | |||
} else { | |||
task_desc_info.model_name = name_; | |||
} | |||
task_desc_info.op_name = op->GetName(); | |||
task_desc_info.block_dim = model_task_def->task(task_index).kernel().block_dim(); | |||
task_desc_info.task_id = task->GetTaskID(); | |||
task_desc_info.stream_id = task->GetStreamId(); | |||
task_desc_info_.emplace_back(task_desc_info); | |||
if (flag) { | |||
if (task->GetSktTaskID() != 0xFFFFFFFF) { | |||
TaskDescInfo task_desc_info; | |||
string op_name = "super_kernel_" + to_string(task_index); | |||
task_desc_info.op_name = op_name; | |||
task_desc_info.task_id = task->GetSktTaskID(); | |||
task_desc_info_.emplace_back(task_desc_info); | |||
} | |||
} | |||
} | |||
@@ -3826,50 +3816,31 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea | |||
main_follow_stream_mapping_[main_stream_id].emplace_back(stream); | |||
} | |||
Status DavinciModel::GetComputeGraphInfo(const ComputeGraphPtr &graph, vector<ComputeGraphDescInfo> &graph_desc_info) { | |||
Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info) { | |||
GELOGI("GetComputeGraphInfo start."); | |||
for (auto &node : graph->GetAllNodes()) { | |||
auto &all_op_desc = data_dumper_.GetAllOpDescInfo(); | |||
for (auto &op_desc : all_op_desc) { | |||
ComputeGraphDescInfo compute_graph_info; | |||
auto op_desc = node->GetOpDesc(); | |||
if (op_desc == nullptr) { | |||
GELOGE(PARAM_INVALID, "op_desc is nullptr."); | |||
return PARAM_INVALID; | |||
if (!om_name_.empty()) { | |||
compute_graph_info.model_name = om_name_; | |||
} else { | |||
compute_graph_info.model_name = name_; | |||
} | |||
compute_graph_info.op_name = op_desc.op_name; | |||
compute_graph_info.op_type = op_desc.op_type; | |||
compute_graph_info.input_format = op_desc.input_format; | |||
compute_graph_info.input_shape = op_desc.input_shape; | |||
compute_graph_info.input_data_type = op_desc.input_data_type; | |||
compute_graph_info.output_format = op_desc.output_format; | |||
compute_graph_info.output_shape = op_desc.output_shape; | |||
compute_graph_info.output_data_type = op_desc.output_data_type; | |||
auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID); | |||
if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && | |||
op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) { | |||
if (!om_name_.empty()) { | |||
compute_graph_info.model_name = om_name_; | |||
} else { | |||
compute_graph_info.model_name = name_; | |||
} | |||
compute_graph_info.op_name = op_desc->GetName(); | |||
compute_graph_info.op_type = op_desc->GetType(); | |||
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||
GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); | |||
if (input_desc == nullptr) { | |||
continue; | |||
} | |||
compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); | |||
compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); | |||
compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); | |||
} | |||
for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { | |||
GeTensorDesc output_desc = op_desc->GetOutputDesc(j); | |||
compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); | |||
compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); | |||
compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); | |||
} | |||
graph_desc_info.emplace_back(compute_graph_info); | |||
} | |||
graph_desc_info.emplace_back(compute_graph_info); | |||
} | |||
GELOGI("GetComputeGraphInfo end."); | |||
return SUCCESS; | |||
} | |||
void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) { | |||
if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) { | |||
tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_; | |||
@@ -439,6 +439,8 @@ class DavinciModel { | |||
Status SinkTimeProfile(const InputData ¤t_data); | |||
Status ReportProfilingData(bool check_device = true); | |||
void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) { | |||
data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id); | |||
} | |||
@@ -830,7 +832,7 @@ class DavinciModel { | |||
Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); | |||
// get desc info of graph for profiling | |||
Status GetComputeGraphInfo(const ComputeGraphPtr &graph, vector<ComputeGraphDescInfo> &graph_desc_info); | |||
Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info); | |||
void SetDataDumperArgs(const ComputeGraphPtr &compute_graph); | |||
@@ -949,7 +951,6 @@ class DavinciModel { | |||
std::map<std::string, uint32_t> used_tbe_handle_map_; | |||
// for profiling task and graph info | |||
std::map<uint32_t, std::string> op_name_map_; | |||
std::vector<TaskDescInfo> task_desc_info_; | |||
int64_t maxDumpOpNum_; | |||
@@ -43,6 +43,8 @@ const std::string kCmdTypeProfInit = "prof_init"; | |||
const std::string kCmdTypeProfFinalize = "prof_finalize"; | |||
const std::string kCmdTypeProfStart = "prof_start"; | |||
const std::string kCmdTypeProfStop = "prof_stop"; | |||
const std::string kCmdTypeProfModelSubscribe = "prof_model_subscribe"; | |||
const std::string kCmdTypeProfModelUnsubscribe = "prof_model_cancel_subscribe"; | |||
const char *const kBatchLoadBuf = "batchLoadsoFrombuf"; | |||
const char *const kDeleteCustOp = "deleteCustOp"; | |||
struct CustAicpuSoBuf { | |||
@@ -334,11 +336,9 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge | |||
GELOGI("Parse model %u success.", model_id); | |||
if (ProfilingManager::Instance().ProfilingModelLoadOn()) { | |||
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + | |||
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond | |||
davinci_model->SetProfileTime(MODEL_LOAD_END); | |||
} | |||
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + | |||
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond | |||
davinci_model->SetProfileTime(MODEL_LOAD_END); | |||
} while (0); | |||
GE_CHK_RT(rtDeviceReset(static_cast<int32_t>(GetContext().DeviceId()))); | |||
@@ -565,7 +565,9 @@ Status ModelManager::HandleCommand(const Command &command) { | |||
{kCmdTypeProfile, HandleProfileCommand}, {kCmdTypeDump, HandleDumpCommand}, | |||
{kCmdTypeProfiling, HandleAclProfilingCommand}, {kCmdTypeProfInit, HandleProfInitCommand}, | |||
{kCmdTypeProfFinalize, HandleProfFinalizeCommand}, {kCmdTypeProfStart, HandleProfStartCommand}, | |||
{kCmdTypeProfStop, HandleProfStopCommand}}; | |||
{kCmdTypeProfStop, HandleProfStopCommand}, | |||
{kCmdTypeProfModelSubscribe, HandleProfModelSubscribeCommand}, | |||
{kCmdTypeProfModelUnsubscribe, HandleProfModelUnsubscribeCommand}}; | |||
auto iter = cmds.find(command.cmd_type); | |||
if (iter == cmds.end()) { | |||
@@ -591,6 +593,77 @@ Status ModelManager::HandleAclProfilingCommand(const Command &command) { | |||
return SUCCESS; | |||
} | |||
// Resolve the DavinciModel referenced by a profiling command.
// Expects cmd_params = { PROFILE_MODEL_ID, "<decimal model id>" }.
// davinci_model : output, set only on SUCCESS.
// Returns PARAM_INVALID for malformed commands, FAILED for unknown models.
Status ModelManager::GetModelByCmd(const Command &command,
                                   std::shared_ptr<DavinciModel> &davinci_model) {
  if (command.cmd_params.size() < kCmdParSize) {
    GELOGE(PARAM_INVALID, "When the cmd_type is '%s', the size of cmd_params must larger than 2.",
           command.cmd_type.c_str());
    return PARAM_INVALID;
  }
  // Bind by const reference: no need to copy the parameter strings.
  const std::string &map_key = command.cmd_params[0];
  const std::string &value = command.cmd_params[1];
  if (map_key != PROFILE_MODEL_ID) {
    GELOGE(FAILED, "The model_id parameter is not found in the command.");
    return FAILED;
  }
  int32_t model_id = 0;
  try {
    model_id = std::stoi(value);
  } catch (std::invalid_argument &) {
    GELOGE(PARAM_INVALID, "Model id: %s is invalid.", value.c_str());
    return PARAM_INVALID;
  } catch (std::out_of_range &) {
    GELOGE(PARAM_INVALID, "Model id: %s is out of range.", value.c_str());
    return PARAM_INVALID;
  } catch (...) {
    GELOGE(FAILED, "Model id: %s cannot change to int.", value.c_str());
    return FAILED;
  }
  // std::stoi accepts negative input; reject it before the unsigned cast below
  // silently turns it into a huge bogus id.
  if (model_id < 0) {
    GELOGE(PARAM_INVALID, "Model id: %s is invalid.", value.c_str());
    return PARAM_INVALID;
  }
  auto model_manager = ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  davinci_model = model_manager->GetModel(static_cast<uint32_t>(model_id));
  if (davinci_model == nullptr) {
    GELOGE(FAILED, "Model id: %d is invalid or model is not loaded.", model_id);
    return FAILED;
  }
  return SUCCESS;
}
// Command handler: subscribe the model named in `command` to profiling.
// Resolves the model from the command parameters, then forwards the
// subscription (module mask + raw model pointer) to the profiling manager.
Status ModelManager::HandleProfModelSubscribeCommand(const Command &command) {
  std::shared_ptr<DavinciModel> davinci_model = nullptr;
  const Status lookup_ret = GetModelByCmd(command, davinci_model);
  if (lookup_ret != SUCCESS) {
    return lookup_ret;
  }
  auto &prof_mgr = ProfilingManager::Instance();
  const Status sub_ret = prof_mgr.ProfModelSubscribe(command.module_index,
                                                     static_cast<void *>(davinci_model.get()));
  if (sub_ret != SUCCESS) {
    GELOGE(FAILED, "Handle prof model subscribe failed.");
    return FAILED;
  }
  return SUCCESS;
}
// Command handler: cancel the profiling subscription of the model named in
// `command`. Resolves the model, then forwards the raw pointer to the
// profiling manager for unsubscription.
Status ModelManager::HandleProfModelUnsubscribeCommand(const Command &command) {
  std::shared_ptr<DavinciModel> davinci_model = nullptr;
  const Status lookup_ret = GetModelByCmd(command, davinci_model);
  if (lookup_ret != SUCCESS) {
    return lookup_ret;
  }
  auto &prof_mgr = ProfilingManager::Instance();
  if (prof_mgr.ProfModelUnsubscribe(static_cast<void *>(davinci_model.get())) != SUCCESS) {
    GELOGE(FAILED, "Handle prof model unsubscribe failed.");
    return FAILED;
  }
  return SUCCESS;
}
Status ModelManager::HandleProfInitCommand(const Command &command) { | |||
uint64_t module_index = command.module_index; | |||
if (ProfilingManager::Instance().ProfInit(module_index) != SUCCESS) { | |||
@@ -973,11 +1046,9 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||
GELOGI("Parse model %u success.", model_id); | |||
if (ProfilingManager::Instance().ProfilingModelLoadOn()) { | |||
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + | |||
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond | |||
davinci_model->SetProfileTime(MODEL_LOAD_END); | |||
} | |||
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + | |||
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond | |||
davinci_model->SetProfileTime(MODEL_LOAD_END); | |||
GE_IF_BOOL_EXEC(ret == SUCCESS, device_count++); | |||
return SUCCESS; | |||
@@ -158,10 +158,15 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
static ge::Status HandleAclProfilingCommand(const Command &command); | |||
static ge::Status HandleProfileCommand(const Command &command); | |||
static ge::Status HandleDumpCommand(const Command &command); | |||
static ge::Status HandleProfModelSubscribeCommand(const Command &command); | |||
static ge::Status HandleProfModelUnsubscribeCommand(const Command &command); | |||
static ge::Status HandleProfInitCommand(const Command &command); | |||
static ge::Status HandleProfFinalizeCommand(const Command &command); | |||
static ge::Status HandleProfStartCommand(const Command &command); | |||
static ge::Status HandleProfStopCommand(const Command &command); | |||
static ge::Status GetModelByCmd(const Command &command, | |||
std::shared_ptr<DavinciModel> &davinci_model); | |||
/// | |||
/// @ingroup domi_ome | |||
/// @brief get model memory usage | |||
@@ -259,7 +259,9 @@ Status NodeDoneCallback::ProfilingReport() { | |||
return profiling_ret; | |||
} | |||
ProfilingManager::Instance().ReportProfilingData(task_desc_info, compute_graph_info); | |||
auto &profiling_manager = ProfilingManager::Instance(); | |||
profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info, | |||
!profiling_manager.IsAclApiMode()); | |||
return SUCCESS; | |||
} | |||
@@ -70,6 +70,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFIL | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_STOP_VALUE; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::map<std::string, std::string> PROFILE_COMPONENT_MAP; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_CONFIG; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_MODEL_ID; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASKS; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR; | |||
@@ -567,10 +568,10 @@ enum ModelCheckType { | |||
/// @brief dynamic input type | |||
/// | |||
enum DynamicInputType { | |||
FIXED = 0, // default mode | |||
DYNAMIC_BATCH = 1, | |||
DYNAMIC_IMAGE = 2, | |||
DYNAMIC_DIMS = 3 | |||
FIXED = 0, // default mode | |||
DYNAMIC_BATCH = 1, | |||
DYNAMIC_IMAGE = 2, | |||
DYNAMIC_DIMS = 3 | |||
}; | |||
/// | |||
@@ -38,14 +38,14 @@ class DynamicSingleOp; | |||
struct RunModelData { | |||
uint32_t index; // Data index | |||
uint32_t modelId; | |||
std::vector<DataBuffer> blobs; // All input/output data buffer | |||
uint32_t timestamp; // Data creation time | |||
uint32_t timeout; // Processing timeout | |||
uint64_t request_id = 0; // Request ID | |||
uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 | |||
uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 | |||
uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 | |||
std::vector<uint64_t> dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty | |||
std::vector<DataBuffer> blobs; // All input/output data buffer | |||
uint32_t timestamp; // Data creation time | |||
uint32_t timeout; // Processing timeout | |||
uint64_t request_id = 0; // Request ID | |||
uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 | |||
uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 | |||
uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 | |||
std::vector<uint64_t> dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty | |||
}; | |||
class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||
@@ -264,14 +264,14 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||
static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, | |||
DynamicSingleOp **single_op); | |||
static ge::Status ExecuteAsync(DynamicSingleOp *executor, | |||
const std::vector<GeTensorDesc> &input_desc, | |||
const std::vector<DataBuffer> &inputs, | |||
std::vector<GeTensorDesc> &output_desc, | |||
static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc, | |||
const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc, | |||
std::vector<DataBuffer> &outputs); | |||
static ge::Status ReleaseSingleOpResource(void *stream); | |||
static ge::Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); | |||
ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); | |||
ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | |||
ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims, | |||