@@ -20,6 +20,8 @@ | |||||
#include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
#include "framework/common/string_util.h" | #include "framework/common/string_util.h" | ||||
#include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
#include "graph/utils/type_utils.h" | |||||
#include "graph/types.h" | |||||
#include "runtime/base.h" | #include "runtime/base.h" | ||||
#include "graph/load/model_manager/davinci_model.h" | #include "graph/load/model_manager/davinci_model.h" | ||||
@@ -31,12 +33,30 @@ const char *const kBpPoint = "bp_point"; | |||||
#ifdef DAVINCI_SUPPORT_PROFILING | #ifdef DAVINCI_SUPPORT_PROFILING | ||||
const size_t kReportMaxLen = 2048; | const size_t kReportMaxLen = 2048; | ||||
const int32_t kMaxDeviceNum = 256; | const int32_t kMaxDeviceNum = 256; | ||||
const uint32_t kInteval = 2; | |||||
const std::string kConfigNumsdev = "devNums"; | const std::string kConfigNumsdev = "devNums"; | ||||
const std::string kConfigDevIdList = "devIdList"; | const std::string kConfigDevIdList = "devIdList"; | ||||
const std::string kProfStart = "prof_start"; | const std::string kProfStart = "prof_start"; | ||||
const std::string kProfStop = "prof_stop"; | const std::string kProfStop = "prof_stop"; | ||||
const std::string kProfModelSubscribe = "prof_model_subscribe"; | const std::string kProfModelSubscribe = "prof_model_subscribe"; | ||||
const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; | const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; | ||||
const std::string kModelName = "model_name"; | |||||
const std::string kModelId = "model_id"; | |||||
const std::string kOpNmae = "op_name"; | |||||
const std::string kOptype = "op_type"; | |||||
const std::string kBlockDim = "block_dims"; | |||||
const std::string kTaskId = "task_id"; | |||||
const std::string kStreamId = "stream_id"; | |||||
const std::string kShapeType = "shape_type"; | |||||
const std::string kCurIterNum = "cur_iter_num"; | |||||
const std::string kTaskType = "task_type"; | |||||
const std::string kInput = "input"; | |||||
const std::string kOutput = "output"; | |||||
const std::string kFormat = "format"; | |||||
const std::string kDataType = "data_type"; | |||||
const std::string kShape = "shape"; | |||||
const std::string kIdx = "idx"; | |||||
#endif | #endif | ||||
} // namespace | } // namespace | ||||
@@ -206,118 +226,69 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf | |||||
#endif | #endif | ||||
} | } | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo( | |||||
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) { | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingOpInputOutInfo( | |||||
const TaskDescInfo &task, Json &task_json) { | |||||
#ifdef DAVINCI_SUPPORT_PROFILING | #ifdef DAVINCI_SUPPORT_PROFILING | ||||
std::string data; | |||||
for (const auto &task : task_desc_info) { | |||||
std::string model_name = task.model_name; | |||||
std::string op_name = task.op_name; | |||||
uint32_t block_dim = task.block_dim; | |||||
uint32_t task_id = task.task_id; | |||||
uint32_t stream_id = task.stream_id; | |||||
std::string shape_type = task.shape_type; | |||||
int64_t cur_iter_num = task.cur_iter_num; | |||||
uint32_t task_type = task.task_type; | |||||
data = model_name.append(" ") | |||||
.append(op_name).append(" ") | |||||
.append(std::to_string(block_dim)).append(" ") | |||||
.append(std::to_string(task_id)).append(" ") | |||||
.append(std::to_string(stream_id)).append(" ") | |||||
.append(std::to_string(model_id)).append(" ") | |||||
.append(shape_type).append(" ") | |||||
.append(std::to_string(cur_iter_num)).append(" ") | |||||
.append(std::to_string(task_type)).append("\n"); | |||||
ReporterData reporter_data{}; | |||||
reporter_data.deviceId = device_id; | |||||
reporter_data.data = (unsigned char *)data.c_str(); | |||||
reporter_data.dataLen = data.size(); | |||||
int ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "task_desc_info", sizeof("task_desc_info")); | |||||
if (ret != EOK) { | |||||
GELOGE(ret, "Report data tag of task_desc_info memcpy error!"); | |||||
return; | |||||
} | |||||
int32_t cb_ret = CallMsprofReport(reporter_data); | |||||
if (cb_ret != 0) { | |||||
GELOGE(cb_ret, "Reporter data of task_desc_info failed, ret:%d", cb_ret); | |||||
return; | |||||
} | |||||
for (size_t i = 0; i < task.input_format.size(); i++) { | |||||
Json tmp_input; | |||||
tmp_input[kIdx] = i; | |||||
Format format = task.input_format[i]; | |||||
tmp_input[kFormat] = TypeUtils::FormatToSerialString(format); | |||||
DataType data_type = task.input_data_type[i]; | |||||
tmp_input[kDataType] = TypeUtils::DataTypeToSerialString(data_type); | |||||
tmp_input[kShape] = task.input_shape[i]; | |||||
task_json[kInput] += tmp_input; | |||||
} | |||||
for (size_t i = 0; i < task.output_format.size(); i++) { | |||||
Json tmp_output; | |||||
tmp_output[kIdx] = i; | |||||
Format format = task.output_format[i]; | |||||
tmp_output[kFormat] = TypeUtils::FormatToSerialString(format); | |||||
DataType data_type = task.output_data_type[i]; | |||||
tmp_output[kDataType] = TypeUtils::DataTypeToSerialString(data_type); | |||||
tmp_output[kShape] = task.output_shape[i]; | |||||
task_json[kOutput] += tmp_output; | |||||
} | } | ||||
data.clear(); | |||||
#endif | #endif | ||||
} | } | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo( | |||||
uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, const int32_t &device_id) { | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo( | |||||
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) { | |||||
#ifdef DAVINCI_SUPPORT_PROFILING | #ifdef DAVINCI_SUPPORT_PROFILING | ||||
std::string data; | |||||
for (const auto &graph : compute_graph_desc_info) { | |||||
data.append("model_name:") | |||||
.append(graph.model_name) | |||||
.append(" op_name:") | |||||
.append(graph.op_name) | |||||
.append(" op_type:") | |||||
.append(graph.op_type); | |||||
for (size_t i = 0; i < graph.input_format.size(); ++i) { | |||||
data.append(" input_id:") | |||||
.append(std::to_string(i)) | |||||
.append(" input_format:") | |||||
.append(std::to_string(graph.input_format.at(i))) | |||||
.append(" input_data_type:") | |||||
.append(std::to_string(graph.input_data_type.at(i))) | |||||
.append(" input_shape:\""); | |||||
size_t input_shape_len = graph.input_shape.at(i).size(); | |||||
if (input_shape_len == 0) { | |||||
data.append(""); | |||||
} else if (input_shape_len == 1) { | |||||
data.append(std::to_string(graph.input_shape.at(i).at(0))); | |||||
} else { | |||||
for (size_t j = 0; j < input_shape_len - 1; ++j) { | |||||
data.append(std::to_string(graph.input_shape.at(i).at(j))).append(","); | |||||
} | |||||
data.append(std::to_string(graph.input_shape.at(i).at(input_shape_len - 1))); | |||||
} | |||||
data.append("\""); | |||||
} | |||||
for (size_t i = 0; i < graph.output_format.size(); ++i) { | |||||
data.append(" output_id:") | |||||
.append(std::to_string(i)) | |||||
.append(" output_format:") | |||||
.append(std::to_string(graph.output_format.at(i))) | |||||
.append(" output_data_type:") | |||||
.append(std::to_string(graph.output_data_type.at(i))) | |||||
.append(" output_shape:\""); | |||||
size_t output_shape_len = graph.output_shape.at(i).size(); | |||||
if (output_shape_len == 0) { | |||||
data.append(""); | |||||
} else if (output_shape_len == 1) { | |||||
data.append(std::to_string(graph.output_shape.at(i).at(0))); | |||||
} else { | |||||
for (size_t j = 0; j < output_shape_len - 1; ++j) { | |||||
data.append(std::to_string(graph.output_shape.at(i).at(j))).append(","); | |||||
} | |||||
data.append(std::to_string(graph.output_shape.at(i).at(output_shape_len - 1))); | |||||
} | |||||
data.append("\""); | |||||
for (const auto &task : task_desc_info) { | |||||
Json task_info; | |||||
task_info[kModelName] = task.model_name; | |||||
task_info[kModelId] = model_id; | |||||
task_info[kOpNmae] = task.op_name; | |||||
task_info[kOptype] = task.op_type; | |||||
task_info[kBlockDim] = task.block_dim; | |||||
task_info[kTaskType] = task.task_type; | |||||
task_info[kTaskId] = task.task_id; | |||||
task_info[kStreamId] = task.stream_id; | |||||
task_info[kCurIterNum] = task.cur_iter_num; | |||||
task_info[kShapeType] = task.shape_type; | |||||
ProfilingOpInputOutInfo(task, task_info); | |||||
std::string reported_data; | |||||
try { | |||||
reported_data = task_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); | |||||
} catch (std::exception &e) { | |||||
GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); | |||||
return ; | |||||
} catch (...) { | |||||
GELOGE(FAILED, "Failed to convert JSON to string."); | |||||
return; | |||||
} | } | ||||
data.append(" model_id:").append(std::to_string(model_id)); | |||||
data.append(" task_id:").append(std::to_string(graph.task_id)); | |||||
data.append(" stream_id:").append(std::to_string(graph.stream_id)); | |||||
data.append("\n"); | |||||
GraphDescReport(device_id, data); | |||||
data.clear(); | |||||
reported_data.append(",") | |||||
.append("\n"); | |||||
ReportData(device_id, reported_data, "task_desc_info"); | |||||
} | } | ||||
#endif | #endif | ||||
} | } | ||||
void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &data) { | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportData( | |||||
const int32_t &device_id, const string &data, const string &tag_name) { | |||||
#ifdef DAVINCI_SUPPORT_PROFILING | #ifdef DAVINCI_SUPPORT_PROFILING | ||||
ReporterData reporter_data{}; | ReporterData reporter_data{}; | ||||
int ret = -1; | int ret = -1; | ||||
@@ -325,36 +296,38 @@ void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &d | |||||
size_t index = data.size() / kReportMaxLen; | size_t index = data.size() / kReportMaxLen; | ||||
if (index >= 1) { | if (index >= 1) { | ||||
reporter_data.deviceId = device_id; | reporter_data.deviceId = device_id; | ||||
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); | |||||
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;); | |||||
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size()); | |||||
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;); | |||||
for (size_t i = 0; i < index; ++i) { | for (size_t i = 0; i < index; ++i) { | ||||
reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * i; | reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * i; | ||||
reporter_data.dataLen = kReportMaxLen; | reporter_data.dataLen = kReportMaxLen; | ||||
cb_ret = CallMsprofReport(reporter_data); | cb_ret = CallMsprofReport(reporter_data); | ||||
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); | |||||
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret); | |||||
return;); | |||||
} | } | ||||
reporter_data.dataLen = data.size() - kReportMaxLen * index; | reporter_data.dataLen = data.size() - kReportMaxLen * index; | ||||
if (reporter_data.dataLen != 0) { | if (reporter_data.dataLen != 0) { | ||||
reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * index; | reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * index; | ||||
cb_ret = CallMsprofReport(reporter_data); | cb_ret = CallMsprofReport(reporter_data); | ||||
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); | |||||
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret); | |||||
return;); | |||||
} | } | ||||
} else { | } else { | ||||
reporter_data.deviceId = device_id; | reporter_data.deviceId = device_id; | ||||
reporter_data.data = (unsigned char *)data.c_str(); | reporter_data.data = (unsigned char *)data.c_str(); | ||||
reporter_data.dataLen = data.size(); | reporter_data.dataLen = data.size(); | ||||
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); | |||||
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;); | |||||
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size()); | |||||
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;); | |||||
cb_ret = CallMsprofReport(reporter_data); | cb_ret = CallMsprofReport(reporter_data); | ||||
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); | |||||
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret); | |||||
return;); | |||||
} | } | ||||
#endif | #endif | ||||
} | } | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData( | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData( | ||||
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | |||||
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) { | |||||
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info) { | |||||
#ifdef DAVINCI_SUPPORT_PROFILING | #ifdef DAVINCI_SUPPORT_PROFILING | ||||
int32_t logic_device_id = 0; | int32_t logic_device_id = 0; | ||||
rtError_t rt_ret = rtGetDevice(&logic_device_id); | rtError_t rt_ret = rtGetDevice(&logic_device_id); | ||||
@@ -365,8 +338,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr | |||||
GELOGD("current logic_device_id:%d", logic_device_id); | GELOGD("current logic_device_id:%d", logic_device_id); | ||||
GELOGD("start ProfilingTaskDescInfo."); | GELOGD("start ProfilingTaskDescInfo."); | ||||
ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id); | ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id); | ||||
GELOGD("start ProfilingGraphDescInfo."); | |||||
ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id); | |||||
GELOGD("Report profiling data for GE end."); | GELOGD("Report profiling data for GE end."); | ||||
#endif | #endif | ||||
} | } | ||||
@@ -813,6 +784,44 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMs | |||||
static_cast<void *>(&reporter_data), sizeof(ReporterData)); | static_cast<void *>(&reporter_data), sizeof(ReporterData)); | ||||
} | } | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetOpInputOutputInfo( | |||||
const OpDescPtr &op, TaskDescInfo &task_desc_info) const { | |||||
std::vector<Format> input_format; | |||||
std::vector<std::vector<int64_t>> input_shape; | |||||
std::vector<DataType> input_data_type; | |||||
for (size_t i = 0; i < op->GetAllInputsSize(); ++i) { | |||||
GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i); | |||||
if (input_tensor_desc == nullptr) { | |||||
continue; | |||||
} | |||||
input_format.emplace_back(input_tensor_desc->GetFormat()); | |||||
input_shape.emplace_back(input_tensor_desc->GetShape().GetDims()); | |||||
input_data_type.emplace_back(input_tensor_desc->GetDataType()); | |||||
} | |||||
std::vector<Format> output_format; | |||||
std::vector<std::vector<int64_t>> output_shape; | |||||
std::vector<DataType> output_data_type; | |||||
for (size_t j = 0; j < op->GetOutputsSize(); ++j) { | |||||
GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j); | |||||
if (output_tensor_desc == nullptr) { | |||||
continue; | |||||
} | |||||
output_format.emplace_back(output_tensor_desc->GetFormat()); | |||||
output_shape.emplace_back(output_tensor_desc->GetShape().GetDims()); | |||||
output_data_type.emplace_back(output_tensor_desc->GetDataType()); | |||||
} | |||||
std::vector<Format> format_default = { FORMAT_NULL }; | |||||
std::vector<std::vector<int64_t>> shape_default = { {0} }; | |||||
std::vector<DataType> data_type_default = { DT_UNDEFINED }; | |||||
task_desc_info.input_format = input_format.empty() ? format_default : input_format; | |||||
task_desc_info.input_shape = input_shape.empty() ? shape_default : input_shape; | |||||
task_desc_info.input_data_type = input_data_type.empty() ? data_type_default : input_data_type; | |||||
task_desc_info.output_format = output_format.empty() ? format_default : output_format; | |||||
task_desc_info.output_shape = output_shape.empty() ? shape_default : output_shape; | |||||
task_desc_info.output_data_type = output_data_type.empty() ? data_type_default : output_data_type; | |||||
} | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpPoint( | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpPoint( | ||||
std::string &fp_point, std::string &bp_point) { | std::string &fp_point, std::string &bp_point) { | ||||
// Env or options mode, fp_point_/bp_point_ have initiliazed on profiling init | // Env or options mode, fp_point_/bp_point_ have initiliazed on profiling init | ||||
@@ -54,6 +54,8 @@ namespace { | |||||
} // namespace | } // namespace | ||||
namespace ge { | namespace ge { | ||||
class OpDesc; | |||||
using OpDescPtr = std::shared_ptr<OpDesc>; | |||||
struct DeviceSubsInfo { | struct DeviceSubsInfo { | ||||
uint64_t module; | uint64_t module; | ||||
uint32_t subscribe_count; | uint32_t subscribe_count; | ||||
@@ -82,12 +84,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||||
bool ProfilingModelExecuteOn() const; | bool ProfilingModelExecuteOn() const; | ||||
// is_execute_profiling_ only used by ge option and env | // is_execute_profiling_ only used by ge option and env | ||||
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } | bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } | ||||
void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | |||||
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info); | |||||
void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info); | |||||
void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | ||||
const int32_t &device_id); | const int32_t &device_id); | ||||
void ProfilingGraphDescInfo(uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, | |||||
const int32_t &device_id); | |||||
void ProfilingOpInputOutInfo(const TaskDescInfo &task, Json &task_json); | |||||
Status PluginInit() const; | Status PluginInit() const; | ||||
void PluginUnInit() const; | void PluginUnInit() const; | ||||
Status CallMsprofReport(ReporterData &reporter_data) const; | Status CallMsprofReport(ReporterData &reporter_data) const; | ||||
@@ -95,6 +95,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||||
void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; } | void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; } | ||||
void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; } | void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; } | ||||
void GetFpBpPoint(std::string &fp_point, std::string &bp_point); | void GetFpBpPoint(std::string &fp_point, std::string &bp_point); | ||||
void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const; | |||||
void ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name); | |||||
private: | private: | ||||
Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf); | Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf); | ||||
Status ParseOptions(const std::string &options); | Status ParseOptions(const std::string &options); | ||||
@@ -103,7 +105,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||||
Status ProfParseDeviceId(const std::map<std::string, std::string> &config_para, | Status ProfParseDeviceId(const std::map<std::string, std::string> &config_para, | ||||
vector<int32_t> &device_list); | vector<int32_t> &device_list); | ||||
uint64_t GetProfilingModule(); | uint64_t GetProfilingModule(); | ||||
void GraphDescReport(const int32_t &device_id, const string &data); | |||||
void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector<int32_t> &device_list); | void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector<int32_t> &device_list); | ||||
void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module); | void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module); | ||||
@@ -852,7 +852,7 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi | |||||
// subgraph of dynamic graph no need to find index, has been found in parent graph | // subgraph of dynamic graph no need to find index, has been found in parent graph | ||||
if (IsSubGraphOfDynamicGraph(graph)) { | if (IsSubGraphOfDynamicGraph(graph)) { | ||||
GELOGI("Graph[%s] is subgraph of dynamic graph, no nned to find index.", graph->GetName().c_str()); | |||||
GELOGI("Graph[%s] is subgraph of dynamic graph, no need to find index.", graph->GetName().c_str()); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -92,6 +92,32 @@ const uint32_t kEndOfSequence = 0x0704000a; | |||||
const uint32_t kEndOfSequenceNew = 507005; | const uint32_t kEndOfSequenceNew = 507005; | ||||
const int32_t kModelAbortNormal = 0x0704000e; | const int32_t kModelAbortNormal = 0x0704000e; | ||||
const int32_t kModelAbortNormalNew = 507024; | const int32_t kModelAbortNormalNew = 507024; | ||||
const uint32_t kInteval = 2; | |||||
const char *const kModelName = "model_name"; | |||||
const char *const kModeleId = "model_id"; | |||||
const char *const kLoadStartTime = "load_start_time"; | |||||
const char *const kLoadEndTime = "load_end_time"; | |||||
const char *const kFusionOpInfo = "fusion_op_info"; | |||||
const char *const kFusionOpName = "fusion_op_name"; | |||||
const char *const kOriginalOpNum = "origin_op_num"; | |||||
const char *const kOriginalOpName = "origin_op_name"; | |||||
const char *const kStreamId = "stream_id"; | |||||
const char *const kFusionOpMemoryInfo = "memory_info"; | |||||
const char *const kInputSize = "input_size"; | |||||
const char *const kOutputSize = "output_size"; | |||||
const char *const kWeightSize = "weight_size"; | |||||
const char *const kWorkSpaceSize = "workspace_size"; | |||||
const char *const kTotalSize = "total_size"; | |||||
const char *const kTaskCount = "task_count"; | |||||
const char *const kTaskId = "task_id"; | |||||
const char* const kRequestId = "request_id"; | |||||
const char* const kThreadId = "thread_id"; | |||||
const char* const kInputBeginTime = "input_begin_time"; | |||||
const char* const kInputEndTime = "input_end_time"; | |||||
const char* const kInferBeginTime = "infer_begin_time"; | |||||
const char* const kInferEndTime = "infer_end_time"; | |||||
const char* const kOutputBeginTime = "output_start_time"; | |||||
const char* const kOutputEndTime = "output_end_time"; | |||||
inline bool IsDataOp(const std::string &node_type) { | inline bool IsDataOp(const std::string &node_type) { | ||||
return (node_type == DATA_TYPE) || (node_type == AIPP_DATA_TYPE) || (node_type == ANN_DATA_TYPE); | return (node_type == DATA_TYPE) || (node_type == AIPP_DATA_TYPE) || (node_type == ANN_DATA_TYPE); | ||||
@@ -744,13 +770,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
} | } | ||||
Status DavinciModel::ReportProfilingData() { | Status DavinciModel::ReportProfilingData() { | ||||
std::vector<ComputeGraphDescInfo> compute_graph_desc_info; | |||||
Status ret = GetComputeGraphInfo(compute_graph_desc_info); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "GetComputeGraphInfo failed."); | |||||
return ret; | |||||
} | |||||
ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info); | |||||
ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo()); | |||||
GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); | GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -2202,173 +2222,101 @@ Status DavinciModel::InitModelProfile() { | |||||
} | } | ||||
Status DavinciModel::SinkModelProfile() { | Status DavinciModel::SinkModelProfile() { | ||||
// profiling plugin must be registered | |||||
auto &prof_mgr = ProfilingManager::Instance(); | auto &prof_mgr = ProfilingManager::Instance(); | ||||
ReporterData reporter_data{}; | |||||
// report model data tag name | |||||
std::string tag_name("model_load_info_" + std::to_string(this->Id())); | |||||
GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | |||||
return FAILED, "Sink model tag memcpy error."); | |||||
// Model Header | // Model Header | ||||
std::string name = om_name_.empty() ? name_ : om_name_; | std::string name = om_name_.empty() ? name_ : om_name_; | ||||
size_t name_len = name.size(); | |||||
reporter_data.deviceId = device_id_; | |||||
reporter_data.data = (unsigned char *)&name_len; | |||||
reporter_data.dataLen = sizeof(int32_t); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
reporter_data.data = (unsigned char *)name.c_str(); | |||||
reporter_data.dataLen = name.size(); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
uint32_t model_id = this->Id(); | uint32_t model_id = this->Id(); | ||||
reporter_data.data = (unsigned char *)&model_id; | |||||
reporter_data.dataLen = sizeof(uint32_t); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
// Load Start/End Time | |||||
int64_t start_time = this->GetLoadBeginTime(); | int64_t start_time = this->GetLoadBeginTime(); | ||||
reporter_data.data = (unsigned char *)&start_time; | |||||
reporter_data.dataLen = sizeof(int64_t); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
int64_t end_time = this->GetLoadEndTime(); | int64_t end_time = this->GetLoadEndTime(); | ||||
reporter_data.data = (unsigned char *)&end_time; | |||||
reporter_data.dataLen = sizeof(int64_t); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
Json model_load_info; | |||||
model_load_info[kModelName] = name; | |||||
model_load_info[kModeleId] = model_id; | |||||
model_load_info[kLoadStartTime] = start_time; | |||||
model_load_info[kLoadEndTime] = end_time; | |||||
// fusion op info | |||||
using CIT = std::multimap<uint32_t, uint32_t>::const_iterator; | using CIT = std::multimap<uint32_t, uint32_t>::const_iterator; | ||||
using Range = std::pair<CIT, CIT>; | using Range = std::pair<CIT, CIT>; | ||||
for (const ProfileInfo &profile : profile_list_) { | for (const ProfileInfo &profile : profile_list_) { | ||||
// op name after fusion | |||||
Json fusion_op_info; | |||||
string fusion_op_name = profile.fusion_info.op_name; | string fusion_op_name = profile.fusion_info.op_name; | ||||
int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size(); | |||||
reporter_data.data = (unsigned char *)&fusion_op_name_len; | |||||
reporter_data.dataLen = sizeof(int32_t); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
reporter_data.data = (unsigned char *)fusion_op_name.c_str(); | |||||
reporter_data.dataLen = fusion_op_name_len; | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
// original op name before fusion | |||||
uint32_t op_num = profile.fusion_info.original_op_names.size(); | uint32_t op_num = profile.fusion_info.original_op_names.size(); | ||||
reporter_data.data = (unsigned char *)&op_num; | |||||
reporter_data.dataLen = sizeof(int32_t); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
vector<string> original_name; | |||||
for (uint32_t k = 0; k < op_num; k++) { | for (uint32_t k = 0; k < op_num; k++) { | ||||
std::string op_name = profile.fusion_info.original_op_names[k]; | |||||
int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size(); | |||||
reporter_data.data = (unsigned char *)&op_name_len; | |||||
reporter_data.dataLen = sizeof(int32_t); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
reporter_data.data = (unsigned char *)op_name.c_str(); | |||||
reporter_data.dataLen = op_name_len; | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
} | |||||
// stream id info | |||||
original_name.emplace_back(profile.fusion_info.original_op_names[k]); | |||||
} | |||||
uint32_t stream_id = 0; | uint32_t stream_id = 0; | ||||
auto iter = profiler_report_op_info_.find(fusion_op_name); | auto iter = profiler_report_op_info_.find(fusion_op_name); | ||||
if (iter != profiler_report_op_info_.end()) { | if (iter != profiler_report_op_info_.end()) { | ||||
stream_id = iter->second.second; | stream_id = iter->second.second; | ||||
} | } | ||||
reporter_data.data = (unsigned char *)&stream_id; | |||||
reporter_data.dataLen = sizeof(int32_t); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
// memory info | |||||
reporter_data.data = (unsigned char *)&profile.memory_info; | |||||
reporter_data.dataLen = sizeof(profile.memory_info); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
// task info | |||||
reporter_data.data = (unsigned char *)&profile.task_count; | |||||
reporter_data.dataLen = sizeof(uint32_t); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
fusion_op_info[kFusionOpName] = fusion_op_name; | |||||
fusion_op_info[kOriginalOpNum] = op_num; | |||||
fusion_op_info[kOriginalOpName] = original_name; | |||||
fusion_op_info[kStreamId] = stream_id; | |||||
fusion_op_info[kFusionOpMemoryInfo][kInputSize] = profile.memory_info.input_size; | |||||
fusion_op_info[kFusionOpMemoryInfo][kOutputSize] = profile.memory_info.output_size; | |||||
fusion_op_info[kFusionOpMemoryInfo][kWeightSize] = profile.memory_info.weight_size; | |||||
fusion_op_info[kFusionOpMemoryInfo][kWorkSpaceSize] = profile.memory_info.workspace_size; | |||||
fusion_op_info[kFusionOpMemoryInfo][kTotalSize] = profile.memory_info.total_size; | |||||
fusion_op_info[kTaskCount] = profile.task_count; | |||||
vector<uint32_t> task_id; | |||||
Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index); | Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index); | ||||
for (CIT idx = task_range.first; idx != task_range.second; ++idx) { | for (CIT idx = task_range.first; idx != task_range.second; ++idx) { | ||||
uint32_t task_id = idx->second; | |||||
reporter_data.data = (unsigned char *)&task_id; | |||||
reporter_data.dataLen = sizeof(uint32_t); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
task_id.push_back(idx->second); | |||||
} | } | ||||
fusion_op_info[kTaskId] = task_id; | |||||
model_load_info[kFusionOpInfo] += fusion_op_info; | |||||
} | } | ||||
std::string tag_name("model_load_info_" + std::to_string(this->Id())); | |||||
std::string reported_data; | |||||
try { | |||||
reported_data = model_load_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); | |||||
} catch (std::exception &e) { | |||||
GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); | |||||
} catch (...) { | |||||
GELOGE(FAILED, "Failed to convert JSON to string."); | |||||
} | |||||
reported_data.append(",") | |||||
.append("\n"); | |||||
prof_mgr.ReportData(device_id_, reported_data, tag_name); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
// Report per-inference time points (input copy, inference, output dump) for
// one data batch to the profiler as a JSON record.
//
// The record is tagged "model_time_info_<model id>_<data index>" so each
// inference of each model gets a distinct tag. If JSON serialization fails,
// the error is logged and an (empty) payload is still reported so the
// profiling stream stays consistent.
Status DavinciModel::SinkTimeProfile(const InputData &current_data) {
  auto &prof_mgr = ProfilingManager::Instance();
  // Prefer the offline-model name when available, to match other reports.
  string name = om_name_.empty() ? name_ : om_name_;
  Json model_time_info;
  model_time_info[kModelName] = name;
  // NOTE(review): original used undeclared `kModeleId`; `kModelId` ("model_id")
  // is the declared constant — assumed typo, confirm against the full file.
  model_time_info[kModelId] = this->Id();
  model_time_info[kRequestId] = current_data.request_id;
  model_time_info[kThreadId] = GetDataInputTid();
  model_time_info[kInputBeginTime] = time_info_.processBeginTime;
  model_time_info[kInputEndTime] = time_info_.processEndTime;
  model_time_info[kInferBeginTime] = time_info_.inferenceBeginTime;
  model_time_info[kInferEndTime] = time_info_.inferenceEndTime;
  model_time_info[kOutputBeginTime] = time_info_.dumpBeginTime;
  model_time_info[kOutputEndTime] = time_info_.dumpEndTime;
  // report model data tag name
  std::string tag_name;
  tag_name.append("model_time_info_")
      .append(std::to_string(this->Id()))
      .append("_")
      .append(std::to_string(current_data.index));
  std::string reported_data;
  try {
    // kInteval-space indent; invalid UTF-8 in values is ignored rather than throwing.
    reported_data = model_time_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
  } catch (const std::exception &e) {
    GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
  } catch (...) {
    GELOGE(FAILED, "Failed to convert JSON to string.");
  }
  // Trailing ",\n" lets consecutive records be concatenated into a JSON-ish list.
  reported_data.append(",").append("\n");
  prof_mgr.ReportData(device_id_, reported_data, tag_name);
  return SUCCESS;
}
@@ -3069,13 +3017,15 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo | |||||
task_desc_info.model_name = name_; | task_desc_info.model_name = name_; | ||||
} | } | ||||
task_desc_info.op_name = op->GetName(); | task_desc_info.op_name = op->GetName(); | ||||
task_desc_info.op_type = op->GetType(); | |||||
task_desc_info.block_dim = task_def.kernel().block_dim(); | task_desc_info.block_dim = task_def.kernel().block_dim(); | ||||
task_desc_info.task_id = task->GetTaskID(); | task_desc_info.task_id = task->GetTaskID(); | ||||
task_desc_info.stream_id = task->GetStreamId(); | task_desc_info.stream_id = task->GetStreamId(); | ||||
task_desc_info.shape_type = "static"; | task_desc_info.shape_type = "static"; | ||||
task_desc_info.cur_iter_num = 0; | task_desc_info.cur_iter_num = 0; | ||||
// task type | |||||
task_desc_info.task_type = kTaskTypeInvalid; | task_desc_info.task_type = kTaskTypeInvalid; | ||||
auto &prof_mgr = ProfilingManager::Instance(); | |||||
prof_mgr.GetOpInputOutputInfo(op, task_desc_info); | |||||
auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type()); | auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type()); | ||||
if (model_task_type == RT_MODEL_TASK_KERNEL) { | if (model_task_type == RT_MODEL_TASK_KERNEL) { | ||||
const domi::KernelDef &kernel_def = task_def.kernel(); | const domi::KernelDef &kernel_def = task_def.kernel(); | ||||
@@ -3107,7 +3057,6 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo | |||||
task_desc_info_.emplace_back(task_desc_info); | task_desc_info_.emplace_back(task_desc_info); | ||||
} | } | ||||
} | } | ||||
return; | |||||
} | } | ||||
Status DavinciModel::DistributeTask() { | Status DavinciModel::DistributeTask() { | ||||
@@ -4008,41 +3957,6 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea | |||||
main_follow_stream_mapping_[main_stream_id].emplace_back(stream); | main_follow_stream_mapping_[main_stream_id].emplace_back(stream); | ||||
} | } | ||||
// Collect per-op compute-graph description records for profiling from the
// data dumper. Empty format/shape/dtype lists are padded with sentinel
// defaults (FORMAT_NULL / {0} / DT_UNDEFINED) so the profiler always
// receives non-empty vectors. task_id/stream_id fall back to 0 when the op
// was never registered in profiler_report_op_info_.
Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info) {
  const std::vector<Format> default_format = { FORMAT_NULL };
  const std::vector<std::vector<int64_t>> default_shape = { {0} };
  const std::vector<DataType> default_data_type = { DT_UNDEFINED };
  const auto &all_op_desc = data_dumper_.GetAllOpDescInfo();
  for (const auto &op_desc : all_op_desc) {
    ComputeGraphDescInfo graph_info;
    // Prefer the offline-model name when available.
    graph_info.model_name = om_name_.empty() ? name_ : om_name_;
    graph_info.op_name = op_desc.op_name;
    graph_info.op_type = op_desc.op_type;
    graph_info.input_format = op_desc.input_format.empty() ? default_format : op_desc.input_format;
    graph_info.input_shape = op_desc.input_shape.empty() ? default_shape : op_desc.input_shape;
    graph_info.input_data_type = op_desc.input_data_type.empty() ? default_data_type : op_desc.input_data_type;
    graph_info.output_format = op_desc.output_format.empty() ? default_format : op_desc.output_format;
    graph_info.output_shape = op_desc.output_shape.empty() ? default_shape : op_desc.output_shape;
    graph_info.output_data_type = op_desc.output_data_type.empty() ? default_data_type : op_desc.output_data_type;
    uint32_t task_id = 0;
    uint32_t stream_id = 0;
    auto found = profiler_report_op_info_.find(op_desc.op_name);
    if (found != profiler_report_op_info_.end()) {
      task_id = found->second.first;
      stream_id = found->second.second;
    }
    graph_info.task_id = task_id;
    graph_info.stream_id = stream_id;
    graph_desc_info.emplace_back(graph_info);
  }
  return SUCCESS;
}
void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) { | void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) { | ||||
if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) { | if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) { | ||||
tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_; | tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_; | ||||
@@ -840,9 +840,6 @@ class DavinciModel { | |||||
Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); | Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); | ||||
// get desc info of graph for profiling | |||||
Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info); | |||||
void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name); | void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name); | ||||
Status InitL1DataDumperArgs(); | Status InitL1DataDumperArgs(); | ||||
@@ -70,8 +70,6 @@ class NodeDoneCallback { | |||||
Status PrepareConstInputs(const NodeItem &node_item); | Status PrepareConstInputs(const NodeItem &node_item); | ||||
Status DumpDynamicNode(); | Status DumpDynamicNode(); | ||||
Status ProfilingReport(); | Status ProfilingReport(); | ||||
Status GetGraphDescInfo(const NodePtr node, const HybridModel *model, | |||||
std::vector<ComputeGraphDescInfo> &compute_graph_info); | |||||
Status GetTaskDescInfo(const NodePtr node, const HybridModel *model, | Status GetTaskDescInfo(const NodePtr node, const HybridModel *model, | ||||
std::vector<TaskDescInfo> &task_desc_info); | std::vector<TaskDescInfo> &task_desc_info); | ||||
GraphExecutionContext *graph_context_; | GraphExecutionContext *graph_context_; | ||||
@@ -159,51 +157,14 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel * | |||||
} | } | ||||
GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); | GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); | ||||
auto &prof_mgr = ProfilingManager::Instance(); | |||||
task_desc_info = context_->GetProfilingTaskDescInfo(); | task_desc_info = context_->GetProfilingTaskDescInfo(); | ||||
context_->ClearProfilingTaskDescInfo(); | context_->ClearProfilingTaskDescInfo(); | ||||
return SUCCESS; | |||||
} | |||||
Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel *model, | |||||
std::vector<ComputeGraphDescInfo> &compute_graph_info) { | |||||
GE_CHECK_NOTNULL(node); | |||||
GE_CHECK_NOTNULL(model); | |||||
GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str()); | |||||
compute_graph_info = context_->GetProfilingGraphDescInfo(); | |||||
context_->ClearProfilingGraphDescInfo(); | |||||
auto op_desc = node->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
for (auto &tmp_compute_graph_info : compute_graph_info) { | |||||
// default | |||||
if (op_desc->GetAllInputsSize() == 0) { | |||||
tmp_compute_graph_info.input_format = { FORMAT_NULL }; | |||||
tmp_compute_graph_info.input_shape = { {0} }; | |||||
tmp_compute_graph_info.input_data_type = { DT_UNDEFINED }; | |||||
} | |||||
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||||
GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); | |||||
if (input_desc == nullptr) { | |||||
continue; | |||||
} | |||||
tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); | |||||
tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); | |||||
tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); | |||||
} | |||||
if (op_desc->GetOutputsSize() == 0) { | |||||
tmp_compute_graph_info.output_format = { FORMAT_NULL }; | |||||
tmp_compute_graph_info.output_shape = { {0} }; | |||||
tmp_compute_graph_info.output_data_type = { DT_UNDEFINED }; | |||||
} | |||||
for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { | |||||
GeTensorDesc output_desc = op_desc->GetOutputDesc(j); | |||||
tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); | |||||
tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); | |||||
tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); | |||||
} | |||||
for (auto &tmp_task_desc : task_desc_info) { | |||||
// save op input and output info | |||||
auto op_desc = node->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
prof_mgr.GetOpInputOutputInfo(op_desc, tmp_task_desc); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -233,15 +194,8 @@ Status NodeDoneCallback::ProfilingReport() { | |||||
return profiling_ret; | return profiling_ret; | ||||
} | } | ||||
std::vector<ComputeGraphDescInfo> compute_graph_info; | |||||
profiling_ret = GetGraphDescInfo(node, model, compute_graph_info); | |||||
if (profiling_ret != RT_ERROR_NONE) { | |||||
GELOGE(profiling_ret, "Get graph info of node[%s] failed.", node->GetName().c_str()); | |||||
return profiling_ret; | |||||
} | |||||
auto &profiling_manager = ProfilingManager::Instance(); | auto &profiling_manager = ProfilingManager::Instance(); | ||||
profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info); | |||||
profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -189,12 +189,11 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||||
uint32_t stream_id = 0; | uint32_t stream_id = 0; | ||||
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
return FAILED; | |||||
GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | ||||
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); | (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); | ||||
(void)context.SaveProfilingGraphDescInfo(task_id, stream_id); | |||||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
} | } | ||||
@@ -201,12 +201,11 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void( | |||||
uint32_t stream_id = 0; | uint32_t stream_id = 0; | ||||
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
return FAILED; | |||||
GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | ||||
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0); | (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0); | ||||
(void)context.SaveProfilingGraphDescInfo(task_id, stream_id); | |||||
auto callback = [=, &context]() { | auto callback = [=, &context]() { | ||||
GELOGD("Node[%s] callback start.", node_name_.c_str()); | GELOGD("Node[%s] callback start.", node_name_.c_str()); | ||||
RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | ||||
@@ -515,7 +515,7 @@ Status TaskContext::Synchronize() { | |||||
} | } | ||||
Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | ||||
uint32_t task_type, uint32_t block_dim) { | |||||
const std::string &task_type, uint32_t block_dim) { | |||||
if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | ||||
const NodeItem &node_item = GetNodeItem(); | const NodeItem &node_item = GetNodeItem(); | ||||
auto op_desc = node_item.GetOpDesc(); | auto op_desc = node_item.GetOpDesc(); | ||||
@@ -525,11 +525,11 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream | |||||
const HybridModel *model = graph_context->model; | const HybridModel *model = graph_context->model; | ||||
GE_CHECK_NOTNULL(model); | GE_CHECK_NOTNULL(model); | ||||
std::string op_name = op_desc->GetName(); | |||||
std::string dynamic_model_name = model->GetModelName(); | std::string dynamic_model_name = model->GetModelName(); | ||||
TaskDescInfo tmp_task_desc_info; | TaskDescInfo tmp_task_desc_info; | ||||
tmp_task_desc_info.model_name = dynamic_model_name; | tmp_task_desc_info.model_name = dynamic_model_name; | ||||
tmp_task_desc_info.op_name = op_name; | |||||
tmp_task_desc_info.op_name = op_desc->GetName(); | |||||
tmp_task_desc_info.op_type = op_desc->GetType(); | |||||
tmp_task_desc_info.block_dim = block_dim; | tmp_task_desc_info.block_dim = block_dim; | ||||
tmp_task_desc_info.task_type = task_type; | tmp_task_desc_info.task_type = task_type; | ||||
tmp_task_desc_info.task_id = task_id; | tmp_task_desc_info.task_id = task_id; | ||||
@@ -546,31 +546,5 @@ NodeState *TaskContext::GetNodeState() const { | |||||
return node_state_; | return node_state_; | ||||
} | } | ||||
// Record a graph-level profiling entry (op identity plus task/stream ids)
// for this node. Only runs when model-execute profiling is switched on, and
// only ops implemented as TVM kernels (ATTR_NAME_IMPLY_TYPE == TVM) are
// recorded; everything else is silently skipped.
Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id) {
  if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) {
    return SUCCESS;
  }
  const NodeItem &node_item = GetNodeItem();
  auto op_desc = node_item.GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);
  const GraphExecutionContext *graph_context = GetExecutionContext();
  GE_CHECK_NOTNULL(graph_context);
  const HybridModel *model = graph_context->model;
  GE_CHECK_NOTNULL(model);
  auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID);
  const bool is_tvm_kernel = AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) &&
                             op_mode == static_cast<uint32_t>(domi::ImplyType::TVM);
  if (is_tvm_kernel) {
    ComputeGraphDescInfo graph_info;
    graph_info.model_name = model->GetModelName();
    graph_info.op_name = op_desc->GetName();
    graph_info.op_type = op_desc->GetType();
    graph_info.task_id = task_id;
    graph_info.stream_id = stream_id;
    compute_graph_info.emplace_back(graph_info);
  }
  return SUCCESS;
}
} // namespace hybrid | } // namespace hybrid | ||||
} // namespace ge | } // namespace ge |
@@ -113,13 +113,10 @@ class TaskContext { | |||||
void *handle_ = nullptr; | void *handle_ = nullptr; | ||||
const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | ||||
Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, uint32_t task_type, uint32_t block_dim); | |||||
Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||||
const std::string &task_type, uint32_t block_dim); | |||||
void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | ||||
const std::vector<ComputeGraphDescInfo>& GetProfilingGraphDescInfo() const { return compute_graph_info; } | |||||
Status SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id); | |||||
void ClearProfilingGraphDescInfo() { compute_graph_info.clear(); } | |||||
private: | private: | ||||
TaskContext(GraphExecutionContext *execution_context, | TaskContext(GraphExecutionContext *execution_context, | ||||
NodeState *node_state, | NodeState *node_state, | ||||
@@ -141,7 +138,6 @@ class TaskContext { | |||||
uint32_t task_id_ = 0; | uint32_t task_id_ = 0; | ||||
uint32_t stream_id_ = 0; | uint32_t stream_id_ = 0; | ||||
std::vector<TaskDescInfo> task_desc_info; | std::vector<TaskDescInfo> task_desc_info; | ||||
std::vector<ComputeGraphDescInfo> compute_graph_info; | |||||
}; | }; | ||||
} // namespace hybrid | } // namespace hybrid | ||||
} // namespace ge | } // namespace ge | ||||
@@ -45,40 +45,24 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
string model_name; | |||||
string op_name; | |||||
TaskDescInfo tmp_task_desc_info; | |||||
uint32_t model_id; | uint32_t model_id; | ||||
uint32_t block_dim; | |||||
if (op_task->GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) { | |||||
if (op_task->GetProfilingArgs(tmp_task_desc_info, model_id) != SUCCESS) { | |||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); | GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); | ||||
return ACL_ERROR_GE_PARAM_INVALID; | return ACL_ERROR_GE_PARAM_INVALID; | ||||
} | } | ||||
GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str()); | |||||
std::vector<TaskDescInfo> task_desc_info; | |||||
uint32_t task_id = 0; | |||||
uint32_t stream_id = 0; | |||||
auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | |||||
GELOGD("ProfilingReport of op[%s] model[%s] start.", | |||||
tmp_task_desc_info.op_name.c_str(), tmp_task_desc_info.model_name.c_str()); | |||||
TaskDescInfo tmp_task_desc_info; | |||||
tmp_task_desc_info.model_name = model_name; | |||||
tmp_task_desc_info.op_name = op_name; | |||||
tmp_task_desc_info.block_dim = block_dim; | |||||
tmp_task_desc_info.task_id = task_id; | |||||
tmp_task_desc_info.stream_id = stream_id; | |||||
tmp_task_desc_info.shape_type = shape_type; | tmp_task_desc_info.shape_type = shape_type; | ||||
tmp_task_desc_info.cur_iter_num = 0; | tmp_task_desc_info.cur_iter_num = 0; | ||||
tmp_task_desc_info.task_type = op_task->GetTaskType(); | tmp_task_desc_info.task_type = op_task->GetTaskType(); | ||||
GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | |||||
task_desc_info.emplace_back(tmp_task_desc_info); | |||||
std::vector<ComputeGraphDescInfo> compute_graph_info; | |||||
std::vector<TaskDescInfo> task_desc_info; | |||||
task_desc_info.emplace_back(tmp_task_desc_info); | |||||
auto &profiling_manager = ProfilingManager::Instance(); | auto &profiling_manager = ProfilingManager::Instance(); | ||||
profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info); | |||||
profiling_manager.ReportProfilingData(model_id, task_desc_info); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
} // namespace | } // namespace | ||||
@@ -23,6 +23,7 @@ | |||||
#include "aicpu/common/aicpu_task_struct.h" | #include "aicpu/common/aicpu_task_struct.h" | ||||
#include "common/dump/dump_manager.h" | #include "common/dump/dump_manager.h" | ||||
#include "common/dump/dump_op.h" | #include "common/dump/dump_op.h" | ||||
#include "common/profiling/profiling_manager.h" | |||||
#include "common/formats/formats.h" | #include "common/formats/formats.h" | ||||
#include "common/math/math_util.h" | #include "common/math/math_util.h" | ||||
#include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
@@ -108,15 +109,29 @@ void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) { | |||||
model_id_ = model_id; | model_id_ = model_id; | ||||
} | } | ||||
// Fill task_desc_info with the profiling attributes of this task and return
// the owning model id.
//
// task_id/stream_id are fetched from the runtime and therefore this MUST be
// called right after the kernel launch (before another launch overwrites
// them). On runtime failure the error is returned and task_desc_info is left
// partially unmodified.
Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id) {
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Get task_id and stream_id failed ret: 0x%X.", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  GE_CHECK_NOTNULL(op_desc_);
  string op_name = op_desc_->GetName();
  GELOGD("Get profiling args of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id);
  model_id = model_id_;
  task_desc_info.model_name = model_name_;
  task_desc_info.block_dim = block_dim_;
  task_desc_info.task_id = task_id;
  task_desc_info.stream_id = stream_id;
  task_desc_info.op_name = op_name;
  task_desc_info.op_type = op_desc_->GetType();
  // Also attach the op's input/output tensor descriptions for the profiler.
  auto &prof_mgr = ProfilingManager::Instance();
  prof_mgr.GetOpInputOutputInfo(op_desc_, task_desc_info);
  return SUCCESS;
}
// Default implementation: updating run info (e.g. tiling for new shapes) is
// not supported at the base-task level.
Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) {
  return UNSUPPORTED;
}
@@ -153,7 +168,7 @@ Status OpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||||
return UNSUPPORTED; | return UNSUPPORTED; | ||||
} | } | ||||
uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; } | |||||
const std::string &OpTask::GetTaskType() const { return kTaskTypeInvalid; } | |||||
TbeOpTask::~TbeOpTask() { | TbeOpTask::~TbeOpTask() { | ||||
if (sm_desc_ != nullptr) { | if (sm_desc_ != nullptr) { | ||||
@@ -171,7 +186,7 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; } | |||||
// Name of the kernel stub this TBE task was registered with.
const std::string &TbeOpTask::GetStubName() const { return stub_name_; }
uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } | |||||
const std::string &TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } | |||||
void TbeOpTask::SetHandle(void *handle) { | void TbeOpTask::SetHandle(void *handle) { | ||||
this->handle_ = handle; | this->handle_ = handle; | ||||
@@ -834,7 +849,7 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam ¶m) { | |||||
return DoUpdateArgTable(param, false); | return DoUpdateArgTable(param, false); | ||||
} | } | ||||
uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; } | |||||
const std::string &AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; } | |||||
void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | ||||
arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data()); | arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data()); | ||||
@@ -43,7 +43,7 @@ class OpTask { | |||||
const vector<GeTensorDesc> &output_desc); | const vector<GeTensorDesc> &output_desc); | ||||
virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | ||||
void SetModelArgs(std::string model_name, uint32_t model_id); | void SetModelArgs(std::string model_name, uint32_t model_id); | ||||
Status GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, uint32_t &block_dim); | |||||
Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); | |||||
const OpDescPtr &GetOpdesc() const {return op_desc_;} | const OpDescPtr &GetOpdesc() const {return op_desc_;} | ||||
Status OpenDump(rtStream_t stream); | Status OpenDump(rtStream_t stream); | ||||
virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0; | virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0; | ||||
@@ -52,7 +52,7 @@ class OpTask { | |||||
std::vector<GeTensorDesc> &output_desc, | std::vector<GeTensorDesc> &output_desc, | ||||
std::vector<DataBuffer> &output_buffers, | std::vector<DataBuffer> &output_buffers, | ||||
rtStream_t stream); | rtStream_t stream); | ||||
virtual uint32_t GetTaskType() const; | |||||
virtual const std::string &GetTaskType() const; | |||||
protected: | protected: | ||||
Status DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace); | Status DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace); | ||||
@@ -88,7 +88,7 @@ class TbeOpTask : public OpTask { | |||||
size_t GetArgSize() const; | size_t GetArgSize() const; | ||||
const std::string &GetStubName() const; | const std::string &GetStubName() const; | ||||
void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); | void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); | ||||
uint32_t GetTaskType() const override; | |||||
const std::string &GetTaskType() const override; | |||||
void SetHandle(void *handle); | void SetHandle(void *handle); | ||||
private: | private: | ||||
@@ -123,7 +123,7 @@ class AiCpuBaseTask : public OpTask { | |||||
~AiCpuBaseTask() override; | ~AiCpuBaseTask() override; | ||||
UnknowShapeOpType GetUnknownType() const { return unknown_type_; } | UnknowShapeOpType GetUnknownType() const { return unknown_type_; } | ||||
Status UpdateArgTable(const SingleOpModelParam ¶m) override; | Status UpdateArgTable(const SingleOpModelParam ¶m) override; | ||||
uint32_t GetTaskType() const override; | |||||
const std::string &GetTaskType() const override; | |||||
protected: | protected: | ||||
Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | ||||
@@ -57,9 +57,9 @@ const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; | |||||
const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | ||||
// profiling data | // profiling data | ||||
const uint32_t kTaskTypeAicore = 0; | |||||
const uint32_t kTaskTypeAicpu = 1; | |||||
const uint32_t kTaskTypeInvalid = 0xFFFF; | |||||
const std::string kTaskTypeAicore = "AI_CORE"; | |||||
const std::string kTaskTypeAicpu = "AI_CPU"; | |||||
const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID"; | |||||
// Data cache, including data address and length | // Data cache, including data address and length | ||||
struct DataBuffer { | struct DataBuffer { | ||||
@@ -251,27 +251,19 @@ struct Options { | |||||
struct TaskDescInfo { | struct TaskDescInfo { | ||||
std::string model_name; | std::string model_name; | ||||
std::string op_name; | std::string op_name; | ||||
std::string op_type; | |||||
uint32_t block_dim; | uint32_t block_dim; | ||||
uint32_t task_id; | uint32_t task_id; | ||||
uint32_t stream_id; | uint32_t stream_id; | ||||
std::string shape_type; | std::string shape_type; | ||||
int64_t cur_iter_num; | int64_t cur_iter_num; | ||||
uint32_t task_type; | |||||
}; | |||||
// Profiling info of graph | |||||
struct ComputeGraphDescInfo { | |||||
std::string model_name; | |||||
std::string op_name; | |||||
std::string op_type; | |||||
std::string task_type; | |||||
std::vector<Format> input_format; | std::vector<Format> input_format; | ||||
std::vector<std::vector<int64_t>> input_shape; | std::vector<std::vector<int64_t>> input_shape; | ||||
std::vector<DataType> input_data_type; | std::vector<DataType> input_data_type; | ||||
std::vector<Format> output_format; | std::vector<Format> output_format; | ||||
std::vector<std::vector<int64_t>> output_shape; | std::vector<std::vector<int64_t>> output_shape; | ||||
std::vector<DataType> output_data_type; | std::vector<DataType> output_data_type; | ||||
uint32_t task_id; | |||||
uint32_t stream_id; | |||||
}; | }; | ||||
struct OpDescInfo { | struct OpDescInfo { | ||||
@@ -761,7 +761,7 @@ set(GENERATOR_TEST_FILES | |||||
) | ) | ||||
set(SINGLE_OP_TEST_FILES | set(SINGLE_OP_TEST_FILES | ||||
#"single_op/single_op_model_unittest.cc" | |||||
"single_op/single_op_model_unittest.cc" | |||||
"single_op/single_op_manager_unittest.cc" | "single_op/single_op_manager_unittest.cc" | ||||
"single_op/stream_resource_unittest.cc" | "single_op/stream_resource_unittest.cc" | ||||
"single_op/single_op_task_unittest.cc" | "single_op/single_op_task_unittest.cc" | ||||
@@ -890,4 +890,11 @@ TEST_F(UtestDavinciModel, Sink_model_profile) { | |||||
model.SinkModelProfile(); | model.SinkModelProfile(); | ||||
} | } | ||||
TEST_F(UtestDavinciModel, Sink_time_profile) { | |||||
ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; | |||||
DavinciModel model(0, nullptr); | |||||
InputData current_data; | |||||
model.SinkTimeProfile(current_data); | |||||
} | |||||
} // namespace ge | } // namespace ge |
@@ -40,6 +40,10 @@ class UtestSingleOpModel : public testing::Test { | |||||
void TearDown() {} | void TearDown() {} | ||||
}; | }; | ||||
//rt api stub | |||||
rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId) { | |||||
return RT_ERROR_NONE; | |||||
} | |||||
/* | /* | ||||
TEST_F(UtestSingleOpModel, test_init_model) { | TEST_F(UtestSingleOpModel, test_init_model) { | ||||
string model_data_str = "123456789"; | string model_data_str = "123456789"; | ||||
@@ -101,9 +105,9 @@ TEST_F(UtestSingleOpModel, test_set_inputs_and_outputs) { | |||||
std::mutex stream_mu_; | std::mutex stream_mu_; | ||||
rtStream_t stream_ = nullptr; | rtStream_t stream_ = nullptr; | ||||
SingleOp single_op(&stream_mu_, stream_); | |||||
ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS); | |||||
// SingleOp single_op(&stream_mu_, stream_); | |||||
// | |||||
// ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS); | |||||
} | } | ||||
/* | /* | ||||
TEST_F(UtestSingleOpModel, test_build_kernel_task) { | TEST_F(UtestSingleOpModel, test_build_kernel_task) { | ||||
@@ -148,7 +152,7 @@ TEST_F(UtestSingleOpModel, test_init) { | |||||
ASSERT_EQ(op_model.Init(), FAILED); | ASSERT_EQ(op_model.Init(), FAILED); | ||||
} | } | ||||
*/ | */ | ||||
/* | |||||
TEST_F(UtestSingleOpModel, test_parse_arg_table) { | TEST_F(UtestSingleOpModel, test_parse_arg_table) { | ||||
string model_data_str = "123456789"; | string model_data_str = "123456789"; | ||||
SingleOpModel op_model("model", model_data_str.c_str(), model_data_str.size()); | SingleOpModel op_model("model", model_data_str.c_str(), model_data_str.size()); | ||||
@@ -173,3 +177,23 @@ TEST_F(UtestSingleOpModel, test_parse_arg_table) { | |||||
ASSERT_EQ(op.arg_table_[1].size(), 1); | ASSERT_EQ(op.arg_table_[1].size(), 1); | ||||
ASSERT_EQ(op.arg_table_[1].front(), &arg_base[0]); | ASSERT_EQ(op.arg_table_[1].front(), &arg_base[0]); | ||||
} | } | ||||
*/ | |||||
TEST_F(UtestSingleOpModel, test_op_task_get_profiler_args) { | |||||
string name = "relu"; | |||||
string type = "relu"; | |||||
auto op_desc = std::make_shared<ge::OpDesc>(name, type); | |||||
op_desc->SetStreamId(0); | |||||
op_desc->SetId(0); | |||||
TbeOpTask task; | |||||
task.op_desc_ = op_desc; | |||||
task.model_name_ = "resnet_50"; | |||||
task.model_id_ = 1; | |||||
TaskDescInfo task_desc_info; | |||||
uint32_t model_id; | |||||
task.GetProfilingArgs(task_desc_info, model_id); | |||||
ASSERT_EQ(task_desc_info.model_name, "resnet_50"); | |||||
ASSERT_EQ(model_id, 1); | |||||
} | |||||