| @@ -20,6 +20,8 @@ | |||||
| #include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
| #include "framework/common/string_util.h" | #include "framework/common/string_util.h" | ||||
| #include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
| #include "graph/utils/type_utils.h" | |||||
| #include "graph/types.h" | |||||
| #include "runtime/base.h" | #include "runtime/base.h" | ||||
| #include "graph/load/model_manager/davinci_model.h" | #include "graph/load/model_manager/davinci_model.h" | ||||
| @@ -31,12 +33,30 @@ const char *const kBpPoint = "bp_point"; | |||||
// Constants below exist only when DAVINCI_SUPPORT_PROFILING is defined.
| #ifdef DAVINCI_SUPPORT_PROFILING | #ifdef DAVINCI_SUPPORT_PROFILING | ||||
// Chunk size (in bytes) ReportData uses when slicing a payload for the reporter callback.
| const size_t kReportMaxLen = 2048; | const size_t kReportMaxLen = 2048; | ||||
| const int32_t kMaxDeviceNum = 256; | const int32_t kMaxDeviceNum = 256; | ||||
// Indent argument handed to Json::dump() in ProfilingTaskDescInfo.
// NOTE(review): "Inteval" looks like a misspelling of "Interval"; the name is referenced elsewhere, so rename in one sweep.
| const uint32_t kInteval = 2; | |||||
| const std::string kConfigNumsdev = "devNums"; | const std::string kConfigNumsdev = "devNums"; | ||||
| const std::string kConfigDevIdList = "devIdList"; | const std::string kConfigDevIdList = "devIdList"; | ||||
| const std::string kProfStart = "prof_start"; | const std::string kProfStart = "prof_start"; | ||||
| const std::string kProfStop = "prof_stop"; | const std::string kProfStop = "prof_stop"; | ||||
| const std::string kProfModelSubscribe = "prof_model_subscribe"; | const std::string kProfModelSubscribe = "prof_model_subscribe"; | ||||
| const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; | const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; | ||||
// Keys of the per-task JSON record assembled in ProfilingTaskDescInfo.
| const std::string kModelName = "model_name"; | |||||
| const std::string kModelId = "model_id"; | |||||
// NOTE(review): identifier is misspelled ("Nmae"); the emitted key "op_name" is spelled correctly.
| const std::string kOpNmae = "op_name"; | |||||
| const std::string kOptype = "op_type"; | |||||
| const std::string kBlockDim = "block_dims"; | |||||
| const std::string kTaskId = "task_id"; | |||||
| const std::string kStreamId = "stream_id"; | |||||
| const std::string kShapeType = "shape_type"; | |||||
| const std::string kCurIterNum = "cur_iter_num"; | |||||
| const std::string kTaskType = "task_type"; | |||||
// Keys used by ProfilingOpInputOutInfo: kInput/kOutput hold the tensor lists,
// kIdx/kFormat/kDataType/kShape are the fields of each list entry.
| const std::string kInput = "input"; | |||||
| const std::string kOutput = "output"; | |||||
| const std::string kFormat = "format"; | |||||
| const std::string kDataType = "data_type"; | |||||
| const std::string kShape = "shape"; | |||||
| const std::string kIdx = "idx"; | |||||
| #endif | #endif | ||||
| } // namespace | } // namespace | ||||
| @@ -206,118 +226,69 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf | |||||
| #endif | #endif | ||||
| } | } | ||||
// NOTE(review): this span reads like a rendered diff. The first header below and the
// string-concatenating loop after the #ifdef appear to belong to an earlier
// ProfilingTaskDescInfo; the second header plus the two Json loops form
// ProfilingOpInputOutInfo. Confirm against the real file before relying on this layout.
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo( | |||||
| uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) { | |||||
// ProfilingOpInputOutInfo: appends the input and output tensor descriptions of `task`
// to `task_json` under the "input" and "output" keys.
//   task      - supplies the input_*/output_* format, data type and shape lists.
//   task_json - JSON object that receives one entry per tensor.
// The body is compiled only when DAVINCI_SUPPORT_PROFILING is defined.
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingOpInputOutInfo( | |||||
| const TaskDescInfo &task, Json &task_json) { | |||||
| #ifdef DAVINCI_SUPPORT_PROFILING | #ifdef DAVINCI_SUPPORT_PROFILING | ||||
// Text format: one space-separated, newline-terminated line per task, sent with the
// "task_desc_info" tag; any memcpy_s or reporter failure aborts the whole loop.
| std::string data; | |||||
| for (const auto &task : task_desc_info) { | |||||
| std::string model_name = task.model_name; | |||||
| std::string op_name = task.op_name; | |||||
| uint32_t block_dim = task.block_dim; | |||||
| uint32_t task_id = task.task_id; | |||||
| uint32_t stream_id = task.stream_id; | |||||
| std::string shape_type = task.shape_type; | |||||
| int64_t cur_iter_num = task.cur_iter_num; | |||||
| uint32_t task_type = task.task_type; | |||||
| data = model_name.append(" ") | |||||
| .append(op_name).append(" ") | |||||
| .append(std::to_string(block_dim)).append(" ") | |||||
| .append(std::to_string(task_id)).append(" ") | |||||
| .append(std::to_string(stream_id)).append(" ") | |||||
| .append(std::to_string(model_id)).append(" ") | |||||
| .append(shape_type).append(" ") | |||||
| .append(std::to_string(cur_iter_num)).append(" ") | |||||
| .append(std::to_string(task_type)).append("\n"); | |||||
| ReporterData reporter_data{}; | |||||
| reporter_data.deviceId = device_id; | |||||
| reporter_data.data = (unsigned char *)data.c_str(); | |||||
| reporter_data.dataLen = data.size(); | |||||
| int ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "task_desc_info", sizeof("task_desc_info")); | |||||
| if (ret != EOK) { | |||||
| GELOGE(ret, "Report data tag of task_desc_info memcpy error!"); | |||||
| return; | |||||
| } | |||||
| int32_t cb_ret = CallMsprofReport(reporter_data); | |||||
| if (cb_ret != 0) { | |||||
| GELOGE(cb_ret, "Reporter data of task_desc_info failed, ret:%d", cb_ret); | |||||
| return; | |||||
| } | |||||
// Inputs: one JSON object per tensor holding its index, serialized format,
// serialized data type and shape, appended to task_json["input"].
// NOTE(review): input_data_type and input_shape are indexed with a bound taken from
// input_format.size(); this assumes the three lists have equal length - confirm with producers.
| for (size_t i = 0; i < task.input_format.size(); i++) { | |||||
| Json tmp_input; | |||||
| tmp_input[kIdx] = i; | |||||
| Format format = task.input_format[i]; | |||||
| tmp_input[kFormat] = TypeUtils::FormatToSerialString(format); | |||||
| DataType data_type = task.input_data_type[i]; | |||||
| tmp_input[kDataType] = TypeUtils::DataTypeToSerialString(data_type); | |||||
| tmp_input[kShape] = task.input_shape[i]; | |||||
| task_json[kInput] += tmp_input; | |||||
| } | |||||
// Outputs: same record layout as the inputs, appended to task_json["output"];
// the same equal-length assumption applies to the output_* lists.
| for (size_t i = 0; i < task.output_format.size(); i++) { | |||||
| Json tmp_output; | |||||
| tmp_output[kIdx] = i; | |||||
| Format format = task.output_format[i]; | |||||
| tmp_output[kFormat] = TypeUtils::FormatToSerialString(format); | |||||
| DataType data_type = task.output_data_type[i]; | |||||
| tmp_output[kDataType] = TypeUtils::DataTypeToSerialString(data_type); | |||||
| tmp_output[kShape] = task.output_shape[i]; | |||||
| task_json[kOutput] += tmp_output; | |||||
| } | } | ||||
| data.clear(); | |||||
| #endif | #endif | ||||
| } | } | ||||
// NOTE(review): this span reads like a rendered diff. The first header and the
// data.append(...) text builder appear to belong to an earlier ProfilingGraphDescInfo;
// the second header and the Json-based loop form ProfilingTaskDescInfo. Confirm against
// the real file before relying on this layout.
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo( | |||||
| uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, const int32_t &device_id) { | |||||
// ProfilingTaskDescInfo: builds one JSON record per entry of `task_desc_info` and hands
// it to ReportData with the tag "task_desc_info".
//   model_id       - stored in every record under "model_id".
//   task_desc_info - tasks to report; each contributes its names, ids, block dim,
//                    task type, iteration number, shape type and tensor info.
//   device_id      - forwarded unchanged to ReportData.
// If serializing a record throws, the error is logged and the function returns without
// reporting the remaining tasks. The body is compiled only under DAVINCI_SUPPORT_PROFILING.
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo( | |||||
| uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) { | |||||
| #ifdef DAVINCI_SUPPORT_PROFILING | #ifdef DAVINCI_SUPPORT_PROFILING | ||||
// Text format: "key:value" pairs separated by spaces, shapes written as quoted
// comma-separated lists, one newline-terminated line per graph entry.
| std::string data; | |||||
| for (const auto &graph : compute_graph_desc_info) { | |||||
| data.append("model_name:") | |||||
| .append(graph.model_name) | |||||
| .append(" op_name:") | |||||
| .append(graph.op_name) | |||||
| .append(" op_type:") | |||||
| .append(graph.op_type); | |||||
| for (size_t i = 0; i < graph.input_format.size(); ++i) { | |||||
| data.append(" input_id:") | |||||
| .append(std::to_string(i)) | |||||
| .append(" input_format:") | |||||
| .append(std::to_string(graph.input_format.at(i))) | |||||
| .append(" input_data_type:") | |||||
| .append(std::to_string(graph.input_data_type.at(i))) | |||||
| .append(" input_shape:\""); | |||||
| size_t input_shape_len = graph.input_shape.at(i).size(); | |||||
| if (input_shape_len == 0) { | |||||
| data.append(""); | |||||
| } else if (input_shape_len == 1) { | |||||
| data.append(std::to_string(graph.input_shape.at(i).at(0))); | |||||
| } else { | |||||
| for (size_t j = 0; j < input_shape_len - 1; ++j) { | |||||
| data.append(std::to_string(graph.input_shape.at(i).at(j))).append(","); | |||||
| } | |||||
| data.append(std::to_string(graph.input_shape.at(i).at(input_shape_len - 1))); | |||||
| } | |||||
| data.append("\""); | |||||
| } | |||||
| for (size_t i = 0; i < graph.output_format.size(); ++i) { | |||||
| data.append(" output_id:") | |||||
| .append(std::to_string(i)) | |||||
| .append(" output_format:") | |||||
| .append(std::to_string(graph.output_format.at(i))) | |||||
| .append(" output_data_type:") | |||||
| .append(std::to_string(graph.output_data_type.at(i))) | |||||
| .append(" output_shape:\""); | |||||
| size_t output_shape_len = graph.output_shape.at(i).size(); | |||||
| if (output_shape_len == 0) { | |||||
| data.append(""); | |||||
| } else if (output_shape_len == 1) { | |||||
| data.append(std::to_string(graph.output_shape.at(i).at(0))); | |||||
| } else { | |||||
| for (size_t j = 0; j < output_shape_len - 1; ++j) { | |||||
| data.append(std::to_string(graph.output_shape.at(i).at(j))).append(","); | |||||
| } | |||||
| data.append(std::to_string(graph.output_shape.at(i).at(output_shape_len - 1))); | |||||
| } | |||||
| data.append("\""); | |||||
// JSON format: scalar fields are set here, tensor lists are added by ProfilingOpInputOutInfo.
| for (const auto &task : task_desc_info) { | |||||
| Json task_info; | |||||
| task_info[kModelName] = task.model_name; | |||||
| task_info[kModelId] = model_id; | |||||
| task_info[kOpNmae] = task.op_name; | |||||
| task_info[kOptype] = task.op_type; | |||||
| task_info[kBlockDim] = task.block_dim; | |||||
| task_info[kTaskType] = task.task_type; | |||||
| task_info[kTaskId] = task.task_id; | |||||
| task_info[kStreamId] = task.stream_id; | |||||
| task_info[kCurIterNum] = task.cur_iter_num; | |||||
| task_info[kShapeType] = task.shape_type; | |||||
| ProfilingOpInputOutInfo(task, task_info); | |||||
| std::string reported_data; | |||||
// Pretty-print with kInteval spaces of indent. NOTE(review): the argument list matches
// nlohmann::json::dump(indent, indent_char, ensure_ascii, error_handler), where `ignore`
// would drop invalid UTF-8 instead of throwing - confirm that Json is that type.
| try { | |||||
| reported_data = task_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); | |||||
| } catch (std::exception &e) { | |||||
| GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); | |||||
| return ; | |||||
| } catch (...) { | |||||
| GELOGE(FAILED, "Failed to convert JSON to string."); | |||||
| return; | |||||
| } | } | ||||
| data.append(" model_id:").append(std::to_string(model_id)); | |||||
| data.append(" task_id:").append(std::to_string(graph.task_id)); | |||||
| data.append(" stream_id:").append(std::to_string(graph.stream_id)); | |||||
| data.append("\n"); | |||||
| GraphDescReport(device_id, data); | |||||
| data.clear(); | |||||
// Every record is terminated with ",\n" before being reported.
| reported_data.append(",") | |||||
| .append("\n"); | |||||
| ReportData(device_id, reported_data, "task_desc_info"); | |||||
| } | } | ||||
| #endif | #endif | ||||
| } | } | ||||
// NOTE(review): this span reads like a rendered diff. The GraphDescReport header and the
// lines that hard-code "graph_desc_info" appear to be the earlier form; the ReportData
// header and the lines using tag_name are the generalized form. Confirm against the real file.
| void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &data) { | |||||
// ReportData: sends `data` to the profiling reporter for `device_id`, tagged `tag_name`.
// Payloads of at least kReportMaxLen bytes are sent as consecutive kReportMaxLen-byte
// chunks followed by the non-empty remainder; shorter payloads are sent in one call.
// A failed tag copy or a non-zero reporter result is logged and ends the function early.
// The body is compiled only when DAVINCI_SUPPORT_PROFILING is defined.
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportData( | |||||
| const int32_t &device_id, const string &data, const string &tag_name) { | |||||
| #ifdef DAVINCI_SUPPORT_PROFILING | #ifdef DAVINCI_SUPPORT_PROFILING | ||||
| ReporterData reporter_data{}; | ReporterData reporter_data{}; | ||||
| int ret = -1; | int ret = -1; | ||||
// NOTE(review): the next line is a diff hunk header, not code; the declaration of cb_ret
// is not visible in this view and presumably sits in the elided lines.
| @@ -325,36 +296,38 @@ void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &d | |||||
// Number of full kReportMaxLen-byte chunks contained in data.
| size_t index = data.size() / kReportMaxLen; | size_t index = data.size() / kReportMaxLen; | ||||
| if (index >= 1) { | if (index >= 1) { | ||||
| reporter_data.deviceId = device_id; | reporter_data.deviceId = device_id; | ||||
| ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); | |||||
| GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;); | |||||
// Copies tag_name.size() bytes, i.e. without the terminating NUL (the literal form above
// used sizeof, which includes it). NOTE(review): termination therefore depends on
// reporter_data being value-initialized and on the tag being shorter than the buffer - confirm.
| ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size()); | |||||
| GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;); | |||||
// Full chunks: each call points into data's own buffer at offset kReportMaxLen * i.
| for (size_t i = 0; i < index; ++i) { | for (size_t i = 0; i < index; ++i) { | ||||
| reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * i; | reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * i; | ||||
| reporter_data.dataLen = kReportMaxLen; | reporter_data.dataLen = kReportMaxLen; | ||||
| cb_ret = CallMsprofReport(reporter_data); | cb_ret = CallMsprofReport(reporter_data); | ||||
| GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); | |||||
| GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret); | |||||
| return;); | |||||
| } | } | ||||
// Remainder after the last full chunk; skipped when the size is an exact multiple.
| reporter_data.dataLen = data.size() - kReportMaxLen * index; | reporter_data.dataLen = data.size() - kReportMaxLen * index; | ||||
| if (reporter_data.dataLen != 0) { | if (reporter_data.dataLen != 0) { | ||||
| reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * index; | reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * index; | ||||
| cb_ret = CallMsprofReport(reporter_data); | cb_ret = CallMsprofReport(reporter_data); | ||||
| GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); | |||||
| GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret); | |||||
| return;); | |||||
| } | } | ||||
// Payload shorter than kReportMaxLen: a single report carries all of data.
| } else { | } else { | ||||
| reporter_data.deviceId = device_id; | reporter_data.deviceId = device_id; | ||||
| reporter_data.data = (unsigned char *)data.c_str(); | reporter_data.data = (unsigned char *)data.c_str(); | ||||
| reporter_data.dataLen = data.size(); | reporter_data.dataLen = data.size(); | ||||
| ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); | |||||
| GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;); | |||||
| ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size()); | |||||
| GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;); | |||||
| cb_ret = CallMsprofReport(reporter_data); | cb_ret = CallMsprofReport(reporter_data); | ||||
| GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); | |||||
| GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret); | |||||
| return;); | |||||
| } | } | ||||
| #endif | #endif | ||||
| } | } | ||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData( | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData( | ||||
| uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | |||||
| const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) { | |||||
| uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info) { | |||||
| #ifdef DAVINCI_SUPPORT_PROFILING | #ifdef DAVINCI_SUPPORT_PROFILING | ||||
| int32_t logic_device_id = 0; | int32_t logic_device_id = 0; | ||||
| rtError_t rt_ret = rtGetDevice(&logic_device_id); | rtError_t rt_ret = rtGetDevice(&logic_device_id); | ||||
| @@ -365,8 +338,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr | |||||
| GELOGD("current logic_device_id:%d", logic_device_id); | GELOGD("current logic_device_id:%d", logic_device_id); | ||||
| GELOGD("start ProfilingTaskDescInfo."); | GELOGD("start ProfilingTaskDescInfo."); | ||||
| ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id); | ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id); | ||||
| GELOGD("start ProfilingGraphDescInfo."); | |||||
| ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id); | |||||
| GELOGD("Report profiling data for GE end."); | GELOGD("Report profiling data for GE end."); | ||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -813,6 +784,44 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMs | |||||
| static_cast<void *>(&reporter_data), sizeof(ReporterData)); | static_cast<void *>(&reporter_data), sizeof(ReporterData)); | ||||
| } | } | ||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetOpInputOutputInfo( | |||||
| const OpDescPtr &op, TaskDescInfo &task_desc_info) const { | |||||
| std::vector<Format> input_format; | |||||
| std::vector<std::vector<int64_t>> input_shape; | |||||
| std::vector<DataType> input_data_type; | |||||
| for (size_t i = 0; i < op->GetAllInputsSize(); ++i) { | |||||
| GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i); | |||||
| if (input_tensor_desc == nullptr) { | |||||
| continue; | |||||
| } | |||||
| input_format.emplace_back(input_tensor_desc->GetFormat()); | |||||
| input_shape.emplace_back(input_tensor_desc->GetShape().GetDims()); | |||||
| input_data_type.emplace_back(input_tensor_desc->GetDataType()); | |||||
| } | |||||
| std::vector<Format> output_format; | |||||
| std::vector<std::vector<int64_t>> output_shape; | |||||
| std::vector<DataType> output_data_type; | |||||
| for (size_t j = 0; j < op->GetOutputsSize(); ++j) { | |||||
| GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j); | |||||
| if (output_tensor_desc == nullptr) { | |||||
| continue; | |||||
| } | |||||
| output_format.emplace_back(output_tensor_desc->GetFormat()); | |||||
| output_shape.emplace_back(output_tensor_desc->GetShape().GetDims()); | |||||
| output_data_type.emplace_back(output_tensor_desc->GetDataType()); | |||||
| } | |||||
| std::vector<Format> format_default = { FORMAT_NULL }; | |||||
| std::vector<std::vector<int64_t>> shape_default = { {0} }; | |||||
| std::vector<DataType> data_type_default = { DT_UNDEFINED }; | |||||
| task_desc_info.input_format = input_format.empty() ? format_default : input_format; | |||||
| task_desc_info.input_shape = input_shape.empty() ? shape_default : input_shape; | |||||
| task_desc_info.input_data_type = input_data_type.empty() ? data_type_default : input_data_type; | |||||
| task_desc_info.output_format = output_format.empty() ? format_default : output_format; | |||||
| task_desc_info.output_shape = output_shape.empty() ? shape_default : output_shape; | |||||
| task_desc_info.output_data_type = output_data_type.empty() ? data_type_default : output_data_type; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpPoint( | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpPoint( | ||||
| std::string &fp_point, std::string &bp_point) { | std::string &fp_point, std::string &bp_point) { | ||||
| // Env or options mode: fp_point_/bp_point_ have been initialized on profiling init | // Env or options mode: fp_point_/bp_point_ have been initialized on profiling init | ||||
| @@ -54,6 +54,8 @@ namespace { | |||||
| } // namespace | } // namespace | ||||
| namespace ge { | namespace ge { | ||||
// Forward-declares OpDesc and aliases a shared_ptr to it; the alias is the parameter
// type of ProfilingManager::GetOpInputOutputInfo declared further down.
// NOTE(review): std::shared_ptr needs <memory>; confirm it is included by this header.
| class OpDesc; | |||||
| using OpDescPtr = std::shared_ptr<OpDesc>; | |||||
| struct DeviceSubsInfo { | struct DeviceSubsInfo { | ||||
| uint64_t module; | uint64_t module; | ||||
| uint32_t subscribe_count; | uint32_t subscribe_count; | ||||
| @@ -82,12 +84,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||||
| bool ProfilingModelExecuteOn() const; | bool ProfilingModelExecuteOn() const; | ||||
| // is_execute_profiling_ only used by ge option and env | // is_execute_profiling_ only used by ge option and env | ||||
| bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } | bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } | ||||
| void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | |||||
| const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info); | |||||
| void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info); | |||||
| void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | ||||
| const int32_t &device_id); | const int32_t &device_id); | ||||
| void ProfilingGraphDescInfo(uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, | |||||
| const int32_t &device_id); | |||||
| void ProfilingOpInputOutInfo(const TaskDescInfo &task, Json &task_json); | |||||
| Status PluginInit() const; | Status PluginInit() const; | ||||
| void PluginUnInit() const; | void PluginUnInit() const; | ||||
| Status CallMsprofReport(ReporterData &reporter_data) const; | Status CallMsprofReport(ReporterData &reporter_data) const; | ||||
| @@ -95,6 +95,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||||
| void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; } | void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; } | ||||
| void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; } | void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; } | ||||
| void GetFpBpPoint(std::string &fp_point, std::string &bp_point); | void GetFpBpPoint(std::string &fp_point, std::string &bp_point); | ||||
| void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const; | |||||
| void ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name); | |||||
| private: | private: | ||||
| Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf); | Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf); | ||||
| Status ParseOptions(const std::string &options); | Status ParseOptions(const std::string &options); | ||||
| @@ -103,7 +105,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||||
| Status ProfParseDeviceId(const std::map<std::string, std::string> &config_para, | Status ProfParseDeviceId(const std::map<std::string, std::string> &config_para, | ||||
| vector<int32_t> &device_list); | vector<int32_t> &device_list); | ||||
| uint64_t GetProfilingModule(); | uint64_t GetProfilingModule(); | ||||
| void GraphDescReport(const int32_t &device_id, const string &data); | |||||
| void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector<int32_t> &device_list); | void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector<int32_t> &device_list); | ||||
| void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module); | void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module); | ||||
| @@ -852,7 +852,7 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi | |||||
| // subgraph of dynamic graph no need to find index, has been found in parent graph | // subgraph of dynamic graph no need to find index, has been found in parent graph | ||||
| if (IsSubGraphOfDynamicGraph(graph)) { | if (IsSubGraphOfDynamicGraph(graph)) { | ||||
| GELOGI("Graph[%s] is subgraph of dynamic graph, no nned to find index.", graph->GetName().c_str()); | |||||
| GELOGI("Graph[%s] is subgraph of dynamic graph, no need to find index.", graph->GetName().c_str()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -92,6 +92,32 @@ const uint32_t kEndOfSequence = 0x0704000a; | |||||
| const uint32_t kEndOfSequenceNew = 507005; | const uint32_t kEndOfSequenceNew = 507005; | ||||
| const int32_t kModelAbortNormal = 0x0704000e; | const int32_t kModelAbortNormal = 0x0704000e; | ||||
| const int32_t kModelAbortNormalNew = 507024; | const int32_t kModelAbortNormalNew = 507024; | ||||
// NOTE(review): "Inteval" looks like a misspelling of "Interval"; its use is not visible
// in this view - presumably the indent passed to Json::dump(), confirm before renaming.
| const uint32_t kInteval = 2; | |||||
// Keys of the model_load_info JSON object filled in SinkModelProfile.
| const char *const kModelName = "model_name"; | |||||
// NOTE(review): identifier is misspelled ("Modele"); the emitted key "model_id" is correct.
| const char *const kModeleId = "model_id"; | |||||
| const char *const kLoadStartTime = "load_start_time"; | |||||
| const char *const kLoadEndTime = "load_end_time"; | |||||
// kFusionOpInfo is the list key; the keys that follow are fields of each fusion-op entry.
| const char *const kFusionOpInfo = "fusion_op_info"; | |||||
| const char *const kFusionOpName = "fusion_op_name"; | |||||
| const char *const kOriginalOpNum = "origin_op_num"; | |||||
| const char *const kOriginalOpName = "origin_op_name"; | |||||
| const char *const kStreamId = "stream_id"; | |||||
// kFusionOpMemoryInfo names a nested object whose members are the five *_size keys below.
| const char *const kFusionOpMemoryInfo = "memory_info"; | |||||
| const char *const kInputSize = "input_size"; | |||||
| const char *const kOutputSize = "output_size"; | |||||
| const char *const kWeightSize = "weight_size"; | |||||
| const char *const kWorkSpaceSize = "workspace_size"; | |||||
| const char *const kTotalSize = "total_size"; | |||||
| const char *const kTaskCount = "task_count"; | |||||
| const char *const kTaskId = "task_id"; | |||||
// The remaining keys are not used in the visible part of the file; presumably they belong
// to a per-request timing record - confirm against their users.
| const char* const kRequestId = "request_id"; | |||||
| const char* const kThreadId = "thread_id"; | |||||
| const char* const kInputBeginTime = "input_begin_time"; | |||||
| const char* const kInputEndTime = "input_end_time"; | |||||
| const char* const kInferBeginTime = "infer_begin_time"; | |||||
| const char* const kInferEndTime = "infer_end_time"; | |||||
// NOTE(review): this key says "start" while its siblings say "begin"; confirm that
// "output_start_time" is what the consumer of the record expects.
| const char* const kOutputBeginTime = "output_start_time"; | |||||
| const char* const kOutputEndTime = "output_end_time"; | |||||
| inline bool IsDataOp(const std::string &node_type) { | inline bool IsDataOp(const std::string &node_type) { | ||||
| return (node_type == DATA_TYPE) || (node_type == AIPP_DATA_TYPE) || (node_type == ANN_DATA_TYPE); | return (node_type == DATA_TYPE) || (node_type == AIPP_DATA_TYPE) || (node_type == ANN_DATA_TYPE); | ||||
| @@ -744,13 +770,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
| } | } | ||||
| Status DavinciModel::ReportProfilingData() { | Status DavinciModel::ReportProfilingData() { | ||||
| std::vector<ComputeGraphDescInfo> compute_graph_desc_info; | |||||
| Status ret = GetComputeGraphInfo(compute_graph_desc_info); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "GetComputeGraphInfo failed."); | |||||
| return ret; | |||||
| } | |||||
| ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info); | |||||
| ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo()); | |||||
| GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); | GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -2202,173 +2222,101 @@ Status DavinciModel::InitModelProfile() { | |||||
| } | } | ||||
| Status DavinciModel::SinkModelProfile() { | Status DavinciModel::SinkModelProfile() { | ||||
| // profiling plugin must be registered | |||||
| auto &prof_mgr = ProfilingManager::Instance(); | auto &prof_mgr = ProfilingManager::Instance(); | ||||
| ReporterData reporter_data{}; | |||||
| // report model data tag name | |||||
| std::string tag_name("model_load_info_" + std::to_string(this->Id())); | |||||
| GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | |||||
| return FAILED, "Sink model tag memcpy error."); | |||||
| // Model Header | // Model Header | ||||
| std::string name = om_name_.empty() ? name_ : om_name_; | std::string name = om_name_.empty() ? name_ : om_name_; | ||||
| size_t name_len = name.size(); | |||||
| reporter_data.deviceId = device_id_; | |||||
| reporter_data.data = (unsigned char *)&name_len; | |||||
| reporter_data.dataLen = sizeof(int32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| reporter_data.data = (unsigned char *)name.c_str(); | |||||
| reporter_data.dataLen = name.size(); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| uint32_t model_id = this->Id(); | uint32_t model_id = this->Id(); | ||||
| reporter_data.data = (unsigned char *)&model_id; | |||||
| reporter_data.dataLen = sizeof(uint32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| // Load Start/End Time | |||||
| int64_t start_time = this->GetLoadBeginTime(); | int64_t start_time = this->GetLoadBeginTime(); | ||||
| reporter_data.data = (unsigned char *)&start_time; | |||||
| reporter_data.dataLen = sizeof(int64_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| int64_t end_time = this->GetLoadEndTime(); | int64_t end_time = this->GetLoadEndTime(); | ||||
| reporter_data.data = (unsigned char *)&end_time; | |||||
| reporter_data.dataLen = sizeof(int64_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| Json model_load_info; | |||||
| model_load_info[kModelName] = name; | |||||
| model_load_info[kModeleId] = model_id; | |||||
| model_load_info[kLoadStartTime] = start_time; | |||||
| model_load_info[kLoadEndTime] = end_time; | |||||
| // fusion op info | |||||
| using CIT = std::multimap<uint32_t, uint32_t>::const_iterator; | using CIT = std::multimap<uint32_t, uint32_t>::const_iterator; | ||||
| using Range = std::pair<CIT, CIT>; | using Range = std::pair<CIT, CIT>; | ||||
| for (const ProfileInfo &profile : profile_list_) { | for (const ProfileInfo &profile : profile_list_) { | ||||
| // op name after fusion | |||||
| Json fusion_op_info; | |||||
| string fusion_op_name = profile.fusion_info.op_name; | string fusion_op_name = profile.fusion_info.op_name; | ||||
| int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size(); | |||||
| reporter_data.data = (unsigned char *)&fusion_op_name_len; | |||||
| reporter_data.dataLen = sizeof(int32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| reporter_data.data = (unsigned char *)fusion_op_name.c_str(); | |||||
| reporter_data.dataLen = fusion_op_name_len; | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| // original op name before fusion | |||||
| uint32_t op_num = profile.fusion_info.original_op_names.size(); | uint32_t op_num = profile.fusion_info.original_op_names.size(); | ||||
| reporter_data.data = (unsigned char *)&op_num; | |||||
| reporter_data.dataLen = sizeof(int32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| vector<string> original_name; | |||||
| for (uint32_t k = 0; k < op_num; k++) { | for (uint32_t k = 0; k < op_num; k++) { | ||||
| std::string op_name = profile.fusion_info.original_op_names[k]; | |||||
| int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size(); | |||||
| reporter_data.data = (unsigned char *)&op_name_len; | |||||
| reporter_data.dataLen = sizeof(int32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| reporter_data.data = (unsigned char *)op_name.c_str(); | |||||
| reporter_data.dataLen = op_name_len; | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| } | |||||
| // stream id info | |||||
| original_name.emplace_back(profile.fusion_info.original_op_names[k]); | |||||
| } | |||||
| uint32_t stream_id = 0; | uint32_t stream_id = 0; | ||||
| auto iter = profiler_report_op_info_.find(fusion_op_name); | auto iter = profiler_report_op_info_.find(fusion_op_name); | ||||
| if (iter != profiler_report_op_info_.end()) { | if (iter != profiler_report_op_info_.end()) { | ||||
| stream_id = iter->second.second; | stream_id = iter->second.second; | ||||
| } | } | ||||
| reporter_data.data = (unsigned char *)&stream_id; | |||||
| reporter_data.dataLen = sizeof(int32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| // memory info | |||||
| reporter_data.data = (unsigned char *)&profile.memory_info; | |||||
| reporter_data.dataLen = sizeof(profile.memory_info); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| // task info | |||||
| reporter_data.data = (unsigned char *)&profile.task_count; | |||||
| reporter_data.dataLen = sizeof(uint32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| fusion_op_info[kFusionOpName] = fusion_op_name; | |||||
| fusion_op_info[kOriginalOpNum] = op_num; | |||||
| fusion_op_info[kOriginalOpName] = original_name; | |||||
| fusion_op_info[kStreamId] = stream_id; | |||||
| fusion_op_info[kFusionOpMemoryInfo][kInputSize] = profile.memory_info.input_size; | |||||
| fusion_op_info[kFusionOpMemoryInfo][kOutputSize] = profile.memory_info.output_size; | |||||
| fusion_op_info[kFusionOpMemoryInfo][kWeightSize] = profile.memory_info.weight_size; | |||||
| fusion_op_info[kFusionOpMemoryInfo][kWorkSpaceSize] = profile.memory_info.workspace_size; | |||||
| fusion_op_info[kFusionOpMemoryInfo][kTotalSize] = profile.memory_info.total_size; | |||||
| fusion_op_info[kTaskCount] = profile.task_count; | |||||
| vector<uint32_t> task_id; | |||||
| Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index); | Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index); | ||||
| for (CIT idx = task_range.first; idx != task_range.second; ++idx) { | for (CIT idx = task_range.first; idx != task_range.second; ++idx) { | ||||
| uint32_t task_id = idx->second; | |||||
| reporter_data.data = (unsigned char *)&task_id; | |||||
| reporter_data.dataLen = sizeof(uint32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| task_id.push_back(idx->second); | |||||
| } | } | ||||
| fusion_op_info[kTaskId] = task_id; | |||||
| model_load_info[kFusionOpInfo] += fusion_op_info; | |||||
| } | } | ||||
| std::string tag_name("model_load_info_" + std::to_string(this->Id())); | |||||
| std::string reported_data; | |||||
| try { | |||||
| reported_data = model_load_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); | |||||
| } catch (std::exception &e) { | |||||
| GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); | |||||
| } catch (...) { | |||||
| GELOGE(FAILED, "Failed to convert JSON to string."); | |||||
| } | |||||
| reported_data.append(",") | |||||
| .append("\n"); | |||||
| prof_mgr.ReportData(device_id_, reported_data, tag_name); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { | Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { | ||||
| // profiling plugin must be registered | |||||
| auto &prof_mgr = ProfilingManager::Instance(); | auto &prof_mgr = ProfilingManager::Instance(); | ||||
| ReporterData reporter_data{}; | |||||
| string name = om_name_.empty() ? name_ : om_name_; | |||||
| Json model_time_info; | |||||
| model_time_info[kModelName] = name; | |||||
| model_time_info[kModeleId] = this->Id(); | |||||
| model_time_info[kRequestId] = current_data.request_id; | |||||
| model_time_info[kThreadId] = GetDataInputTid(); | |||||
| model_time_info[kInputBeginTime] = time_info_.processBeginTime; | |||||
| model_time_info[kInputEndTime] = time_info_.processEndTime; | |||||
| model_time_info[kInferBeginTime] = time_info_.inferenceBeginTime; | |||||
| model_time_info[kInferEndTime] = time_info_.inferenceEndTime; | |||||
| model_time_info[kOutputBeginTime] = time_info_.dumpBeginTime; | |||||
| model_time_info[kOutputEndTime] = time_info_.dumpEndTime; | |||||
| // report model data tag name | // report model data tag name | ||||
| std::string tag_name; | std::string tag_name; | ||||
| tag_name.append("model_time_info_") | tag_name.append("model_time_info_") | ||||
| .append(std::to_string(this->Id())) | |||||
| .append("_") | |||||
| .append(std::to_string(current_data.index)); | |||||
| GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | |||||
| return FAILED, "Sink model tag memcpy error."); | |||||
| // device id | |||||
| reporter_data.deviceId = device_id_; | |||||
| // Model Header | |||||
| string name; | |||||
| if (!om_name_.empty()) { | |||||
| name = om_name_; | |||||
| } else { | |||||
| name = name_; | |||||
| } | |||||
| size_t name_len = name.size(); | |||||
| reporter_data.data = (unsigned char *)&name_len; | |||||
| reporter_data.dataLen = sizeof(int32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| reporter_data.data = (unsigned char *)name.c_str(); | |||||
| reporter_data.dataLen = name.size(); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u.", this->Id()); | |||||
| // request id | |||||
| uint64_t request_id = current_data.request_id; | |||||
| reporter_data.data = (unsigned char *)&request_id; | |||||
| reporter_data.dataLen = sizeof(uint32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); | |||||
| // thread id | |||||
| int32_t thread_id = GetDataInputTid(); | |||||
| reporter_data.data = (unsigned char *)&thread_id; | |||||
| reporter_data.dataLen = sizeof(int32_t); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); | |||||
| // time info | |||||
| time_info_.modelId = this->Id(); | |||||
| reporter_data.data = (unsigned char *)&time_info_; | |||||
| reporter_data.dataLen = sizeof(struct timeInfo); | |||||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
| "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); | |||||
| .append(std::to_string(this->Id())) | |||||
| .append("_") | |||||
| .append(std::to_string(current_data.index)); | |||||
| std::string reported_data; | |||||
| try { | |||||
| reported_data = model_time_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); | |||||
| } catch (std::exception &e) { | |||||
| GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); | |||||
| } catch (...) { | |||||
| GELOGE(FAILED, "Failed to convert JSON to string."); | |||||
| } | |||||
| reported_data.append(",") | |||||
| .append("\n"); | |||||
| prof_mgr.ReportData(device_id_, reported_data, tag_name); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -3069,13 +3017,15 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo | |||||
| task_desc_info.model_name = name_; | task_desc_info.model_name = name_; | ||||
| } | } | ||||
| task_desc_info.op_name = op->GetName(); | task_desc_info.op_name = op->GetName(); | ||||
| task_desc_info.op_type = op->GetType(); | |||||
| task_desc_info.block_dim = task_def.kernel().block_dim(); | task_desc_info.block_dim = task_def.kernel().block_dim(); | ||||
| task_desc_info.task_id = task->GetTaskID(); | task_desc_info.task_id = task->GetTaskID(); | ||||
| task_desc_info.stream_id = task->GetStreamId(); | task_desc_info.stream_id = task->GetStreamId(); | ||||
| task_desc_info.shape_type = "static"; | task_desc_info.shape_type = "static"; | ||||
| task_desc_info.cur_iter_num = 0; | task_desc_info.cur_iter_num = 0; | ||||
| // task type | |||||
| task_desc_info.task_type = kTaskTypeInvalid; | task_desc_info.task_type = kTaskTypeInvalid; | ||||
| auto &prof_mgr = ProfilingManager::Instance(); | |||||
| prof_mgr.GetOpInputOutputInfo(op, task_desc_info); | |||||
| auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type()); | auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type()); | ||||
| if (model_task_type == RT_MODEL_TASK_KERNEL) { | if (model_task_type == RT_MODEL_TASK_KERNEL) { | ||||
| const domi::KernelDef &kernel_def = task_def.kernel(); | const domi::KernelDef &kernel_def = task_def.kernel(); | ||||
| @@ -3107,7 +3057,6 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo | |||||
| task_desc_info_.emplace_back(task_desc_info); | task_desc_info_.emplace_back(task_desc_info); | ||||
| } | } | ||||
| } | } | ||||
| return; | |||||
| } | } | ||||
| Status DavinciModel::DistributeTask() { | Status DavinciModel::DistributeTask() { | ||||
| @@ -4008,41 +3957,6 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea | |||||
| main_follow_stream_mapping_[main_stream_id].emplace_back(stream); | main_follow_stream_mapping_[main_stream_id].emplace_back(stream); | ||||
| } | } | ||||
| Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info) { | |||||
| auto &all_op_desc = data_dumper_.GetAllOpDescInfo(); | |||||
| for (auto &op_desc : all_op_desc) { | |||||
| ComputeGraphDescInfo compute_graph_info; | |||||
| if (!om_name_.empty()) { | |||||
| compute_graph_info.model_name = om_name_; | |||||
| } else { | |||||
| compute_graph_info.model_name = name_; | |||||
| } | |||||
| std::vector<Format> format = { FORMAT_NULL }; | |||||
| std::vector<std::vector<int64_t>> shape = { {0} }; | |||||
| std::vector<DataType> data_type = { DT_UNDEFINED }; | |||||
| compute_graph_info.op_name = op_desc.op_name; | |||||
| compute_graph_info.op_type = op_desc.op_type; | |||||
| compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format; | |||||
| compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape; | |||||
| compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type; | |||||
| compute_graph_info.output_format = op_desc.output_format.empty() ? format : op_desc.output_format; | |||||
| compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape; | |||||
| compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? data_type : op_desc.output_data_type; | |||||
| uint32_t task_id = 0; | |||||
| uint32_t stream_id = 0; | |||||
| auto iter = profiler_report_op_info_.find(op_desc.op_name); | |||||
| if (iter != profiler_report_op_info_.end()) { | |||||
| task_id = iter->second.first; | |||||
| stream_id = iter->second.second; | |||||
| } | |||||
| compute_graph_info.task_id = task_id; | |||||
| compute_graph_info.stream_id = stream_id; | |||||
| graph_desc_info.emplace_back(compute_graph_info); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) { | void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) { | ||||
| if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) { | if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) { | ||||
| tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_; | tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_; | ||||
| @@ -840,9 +840,6 @@ class DavinciModel { | |||||
| Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); | Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); | ||||
| // get desc info of graph for profiling | |||||
| Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info); | |||||
| void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name); | void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name); | ||||
| Status InitL1DataDumperArgs(); | Status InitL1DataDumperArgs(); | ||||
| @@ -70,8 +70,6 @@ class NodeDoneCallback { | |||||
| Status PrepareConstInputs(const NodeItem &node_item); | Status PrepareConstInputs(const NodeItem &node_item); | ||||
| Status DumpDynamicNode(); | Status DumpDynamicNode(); | ||||
| Status ProfilingReport(); | Status ProfilingReport(); | ||||
| Status GetGraphDescInfo(const NodePtr node, const HybridModel *model, | |||||
| std::vector<ComputeGraphDescInfo> &compute_graph_info); | |||||
| Status GetTaskDescInfo(const NodePtr node, const HybridModel *model, | Status GetTaskDescInfo(const NodePtr node, const HybridModel *model, | ||||
| std::vector<TaskDescInfo> &task_desc_info); | std::vector<TaskDescInfo> &task_desc_info); | ||||
| GraphExecutionContext *graph_context_; | GraphExecutionContext *graph_context_; | ||||
| @@ -159,51 +157,14 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel * | |||||
| } | } | ||||
| GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); | GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); | ||||
| auto &prof_mgr = ProfilingManager::Instance(); | |||||
| task_desc_info = context_->GetProfilingTaskDescInfo(); | task_desc_info = context_->GetProfilingTaskDescInfo(); | ||||
| context_->ClearProfilingTaskDescInfo(); | context_->ClearProfilingTaskDescInfo(); | ||||
| return SUCCESS; | |||||
| } | |||||
| Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel *model, | |||||
| std::vector<ComputeGraphDescInfo> &compute_graph_info) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| GE_CHECK_NOTNULL(model); | |||||
| GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str()); | |||||
| compute_graph_info = context_->GetProfilingGraphDescInfo(); | |||||
| context_->ClearProfilingGraphDescInfo(); | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| for (auto &tmp_compute_graph_info : compute_graph_info) { | |||||
| // default | |||||
| if (op_desc->GetAllInputsSize() == 0) { | |||||
| tmp_compute_graph_info.input_format = { FORMAT_NULL }; | |||||
| tmp_compute_graph_info.input_shape = { {0} }; | |||||
| tmp_compute_graph_info.input_data_type = { DT_UNDEFINED }; | |||||
| } | |||||
| for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||||
| GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); | |||||
| if (input_desc == nullptr) { | |||||
| continue; | |||||
| } | |||||
| tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); | |||||
| tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); | |||||
| tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); | |||||
| } | |||||
| if (op_desc->GetOutputsSize() == 0) { | |||||
| tmp_compute_graph_info.output_format = { FORMAT_NULL }; | |||||
| tmp_compute_graph_info.output_shape = { {0} }; | |||||
| tmp_compute_graph_info.output_data_type = { DT_UNDEFINED }; | |||||
| } | |||||
| for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { | |||||
| GeTensorDesc output_desc = op_desc->GetOutputDesc(j); | |||||
| tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); | |||||
| tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); | |||||
| tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); | |||||
| } | |||||
| for (auto &tmp_task_desc : task_desc_info) { | |||||
| // save op input and output info | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| prof_mgr.GetOpInputOutputInfo(op_desc, tmp_task_desc); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -233,15 +194,8 @@ Status NodeDoneCallback::ProfilingReport() { | |||||
| return profiling_ret; | return profiling_ret; | ||||
| } | } | ||||
| std::vector<ComputeGraphDescInfo> compute_graph_info; | |||||
| profiling_ret = GetGraphDescInfo(node, model, compute_graph_info); | |||||
| if (profiling_ret != RT_ERROR_NONE) { | |||||
| GELOGE(profiling_ret, "Get graph info of node[%s] failed.", node->GetName().c_str()); | |||||
| return profiling_ret; | |||||
| } | |||||
| auto &profiling_manager = ProfilingManager::Instance(); | auto &profiling_manager = ProfilingManager::Instance(); | ||||
| profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info); | |||||
| profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -189,12 +189,11 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||||
| uint32_t stream_id = 0; | uint32_t stream_id = 0; | ||||
| rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
| return FAILED; | |||||
| GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | } | ||||
| GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | ||||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); | (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); | ||||
| (void)context.SaveProfilingGraphDescInfo(task_id, stream_id); | |||||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
| } | } | ||||
| @@ -201,12 +201,11 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void( | |||||
| uint32_t stream_id = 0; | uint32_t stream_id = 0; | ||||
| rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
| return FAILED; | |||||
| GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | } | ||||
| GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | ||||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0); | (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0); | ||||
| (void)context.SaveProfilingGraphDescInfo(task_id, stream_id); | |||||
| auto callback = [=, &context]() { | auto callback = [=, &context]() { | ||||
| GELOGD("Node[%s] callback start.", node_name_.c_str()); | GELOGD("Node[%s] callback start.", node_name_.c_str()); | ||||
| RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | ||||
| @@ -515,7 +515,7 @@ Status TaskContext::Synchronize() { | |||||
| } | } | ||||
| Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | ||||
| uint32_t task_type, uint32_t block_dim) { | |||||
| const std::string &task_type, uint32_t block_dim) { | |||||
| if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | ||||
| const NodeItem &node_item = GetNodeItem(); | const NodeItem &node_item = GetNodeItem(); | ||||
| auto op_desc = node_item.GetOpDesc(); | auto op_desc = node_item.GetOpDesc(); | ||||
| @@ -525,11 +525,11 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream | |||||
| const HybridModel *model = graph_context->model; | const HybridModel *model = graph_context->model; | ||||
| GE_CHECK_NOTNULL(model); | GE_CHECK_NOTNULL(model); | ||||
| std::string op_name = op_desc->GetName(); | |||||
| std::string dynamic_model_name = model->GetModelName(); | std::string dynamic_model_name = model->GetModelName(); | ||||
| TaskDescInfo tmp_task_desc_info; | TaskDescInfo tmp_task_desc_info; | ||||
| tmp_task_desc_info.model_name = dynamic_model_name; | tmp_task_desc_info.model_name = dynamic_model_name; | ||||
| tmp_task_desc_info.op_name = op_name; | |||||
| tmp_task_desc_info.op_name = op_desc->GetName(); | |||||
| tmp_task_desc_info.op_type = op_desc->GetType(); | |||||
| tmp_task_desc_info.block_dim = block_dim; | tmp_task_desc_info.block_dim = block_dim; | ||||
| tmp_task_desc_info.task_type = task_type; | tmp_task_desc_info.task_type = task_type; | ||||
| tmp_task_desc_info.task_id = task_id; | tmp_task_desc_info.task_id = task_id; | ||||
| @@ -546,31 +546,5 @@ NodeState *TaskContext::GetNodeState() const { | |||||
| return node_state_; | return node_state_; | ||||
| } | } | ||||
| Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id) { | |||||
| if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | |||||
| const NodeItem &node_item = GetNodeItem(); | |||||
| auto op_desc = node_item.GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| const GraphExecutionContext *graph_context = GetExecutionContext(); | |||||
| GE_CHECK_NOTNULL(graph_context); | |||||
| const HybridModel *model = graph_context->model; | |||||
| GE_CHECK_NOTNULL(model); | |||||
| std::string dynamic_model_name = model->GetModelName(); | |||||
| auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID); | |||||
| if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && | |||||
| op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) { | |||||
| ComputeGraphDescInfo tmp_compute_graph_info; | |||||
| tmp_compute_graph_info.model_name = dynamic_model_name; | |||||
| tmp_compute_graph_info.op_name = op_desc->GetName(); | |||||
| tmp_compute_graph_info.op_type = op_desc->GetType(); | |||||
| tmp_compute_graph_info.task_id = task_id; | |||||
| tmp_compute_graph_info.stream_id = stream_id; | |||||
| compute_graph_info.emplace_back(tmp_compute_graph_info); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -113,13 +113,10 @@ class TaskContext { | |||||
| void *handle_ = nullptr; | void *handle_ = nullptr; | ||||
| const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | ||||
| Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, uint32_t task_type, uint32_t block_dim); | |||||
| Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||||
| const std::string &task_type, uint32_t block_dim); | |||||
| void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | ||||
| const std::vector<ComputeGraphDescInfo>& GetProfilingGraphDescInfo() const { return compute_graph_info; } | |||||
| Status SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id); | |||||
| void ClearProfilingGraphDescInfo() { compute_graph_info.clear(); } | |||||
| private: | private: | ||||
| TaskContext(GraphExecutionContext *execution_context, | TaskContext(GraphExecutionContext *execution_context, | ||||
| NodeState *node_state, | NodeState *node_state, | ||||
| @@ -141,7 +138,6 @@ class TaskContext { | |||||
| uint32_t task_id_ = 0; | uint32_t task_id_ = 0; | ||||
| uint32_t stream_id_ = 0; | uint32_t stream_id_ = 0; | ||||
| std::vector<TaskDescInfo> task_desc_info; | std::vector<TaskDescInfo> task_desc_info; | ||||
| std::vector<ComputeGraphDescInfo> compute_graph_info; | |||||
| }; | }; | ||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -45,40 +45,24 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| string model_name; | |||||
| string op_name; | |||||
| TaskDescInfo tmp_task_desc_info; | |||||
| uint32_t model_id; | uint32_t model_id; | ||||
| uint32_t block_dim; | |||||
| if (op_task->GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) { | |||||
| if (op_task->GetProfilingArgs(tmp_task_desc_info, model_id) != SUCCESS) { | |||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); | GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); | ||||
| return ACL_ERROR_GE_PARAM_INVALID; | return ACL_ERROR_GE_PARAM_INVALID; | ||||
| } | } | ||||
| GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str()); | |||||
| std::vector<TaskDescInfo> task_desc_info; | |||||
| uint32_t task_id = 0; | |||||
| uint32_t stream_id = 0; | |||||
| auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
| GELOGD("ProfilingReport of op[%s] model[%s] start.", | |||||
| tmp_task_desc_info.op_name.c_str(), tmp_task_desc_info.model_name.c_str()); | |||||
| TaskDescInfo tmp_task_desc_info; | |||||
| tmp_task_desc_info.model_name = model_name; | |||||
| tmp_task_desc_info.op_name = op_name; | |||||
| tmp_task_desc_info.block_dim = block_dim; | |||||
| tmp_task_desc_info.task_id = task_id; | |||||
| tmp_task_desc_info.stream_id = stream_id; | |||||
| tmp_task_desc_info.shape_type = shape_type; | tmp_task_desc_info.shape_type = shape_type; | ||||
| tmp_task_desc_info.cur_iter_num = 0; | tmp_task_desc_info.cur_iter_num = 0; | ||||
| tmp_task_desc_info.task_type = op_task->GetTaskType(); | tmp_task_desc_info.task_type = op_task->GetTaskType(); | ||||
| GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | |||||
| task_desc_info.emplace_back(tmp_task_desc_info); | |||||
| std::vector<ComputeGraphDescInfo> compute_graph_info; | |||||
| std::vector<TaskDescInfo> task_desc_info; | |||||
| task_desc_info.emplace_back(tmp_task_desc_info); | |||||
| auto &profiling_manager = ProfilingManager::Instance(); | auto &profiling_manager = ProfilingManager::Instance(); | ||||
| profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info); | |||||
| profiling_manager.ReportProfilingData(model_id, task_desc_info); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| } // namespace | } // namespace | ||||
| @@ -23,6 +23,7 @@ | |||||
| #include "aicpu/common/aicpu_task_struct.h" | #include "aicpu/common/aicpu_task_struct.h" | ||||
| #include "common/dump/dump_manager.h" | #include "common/dump/dump_manager.h" | ||||
| #include "common/dump/dump_op.h" | #include "common/dump/dump_op.h" | ||||
| #include "common/profiling/profiling_manager.h" | |||||
| #include "common/formats/formats.h" | #include "common/formats/formats.h" | ||||
| #include "common/math/math_util.h" | #include "common/math/math_util.h" | ||||
| #include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
| @@ -108,15 +109,29 @@ void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) { | |||||
| model_id_ = model_id; | model_id_ = model_id; | ||||
| } | } | ||||
| Status OpTask::GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, | |||||
| uint32_t &block_dim) { | |||||
| model_name = model_name_; | |||||
| model_id = model_id_; | |||||
| block_dim = block_dim_; | |||||
| Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id) { | |||||
| uint32_t task_id = 0; | |||||
| uint32_t stream_id = 0; | |||||
| auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Get task_id and stream_id failed ret: 0x%X.", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
| GE_CHECK_NOTNULL(op_desc_); | GE_CHECK_NOTNULL(op_desc_); | ||||
| op_name = op_desc_->GetName(); | |||||
| string op_name = op_desc_->GetName(); | |||||
| GELOGD("Get profiling args of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | |||||
| model_id = model_id_; | |||||
| task_desc_info.model_name = model_name_; | |||||
| task_desc_info.block_dim = block_dim_; | |||||
| task_desc_info.task_id = task_id; | |||||
| task_desc_info.stream_id = stream_id; | |||||
| task_desc_info.op_name = op_name; | |||||
| task_desc_info.op_type = op_desc_->GetType(); | |||||
| auto &prof_mgr = ProfilingManager::Instance(); | |||||
| prof_mgr.GetOpInputOutputInfo(op_desc_, task_desc_info); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) { | Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) { | ||||
| return UNSUPPORTED; | return UNSUPPORTED; | ||||
| } | } | ||||
| @@ -153,7 +168,7 @@ Status OpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||||
| return UNSUPPORTED; | return UNSUPPORTED; | ||||
| } | } | ||||
| uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; } | |||||
| const std::string &OpTask::GetTaskType() const { return kTaskTypeInvalid; } | |||||
| TbeOpTask::~TbeOpTask() { | TbeOpTask::~TbeOpTask() { | ||||
| if (sm_desc_ != nullptr) { | if (sm_desc_ != nullptr) { | ||||
| @@ -171,7 +186,7 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; } | |||||
| const std::string &TbeOpTask::GetStubName() const { return stub_name_; } | const std::string &TbeOpTask::GetStubName() const { return stub_name_; } | ||||
| uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } | |||||
| const std::string &TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } | |||||
| void TbeOpTask::SetHandle(void *handle) { | void TbeOpTask::SetHandle(void *handle) { | ||||
| this->handle_ = handle; | this->handle_ = handle; | ||||
| @@ -834,7 +849,7 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam ¶m) { | |||||
| return DoUpdateArgTable(param, false); | return DoUpdateArgTable(param, false); | ||||
| } | } | ||||
| uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; } | |||||
| const std::string &AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; } | |||||
| void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | ||||
| arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data()); | arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data()); | ||||
| @@ -43,7 +43,7 @@ class OpTask { | |||||
| const vector<GeTensorDesc> &output_desc); | const vector<GeTensorDesc> &output_desc); | ||||
| virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | ||||
| void SetModelArgs(std::string model_name, uint32_t model_id); | void SetModelArgs(std::string model_name, uint32_t model_id); | ||||
| Status GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, uint32_t &block_dim); | |||||
| Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); | |||||
| const OpDescPtr &GetOpdesc() const {return op_desc_;} | const OpDescPtr &GetOpdesc() const {return op_desc_;} | ||||
| Status OpenDump(rtStream_t stream); | Status OpenDump(rtStream_t stream); | ||||
| virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0; | virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0; | ||||
| @@ -52,7 +52,7 @@ class OpTask { | |||||
| std::vector<GeTensorDesc> &output_desc, | std::vector<GeTensorDesc> &output_desc, | ||||
| std::vector<DataBuffer> &output_buffers, | std::vector<DataBuffer> &output_buffers, | ||||
| rtStream_t stream); | rtStream_t stream); | ||||
| virtual uint32_t GetTaskType() const; | |||||
| virtual const std::string &GetTaskType() const; | |||||
| protected: | protected: | ||||
| Status DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace); | Status DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace); | ||||
| @@ -88,7 +88,7 @@ class TbeOpTask : public OpTask { | |||||
| size_t GetArgSize() const; | size_t GetArgSize() const; | ||||
| const std::string &GetStubName() const; | const std::string &GetStubName() const; | ||||
| void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); | void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); | ||||
| uint32_t GetTaskType() const override; | |||||
| const std::string &GetTaskType() const override; | |||||
| void SetHandle(void *handle); | void SetHandle(void *handle); | ||||
| private: | private: | ||||
| @@ -123,7 +123,7 @@ class AiCpuBaseTask : public OpTask { | |||||
| ~AiCpuBaseTask() override; | ~AiCpuBaseTask() override; | ||||
| UnknowShapeOpType GetUnknownType() const { return unknown_type_; } | UnknowShapeOpType GetUnknownType() const { return unknown_type_; } | ||||
| Status UpdateArgTable(const SingleOpModelParam ¶m) override; | Status UpdateArgTable(const SingleOpModelParam ¶m) override; | ||||
| uint32_t GetTaskType() const override; | |||||
| const std::string &GetTaskType() const override; | |||||
| protected: | protected: | ||||
| Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | ||||
| @@ -57,9 +57,9 @@ const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; | |||||
| const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | ||||
| // profiling data | // profiling data | ||||
| const uint32_t kTaskTypeAicore = 0; | |||||
| const uint32_t kTaskTypeAicpu = 1; | |||||
| const uint32_t kTaskTypeInvalid = 0xFFFF; | |||||
| const std::string kTaskTypeAicore = "AI_CORE"; | |||||
| const std::string kTaskTypeAicpu = "AI_CPU"; | |||||
| const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID"; | |||||
| // Data cache, including data address and length | // Data cache, including data address and length | ||||
| struct DataBuffer { | struct DataBuffer { | ||||
| @@ -251,27 +251,19 @@ struct Options { | |||||
| struct TaskDescInfo { | struct TaskDescInfo { | ||||
| std::string model_name; | std::string model_name; | ||||
| std::string op_name; | std::string op_name; | ||||
| std::string op_type; | |||||
| uint32_t block_dim; | uint32_t block_dim; | ||||
| uint32_t task_id; | uint32_t task_id; | ||||
| uint32_t stream_id; | uint32_t stream_id; | ||||
| std::string shape_type; | std::string shape_type; | ||||
| int64_t cur_iter_num; | int64_t cur_iter_num; | ||||
| uint32_t task_type; | |||||
| }; | |||||
| // Profiling info of graph | |||||
| struct ComputeGraphDescInfo { | |||||
| std::string model_name; | |||||
| std::string op_name; | |||||
| std::string op_type; | |||||
| std::string task_type; | |||||
| std::vector<Format> input_format; | std::vector<Format> input_format; | ||||
| std::vector<std::vector<int64_t>> input_shape; | std::vector<std::vector<int64_t>> input_shape; | ||||
| std::vector<DataType> input_data_type; | std::vector<DataType> input_data_type; | ||||
| std::vector<Format> output_format; | std::vector<Format> output_format; | ||||
| std::vector<std::vector<int64_t>> output_shape; | std::vector<std::vector<int64_t>> output_shape; | ||||
| std::vector<DataType> output_data_type; | std::vector<DataType> output_data_type; | ||||
| uint32_t task_id; | |||||
| uint32_t stream_id; | |||||
| }; | }; | ||||
| struct OpDescInfo { | struct OpDescInfo { | ||||
| @@ -761,7 +761,7 @@ set(GENERATOR_TEST_FILES | |||||
| ) | ) | ||||
| set(SINGLE_OP_TEST_FILES | set(SINGLE_OP_TEST_FILES | ||||
| #"single_op/single_op_model_unittest.cc" | |||||
| "single_op/single_op_model_unittest.cc" | |||||
| "single_op/single_op_manager_unittest.cc" | "single_op/single_op_manager_unittest.cc" | ||||
| "single_op/stream_resource_unittest.cc" | "single_op/stream_resource_unittest.cc" | ||||
| "single_op/single_op_task_unittest.cc" | "single_op/single_op_task_unittest.cc" | ||||
| @@ -890,4 +890,11 @@ TEST_F(UtestDavinciModel, Sink_model_profile) { | |||||
| model.SinkModelProfile(); | model.SinkModelProfile(); | ||||
| } | } | ||||
| TEST_F(UtestDavinciModel, Sink_time_profile) { | |||||
| ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; | |||||
| DavinciModel model(0, nullptr); | |||||
| InputData current_data; | |||||
| model.SinkTimeProfile(current_data); | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -40,6 +40,10 @@ class UtestSingleOpModel : public testing::Test { | |||||
| void TearDown() {} | void TearDown() {} | ||||
| }; | }; | ||||
| //rt api stub | |||||
| rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId) { | |||||
| return RT_ERROR_NONE; | |||||
| } | |||||
| /* | /* | ||||
| TEST_F(UtestSingleOpModel, test_init_model) { | TEST_F(UtestSingleOpModel, test_init_model) { | ||||
| string model_data_str = "123456789"; | string model_data_str = "123456789"; | ||||
| @@ -101,9 +105,9 @@ TEST_F(UtestSingleOpModel, test_set_inputs_and_outputs) { | |||||
| std::mutex stream_mu_; | std::mutex stream_mu_; | ||||
| rtStream_t stream_ = nullptr; | rtStream_t stream_ = nullptr; | ||||
| SingleOp single_op(&stream_mu_, stream_); | |||||
| ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS); | |||||
| // SingleOp single_op(&stream_mu_, stream_); | |||||
| // | |||||
| // ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS); | |||||
| } | } | ||||
| /* | /* | ||||
| TEST_F(UtestSingleOpModel, test_build_kernel_task) { | TEST_F(UtestSingleOpModel, test_build_kernel_task) { | ||||
| @@ -148,7 +152,7 @@ TEST_F(UtestSingleOpModel, test_init) { | |||||
| ASSERT_EQ(op_model.Init(), FAILED); | ASSERT_EQ(op_model.Init(), FAILED); | ||||
| } | } | ||||
| */ | */ | ||||
| /* | |||||
| TEST_F(UtestSingleOpModel, test_parse_arg_table) { | TEST_F(UtestSingleOpModel, test_parse_arg_table) { | ||||
| string model_data_str = "123456789"; | string model_data_str = "123456789"; | ||||
| SingleOpModel op_model("model", model_data_str.c_str(), model_data_str.size()); | SingleOpModel op_model("model", model_data_str.c_str(), model_data_str.size()); | ||||
| @@ -173,3 +177,23 @@ TEST_F(UtestSingleOpModel, test_parse_arg_table) { | |||||
| ASSERT_EQ(op.arg_table_[1].size(), 1); | ASSERT_EQ(op.arg_table_[1].size(), 1); | ||||
| ASSERT_EQ(op.arg_table_[1].front(), &arg_base[0]); | ASSERT_EQ(op.arg_table_[1].front(), &arg_base[0]); | ||||
| } | } | ||||
| */ | |||||
| TEST_F(UtestSingleOpModel, test_op_task_get_profiler_args) { | |||||
| string name = "relu"; | |||||
| string type = "relu"; | |||||
| auto op_desc = std::make_shared<ge::OpDesc>(name, type); | |||||
| op_desc->SetStreamId(0); | |||||
| op_desc->SetId(0); | |||||
| TbeOpTask task; | |||||
| task.op_desc_ = op_desc; | |||||
| task.model_name_ = "resnet_50"; | |||||
| task.model_id_ = 1; | |||||
| TaskDescInfo task_desc_info; | |||||
| uint32_t model_id; | |||||
| task.GetProfilingArgs(task_desc_info, model_id); | |||||
| ASSERT_EQ(task_desc_info.model_name, "resnet_50"); | |||||
| ASSERT_EQ(model_id, 1); | |||||
| } | |||||