| @@ -20,6 +20,8 @@ | |||
| #include "framework/common/debug/log.h" | |||
| #include "framework/common/string_util.h" | |||
| #include "graph/ge_context.h" | |||
| #include "graph/utils/type_utils.h" | |||
| #include "graph/types.h" | |||
| #include "runtime/base.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| @@ -31,12 +33,30 @@ const char *const kBpPoint = "bp_point"; | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
// Maximum payload (bytes) reported to msprof in a single chunk; larger data
// is split into kReportMaxLen-sized pieces by ReportData().
const size_t kReportMaxLen = 2048;
// Upper bound of device ids accepted in one profiling command.
const int32_t kMaxDeviceNum = 256;
// Indent width handed to Json::dump() when serialising report data.
// NOTE(review): "kInteval" looks like a typo for "kInterval"; renaming would
// require touching every usage, so it is only flagged here.
const uint32_t kInteval = 2;
// Keys of the profiling command configuration map.
const std::string kConfigNumsdev = "devNums";
const std::string kConfigDevIdList = "devIdList";
// Profiling command names.
const std::string kProfStart = "prof_start";
const std::string kProfStop = "prof_stop";
const std::string kProfModelSubscribe = "prof_model_subscribe";
const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe";
// JSON field names of a reported task description record.
const std::string kModelName = "model_name";
const std::string kModelId = "model_id";
// NOTE(review): "kOpNmae" / "kOptype" look like typos for kOpName / kOpType.
// The string values are correct, so only the identifiers (and all of their
// usages) would need renaming.
const std::string kOpNmae = "op_name";
const std::string kOptype = "op_type";
const std::string kBlockDim = "block_dims";
const std::string kTaskId = "task_id";
const std::string kStreamId = "stream_id";
const std::string kShapeType = "shape_type";
const std::string kCurIterNum = "cur_iter_num";
const std::string kTaskType = "task_type";
// JSON field names of the per-tensor input/output sub-records.
const std::string kInput = "input";
const std::string kOutput = "output";
const std::string kFormat = "format";
const std::string kDataType = "data_type";
const std::string kShape = "shape";
const std::string kIdx = "idx";
| #endif | |||
| } // namespace | |||
| @@ -206,118 +226,69 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf | |||
| #endif | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo( | |||
| uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) { | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingOpInputOutInfo( | |||
| const TaskDescInfo &task, Json &task_json) { | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| std::string data; | |||
| for (const auto &task : task_desc_info) { | |||
| std::string model_name = task.model_name; | |||
| std::string op_name = task.op_name; | |||
| uint32_t block_dim = task.block_dim; | |||
| uint32_t task_id = task.task_id; | |||
| uint32_t stream_id = task.stream_id; | |||
| std::string shape_type = task.shape_type; | |||
| int64_t cur_iter_num = task.cur_iter_num; | |||
| uint32_t task_type = task.task_type; | |||
| data = model_name.append(" ") | |||
| .append(op_name).append(" ") | |||
| .append(std::to_string(block_dim)).append(" ") | |||
| .append(std::to_string(task_id)).append(" ") | |||
| .append(std::to_string(stream_id)).append(" ") | |||
| .append(std::to_string(model_id)).append(" ") | |||
| .append(shape_type).append(" ") | |||
| .append(std::to_string(cur_iter_num)).append(" ") | |||
| .append(std::to_string(task_type)).append("\n"); | |||
| ReporterData reporter_data{}; | |||
| reporter_data.deviceId = device_id; | |||
| reporter_data.data = (unsigned char *)data.c_str(); | |||
| reporter_data.dataLen = data.size(); | |||
| int ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "task_desc_info", sizeof("task_desc_info")); | |||
| if (ret != EOK) { | |||
| GELOGE(ret, "Report data tag of task_desc_info memcpy error!"); | |||
| return; | |||
| } | |||
| int32_t cb_ret = CallMsprofReport(reporter_data); | |||
| if (cb_ret != 0) { | |||
| GELOGE(cb_ret, "Reporter data of task_desc_info failed, ret:%d", cb_ret); | |||
| return; | |||
| } | |||
| for (size_t i = 0; i < task.input_format.size(); i++) { | |||
| Json tmp_input; | |||
| tmp_input[kIdx] = i; | |||
| Format format = task.input_format[i]; | |||
| tmp_input[kFormat] = TypeUtils::FormatToSerialString(format); | |||
| DataType data_type = task.input_data_type[i]; | |||
| tmp_input[kDataType] = TypeUtils::DataTypeToSerialString(data_type); | |||
| tmp_input[kShape] = task.input_shape[i]; | |||
| task_json[kInput] += tmp_input; | |||
| } | |||
| for (size_t i = 0; i < task.output_format.size(); i++) { | |||
| Json tmp_output; | |||
| tmp_output[kIdx] = i; | |||
| Format format = task.output_format[i]; | |||
| tmp_output[kFormat] = TypeUtils::FormatToSerialString(format); | |||
| DataType data_type = task.output_data_type[i]; | |||
| tmp_output[kDataType] = TypeUtils::DataTypeToSerialString(data_type); | |||
| tmp_output[kShape] = task.output_shape[i]; | |||
| task_json[kOutput] += tmp_output; | |||
| } | |||
| data.clear(); | |||
| #endif | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo( | |||
| uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, const int32_t &device_id) { | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo( | |||
| uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) { | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| std::string data; | |||
| for (const auto &graph : compute_graph_desc_info) { | |||
| data.append("model_name:") | |||
| .append(graph.model_name) | |||
| .append(" op_name:") | |||
| .append(graph.op_name) | |||
| .append(" op_type:") | |||
| .append(graph.op_type); | |||
| for (size_t i = 0; i < graph.input_format.size(); ++i) { | |||
| data.append(" input_id:") | |||
| .append(std::to_string(i)) | |||
| .append(" input_format:") | |||
| .append(std::to_string(graph.input_format.at(i))) | |||
| .append(" input_data_type:") | |||
| .append(std::to_string(graph.input_data_type.at(i))) | |||
| .append(" input_shape:\""); | |||
| size_t input_shape_len = graph.input_shape.at(i).size(); | |||
| if (input_shape_len == 0) { | |||
| data.append(""); | |||
| } else if (input_shape_len == 1) { | |||
| data.append(std::to_string(graph.input_shape.at(i).at(0))); | |||
| } else { | |||
| for (size_t j = 0; j < input_shape_len - 1; ++j) { | |||
| data.append(std::to_string(graph.input_shape.at(i).at(j))).append(","); | |||
| } | |||
| data.append(std::to_string(graph.input_shape.at(i).at(input_shape_len - 1))); | |||
| } | |||
| data.append("\""); | |||
| } | |||
| for (size_t i = 0; i < graph.output_format.size(); ++i) { | |||
| data.append(" output_id:") | |||
| .append(std::to_string(i)) | |||
| .append(" output_format:") | |||
| .append(std::to_string(graph.output_format.at(i))) | |||
| .append(" output_data_type:") | |||
| .append(std::to_string(graph.output_data_type.at(i))) | |||
| .append(" output_shape:\""); | |||
| size_t output_shape_len = graph.output_shape.at(i).size(); | |||
| if (output_shape_len == 0) { | |||
| data.append(""); | |||
| } else if (output_shape_len == 1) { | |||
| data.append(std::to_string(graph.output_shape.at(i).at(0))); | |||
| } else { | |||
| for (size_t j = 0; j < output_shape_len - 1; ++j) { | |||
| data.append(std::to_string(graph.output_shape.at(i).at(j))).append(","); | |||
| } | |||
| data.append(std::to_string(graph.output_shape.at(i).at(output_shape_len - 1))); | |||
| } | |||
| data.append("\""); | |||
| for (const auto &task : task_desc_info) { | |||
| Json task_info; | |||
| task_info[kModelName] = task.model_name; | |||
| task_info[kModelId] = model_id; | |||
| task_info[kOpNmae] = task.op_name; | |||
| task_info[kOptype] = task.op_type; | |||
| task_info[kBlockDim] = task.block_dim; | |||
| task_info[kTaskType] = task.task_type; | |||
| task_info[kTaskId] = task.task_id; | |||
| task_info[kStreamId] = task.stream_id; | |||
| task_info[kCurIterNum] = task.cur_iter_num; | |||
| task_info[kShapeType] = task.shape_type; | |||
| ProfilingOpInputOutInfo(task, task_info); | |||
| std::string reported_data; | |||
| try { | |||
| reported_data = task_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); | |||
| } catch (std::exception &e) { | |||
| GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); | |||
| return ; | |||
| } catch (...) { | |||
| GELOGE(FAILED, "Failed to convert JSON to string."); | |||
| return; | |||
| } | |||
| data.append(" model_id:").append(std::to_string(model_id)); | |||
| data.append(" task_id:").append(std::to_string(graph.task_id)); | |||
| data.append(" stream_id:").append(std::to_string(graph.stream_id)); | |||
| data.append("\n"); | |||
| GraphDescReport(device_id, data); | |||
| data.clear(); | |||
| reported_data.append(",") | |||
| .append("\n"); | |||
| ReportData(device_id, reported_data, "task_desc_info"); | |||
| } | |||
| #endif | |||
| } | |||
| void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &data) { | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportData( | |||
| const int32_t &device_id, const string &data, const string &tag_name) { | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| ReporterData reporter_data{}; | |||
| int ret = -1; | |||
| @@ -325,36 +296,38 @@ void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &d | |||
| size_t index = data.size() / kReportMaxLen; | |||
| if (index >= 1) { | |||
| reporter_data.deviceId = device_id; | |||
| ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); | |||
| GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;); | |||
| ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size()); | |||
| GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;); | |||
| for (size_t i = 0; i < index; ++i) { | |||
| reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * i; | |||
| reporter_data.dataLen = kReportMaxLen; | |||
| cb_ret = CallMsprofReport(reporter_data); | |||
| GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); | |||
| GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret); | |||
| return;); | |||
| } | |||
| reporter_data.dataLen = data.size() - kReportMaxLen * index; | |||
| if (reporter_data.dataLen != 0) { | |||
| reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * index; | |||
| cb_ret = CallMsprofReport(reporter_data); | |||
| GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); | |||
| GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret); | |||
| return;); | |||
| } | |||
| } else { | |||
| reporter_data.deviceId = device_id; | |||
| reporter_data.data = (unsigned char *)data.c_str(); | |||
| reporter_data.dataLen = data.size(); | |||
| ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); | |||
| GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;); | |||
| ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size()); | |||
| GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;); | |||
| cb_ret = CallMsprofReport(reporter_data); | |||
| GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); | |||
| GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret); | |||
| return;); | |||
| } | |||
| #endif | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData( | |||
| uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | |||
| const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) { | |||
| uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info) { | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| int32_t logic_device_id = 0; | |||
| rtError_t rt_ret = rtGetDevice(&logic_device_id); | |||
| @@ -365,8 +338,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr | |||
| GELOGD("current logic_device_id:%d", logic_device_id); | |||
| GELOGD("start ProfilingTaskDescInfo."); | |||
| ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id); | |||
| GELOGD("start ProfilingGraphDescInfo."); | |||
| ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id); | |||
| GELOGD("Report profiling data for GE end."); | |||
| #endif | |||
| } | |||
| @@ -813,6 +784,44 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMs | |||
| static_cast<void *>(&reporter_data), sizeof(ReporterData)); | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetOpInputOutputInfo( | |||
| const OpDescPtr &op, TaskDescInfo &task_desc_info) const { | |||
| std::vector<Format> input_format; | |||
| std::vector<std::vector<int64_t>> input_shape; | |||
| std::vector<DataType> input_data_type; | |||
| for (size_t i = 0; i < op->GetAllInputsSize(); ++i) { | |||
| GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i); | |||
| if (input_tensor_desc == nullptr) { | |||
| continue; | |||
| } | |||
| input_format.emplace_back(input_tensor_desc->GetFormat()); | |||
| input_shape.emplace_back(input_tensor_desc->GetShape().GetDims()); | |||
| input_data_type.emplace_back(input_tensor_desc->GetDataType()); | |||
| } | |||
| std::vector<Format> output_format; | |||
| std::vector<std::vector<int64_t>> output_shape; | |||
| std::vector<DataType> output_data_type; | |||
| for (size_t j = 0; j < op->GetOutputsSize(); ++j) { | |||
| GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j); | |||
| if (output_tensor_desc == nullptr) { | |||
| continue; | |||
| } | |||
| output_format.emplace_back(output_tensor_desc->GetFormat()); | |||
| output_shape.emplace_back(output_tensor_desc->GetShape().GetDims()); | |||
| output_data_type.emplace_back(output_tensor_desc->GetDataType()); | |||
| } | |||
| std::vector<Format> format_default = { FORMAT_NULL }; | |||
| std::vector<std::vector<int64_t>> shape_default = { {0} }; | |||
| std::vector<DataType> data_type_default = { DT_UNDEFINED }; | |||
| task_desc_info.input_format = input_format.empty() ? format_default : input_format; | |||
| task_desc_info.input_shape = input_shape.empty() ? shape_default : input_shape; | |||
| task_desc_info.input_data_type = input_data_type.empty() ? data_type_default : input_data_type; | |||
| task_desc_info.output_format = output_format.empty() ? format_default : output_format; | |||
| task_desc_info.output_shape = output_shape.empty() ? shape_default : output_shape; | |||
| task_desc_info.output_data_type = output_data_type.empty() ? data_type_default : output_data_type; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpPoint( | |||
| std::string &fp_point, std::string &bp_point) { | |||
| // Env or options mode, fp_point_/bp_point_ have initiliazed on profiling init | |||
| @@ -54,6 +54,8 @@ namespace { | |||
| } // namespace | |||
| namespace ge { | |||
| class OpDesc; | |||
| using OpDescPtr = std::shared_ptr<OpDesc>; | |||
| struct DeviceSubsInfo { | |||
| uint64_t module; | |||
| uint32_t subscribe_count; | |||
| @@ -82,12 +84,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
| bool ProfilingModelExecuteOn() const; | |||
| // is_execute_profiling_ only used by ge option and env | |||
| bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } | |||
| void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | |||
| const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info); | |||
| void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info); | |||
| void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | |||
| const int32_t &device_id); | |||
| void ProfilingGraphDescInfo(uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, | |||
| const int32_t &device_id); | |||
| void ProfilingOpInputOutInfo(const TaskDescInfo &task, Json &task_json); | |||
| Status PluginInit() const; | |||
| void PluginUnInit() const; | |||
| Status CallMsprofReport(ReporterData &reporter_data) const; | |||
| @@ -95,6 +95,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
| void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; } | |||
| void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; } | |||
| void GetFpBpPoint(std::string &fp_point, std::string &bp_point); | |||
| void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const; | |||
| void ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name); | |||
| private: | |||
| Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf); | |||
| Status ParseOptions(const std::string &options); | |||
| @@ -103,7 +105,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
| Status ProfParseDeviceId(const std::map<std::string, std::string> &config_para, | |||
| vector<int32_t> &device_list); | |||
| uint64_t GetProfilingModule(); | |||
| void GraphDescReport(const int32_t &device_id, const string &data); | |||
| void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector<int32_t> &device_list); | |||
| void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module); | |||
| @@ -852,7 +852,7 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi | |||
| // subgraph of dynamic graph no need to find index, has been found in parent graph | |||
| if (IsSubGraphOfDynamicGraph(graph)) { | |||
| GELOGI("Graph[%s] is subgraph of dynamic graph, no nned to find index.", graph->GetName().c_str()); | |||
| GELOGI("Graph[%s] is subgraph of dynamic graph, no need to find index.", graph->GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| @@ -92,6 +92,32 @@ const uint32_t kEndOfSequence = 0x0704000a; | |||
| const uint32_t kEndOfSequenceNew = 507005; | |||
| const int32_t kModelAbortNormal = 0x0704000e; | |||
| const int32_t kModelAbortNormalNew = 507024; | |||
// Indent width handed to Json::dump() when serialising profiling records.
// NOTE(review): "kInteval" looks like a typo for "kInterval".
const uint32_t kInteval = 2;
// JSON field names shared by the model-load and model-time profiling records.
const char *const kModelName = "model_name";
// NOTE(review): "kModeleId" looks like a typo for "kModelId"; the string value
// is correct, so only the identifier (and its usages) would need renaming.
const char *const kModeleId = "model_id";
const char *const kLoadStartTime = "load_start_time";
const char *const kLoadEndTime = "load_end_time";
// JSON field names of the fusion-op section of the model-load record.
const char *const kFusionOpInfo = "fusion_op_info";
const char *const kFusionOpName = "fusion_op_name";
const char *const kOriginalOpNum = "origin_op_num";
const char *const kOriginalOpName = "origin_op_name";
const char *const kStreamId = "stream_id";
const char *const kFusionOpMemoryInfo = "memory_info";
const char *const kInputSize = "input_size";
const char *const kOutputSize = "output_size";
const char *const kWeightSize = "weight_size";
const char *const kWorkSpaceSize = "workspace_size";
const char *const kTotalSize = "total_size";
const char *const kTaskCount = "task_count";
const char *const kTaskId = "task_id";
// JSON field names of the model-time (inference timeline) record.
const char *const kRequestId = "request_id";
const char *const kThreadId = "thread_id";
const char *const kInputBeginTime = "input_begin_time";
const char *const kInputEndTime = "input_end_time";
const char *const kInferBeginTime = "infer_begin_time";
const char *const kInferEndTime = "infer_end_time";
// NOTE(review): identifier says "BeginTime" but the serialised key is
// "output_start_time"; the value is what consumers parse, so it must not be
// changed silently.
const char *const kOutputBeginTime = "output_start_time";
const char *const kOutputEndTime = "output_end_time";
| inline bool IsDataOp(const std::string &node_type) { | |||
| return (node_type == DATA_TYPE) || (node_type == AIPP_DATA_TYPE) || (node_type == ANN_DATA_TYPE); | |||
| @@ -744,13 +770,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
| } | |||
| Status DavinciModel::ReportProfilingData() { | |||
| std::vector<ComputeGraphDescInfo> compute_graph_desc_info; | |||
| Status ret = GetComputeGraphInfo(compute_graph_desc_info); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "GetComputeGraphInfo failed."); | |||
| return ret; | |||
| } | |||
| ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info); | |||
| ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo()); | |||
| GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); | |||
| return SUCCESS; | |||
| @@ -2202,173 +2222,101 @@ Status DavinciModel::InitModelProfile() { | |||
| } | |||
| Status DavinciModel::SinkModelProfile() { | |||
| // profiling plugin must be registered | |||
| auto &prof_mgr = ProfilingManager::Instance(); | |||
| ReporterData reporter_data{}; | |||
| // report model data tag name | |||
| std::string tag_name("model_load_info_" + std::to_string(this->Id())); | |||
| GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | |||
| return FAILED, "Sink model tag memcpy error."); | |||
| // Model Header | |||
| std::string name = om_name_.empty() ? name_ : om_name_; | |||
| size_t name_len = name.size(); | |||
| reporter_data.deviceId = device_id_; | |||
| reporter_data.data = (unsigned char *)&name_len; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| reporter_data.data = (unsigned char *)name.c_str(); | |||
| reporter_data.dataLen = name.size(); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| uint32_t model_id = this->Id(); | |||
| reporter_data.data = (unsigned char *)&model_id; | |||
| reporter_data.dataLen = sizeof(uint32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| // Load Start/End Time | |||
| int64_t start_time = this->GetLoadBeginTime(); | |||
| reporter_data.data = (unsigned char *)&start_time; | |||
| reporter_data.dataLen = sizeof(int64_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| int64_t end_time = this->GetLoadEndTime(); | |||
| reporter_data.data = (unsigned char *)&end_time; | |||
| reporter_data.dataLen = sizeof(int64_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| Json model_load_info; | |||
| model_load_info[kModelName] = name; | |||
| model_load_info[kModeleId] = model_id; | |||
| model_load_info[kLoadStartTime] = start_time; | |||
| model_load_info[kLoadEndTime] = end_time; | |||
| // fusion op info | |||
| using CIT = std::multimap<uint32_t, uint32_t>::const_iterator; | |||
| using Range = std::pair<CIT, CIT>; | |||
| for (const ProfileInfo &profile : profile_list_) { | |||
| // op name after fusion | |||
| Json fusion_op_info; | |||
| string fusion_op_name = profile.fusion_info.op_name; | |||
| int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size(); | |||
| reporter_data.data = (unsigned char *)&fusion_op_name_len; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| reporter_data.data = (unsigned char *)fusion_op_name.c_str(); | |||
| reporter_data.dataLen = fusion_op_name_len; | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| // original op name before fusion | |||
| uint32_t op_num = profile.fusion_info.original_op_names.size(); | |||
| reporter_data.data = (unsigned char *)&op_num; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| vector<string> original_name; | |||
| for (uint32_t k = 0; k < op_num; k++) { | |||
| std::string op_name = profile.fusion_info.original_op_names[k]; | |||
| int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size(); | |||
| reporter_data.data = (unsigned char *)&op_name_len; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| reporter_data.data = (unsigned char *)op_name.c_str(); | |||
| reporter_data.dataLen = op_name_len; | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| } | |||
| // stream id info | |||
| original_name.emplace_back(profile.fusion_info.original_op_names[k]); | |||
| } | |||
| uint32_t stream_id = 0; | |||
| auto iter = profiler_report_op_info_.find(fusion_op_name); | |||
| if (iter != profiler_report_op_info_.end()) { | |||
| stream_id = iter->second.second; | |||
| } | |||
| reporter_data.data = (unsigned char *)&stream_id; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| // memory info | |||
| reporter_data.data = (unsigned char *)&profile.memory_info; | |||
| reporter_data.dataLen = sizeof(profile.memory_info); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| // task info | |||
| reporter_data.data = (unsigned char *)&profile.task_count; | |||
| reporter_data.dataLen = sizeof(uint32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| fusion_op_info[kFusionOpName] = fusion_op_name; | |||
| fusion_op_info[kOriginalOpNum] = op_num; | |||
| fusion_op_info[kOriginalOpName] = original_name; | |||
| fusion_op_info[kStreamId] = stream_id; | |||
| fusion_op_info[kFusionOpMemoryInfo][kInputSize] = profile.memory_info.input_size; | |||
| fusion_op_info[kFusionOpMemoryInfo][kOutputSize] = profile.memory_info.output_size; | |||
| fusion_op_info[kFusionOpMemoryInfo][kWeightSize] = profile.memory_info.weight_size; | |||
| fusion_op_info[kFusionOpMemoryInfo][kWorkSpaceSize] = profile.memory_info.workspace_size; | |||
| fusion_op_info[kFusionOpMemoryInfo][kTotalSize] = profile.memory_info.total_size; | |||
| fusion_op_info[kTaskCount] = profile.task_count; | |||
| vector<uint32_t> task_id; | |||
| Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index); | |||
| for (CIT idx = task_range.first; idx != task_range.second; ++idx) { | |||
| uint32_t task_id = idx->second; | |||
| reporter_data.data = (unsigned char *)&task_id; | |||
| reporter_data.dataLen = sizeof(uint32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| task_id.push_back(idx->second); | |||
| } | |||
| fusion_op_info[kTaskId] = task_id; | |||
| model_load_info[kFusionOpInfo] += fusion_op_info; | |||
| } | |||
| std::string tag_name("model_load_info_" + std::to_string(this->Id())); | |||
| std::string reported_data; | |||
| try { | |||
| reported_data = model_load_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); | |||
| } catch (std::exception &e) { | |||
| GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); | |||
| } catch (...) { | |||
| GELOGE(FAILED, "Failed to convert JSON to string."); | |||
| } | |||
| reported_data.append(",") | |||
| .append("\n"); | |||
| prof_mgr.ReportData(device_id_, reported_data, tag_name); | |||
| return SUCCESS; | |||
| } | |||
| Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { | |||
| // profiling plugin must be registered | |||
| auto &prof_mgr = ProfilingManager::Instance(); | |||
| ReporterData reporter_data{}; | |||
| string name = om_name_.empty() ? name_ : om_name_; | |||
| Json model_time_info; | |||
| model_time_info[kModelName] = name; | |||
| model_time_info[kModeleId] = this->Id(); | |||
| model_time_info[kRequestId] = current_data.request_id; | |||
| model_time_info[kThreadId] = GetDataInputTid(); | |||
| model_time_info[kInputBeginTime] = time_info_.processBeginTime; | |||
| model_time_info[kInputEndTime] = time_info_.processEndTime; | |||
| model_time_info[kInferBeginTime] = time_info_.inferenceBeginTime; | |||
| model_time_info[kInferEndTime] = time_info_.inferenceEndTime; | |||
| model_time_info[kOutputBeginTime] = time_info_.dumpBeginTime; | |||
| model_time_info[kOutputEndTime] = time_info_.dumpEndTime; | |||
| // report model data tag name | |||
| std::string tag_name; | |||
| tag_name.append("model_time_info_") | |||
| .append(std::to_string(this->Id())) | |||
| .append("_") | |||
| .append(std::to_string(current_data.index)); | |||
| GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | |||
| return FAILED, "Sink model tag memcpy error."); | |||
| // device id | |||
| reporter_data.deviceId = device_id_; | |||
| // Model Header | |||
| string name; | |||
| if (!om_name_.empty()) { | |||
| name = om_name_; | |||
| } else { | |||
| name = name_; | |||
| } | |||
| size_t name_len = name.size(); | |||
| reporter_data.data = (unsigned char *)&name_len; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| reporter_data.data = (unsigned char *)name.c_str(); | |||
| reporter_data.dataLen = name.size(); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u.", this->Id()); | |||
| // request id | |||
| uint64_t request_id = current_data.request_id; | |||
| reporter_data.data = (unsigned char *)&request_id; | |||
| reporter_data.dataLen = sizeof(uint32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); | |||
| // thread id | |||
| int32_t thread_id = GetDataInputTid(); | |||
| reporter_data.data = (unsigned char *)&thread_id; | |||
| reporter_data.dataLen = sizeof(int32_t); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); | |||
| // time info | |||
| time_info_.modelId = this->Id(); | |||
| reporter_data.data = (unsigned char *)&time_info_; | |||
| reporter_data.dataLen = sizeof(struct timeInfo); | |||
| GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||
| "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index); | |||
| .append(std::to_string(this->Id())) | |||
| .append("_") | |||
| .append(std::to_string(current_data.index)); | |||
| std::string reported_data; | |||
| try { | |||
| reported_data = model_time_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore); | |||
| } catch (std::exception &e) { | |||
| GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); | |||
| } catch (...) { | |||
| GELOGE(FAILED, "Failed to convert JSON to string."); | |||
| } | |||
| reported_data.append(",") | |||
| .append("\n"); | |||
| prof_mgr.ReportData(device_id_, reported_data, tag_name); | |||
| return SUCCESS; | |||
| } | |||
| @@ -3069,13 +3017,15 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo | |||
| task_desc_info.model_name = name_; | |||
| } | |||
| task_desc_info.op_name = op->GetName(); | |||
| task_desc_info.op_type = op->GetType(); | |||
| task_desc_info.block_dim = task_def.kernel().block_dim(); | |||
| task_desc_info.task_id = task->GetTaskID(); | |||
| task_desc_info.stream_id = task->GetStreamId(); | |||
| task_desc_info.shape_type = "static"; | |||
| task_desc_info.cur_iter_num = 0; | |||
| // task type | |||
| task_desc_info.task_type = kTaskTypeInvalid; | |||
| auto &prof_mgr = ProfilingManager::Instance(); | |||
| prof_mgr.GetOpInputOutputInfo(op, task_desc_info); | |||
| auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||
| if (model_task_type == RT_MODEL_TASK_KERNEL) { | |||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||
| @@ -3107,7 +3057,6 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo | |||
| task_desc_info_.emplace_back(task_desc_info); | |||
| } | |||
| } | |||
| return; | |||
| } | |||
| Status DavinciModel::DistributeTask() { | |||
| @@ -4008,41 +3957,6 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea | |||
| main_follow_stream_mapping_[main_stream_id].emplace_back(stream); | |||
| } | |||
| Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info) { | |||
| auto &all_op_desc = data_dumper_.GetAllOpDescInfo(); | |||
| for (auto &op_desc : all_op_desc) { | |||
| ComputeGraphDescInfo compute_graph_info; | |||
| if (!om_name_.empty()) { | |||
| compute_graph_info.model_name = om_name_; | |||
| } else { | |||
| compute_graph_info.model_name = name_; | |||
| } | |||
| std::vector<Format> format = { FORMAT_NULL }; | |||
| std::vector<std::vector<int64_t>> shape = { {0} }; | |||
| std::vector<DataType> data_type = { DT_UNDEFINED }; | |||
| compute_graph_info.op_name = op_desc.op_name; | |||
| compute_graph_info.op_type = op_desc.op_type; | |||
| compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format; | |||
| compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape; | |||
| compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type; | |||
| compute_graph_info.output_format = op_desc.output_format.empty() ? format : op_desc.output_format; | |||
| compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape; | |||
| compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? data_type : op_desc.output_data_type; | |||
| uint32_t task_id = 0; | |||
| uint32_t stream_id = 0; | |||
| auto iter = profiler_report_op_info_.find(op_desc.op_name); | |||
| if (iter != profiler_report_op_info_.end()) { | |||
| task_id = iter->second.first; | |||
| stream_id = iter->second.second; | |||
| } | |||
| compute_graph_info.task_id = task_id; | |||
| compute_graph_info.stream_id = stream_id; | |||
| graph_desc_info.emplace_back(compute_graph_info); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) { | |||
| if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) { | |||
| tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_; | |||
| @@ -840,9 +840,6 @@ class DavinciModel { | |||
| Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); | |||
| // get desc info of graph for profiling | |||
| Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info); | |||
| void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name); | |||
| Status InitL1DataDumperArgs(); | |||
| @@ -70,8 +70,6 @@ class NodeDoneCallback { | |||
| Status PrepareConstInputs(const NodeItem &node_item); | |||
| Status DumpDynamicNode(); | |||
| Status ProfilingReport(); | |||
| Status GetGraphDescInfo(const NodePtr node, const HybridModel *model, | |||
| std::vector<ComputeGraphDescInfo> &compute_graph_info); | |||
| Status GetTaskDescInfo(const NodePtr node, const HybridModel *model, | |||
| std::vector<TaskDescInfo> &task_desc_info); | |||
| GraphExecutionContext *graph_context_; | |||
| @@ -159,51 +157,14 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel * | |||
| } | |||
| GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); | |||
| auto &prof_mgr = ProfilingManager::Instance(); | |||
| task_desc_info = context_->GetProfilingTaskDescInfo(); | |||
| context_->ClearProfilingTaskDescInfo(); | |||
| return SUCCESS; | |||
| } | |||
| Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel *model, | |||
| std::vector<ComputeGraphDescInfo> &compute_graph_info) { | |||
| GE_CHECK_NOTNULL(node); | |||
| GE_CHECK_NOTNULL(model); | |||
| GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str()); | |||
| compute_graph_info = context_->GetProfilingGraphDescInfo(); | |||
| context_->ClearProfilingGraphDescInfo(); | |||
| auto op_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| for (auto &tmp_compute_graph_info : compute_graph_info) { | |||
| // default | |||
| if (op_desc->GetAllInputsSize() == 0) { | |||
| tmp_compute_graph_info.input_format = { FORMAT_NULL }; | |||
| tmp_compute_graph_info.input_shape = { {0} }; | |||
| tmp_compute_graph_info.input_data_type = { DT_UNDEFINED }; | |||
| } | |||
| for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||
| GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); | |||
| if (input_desc == nullptr) { | |||
| continue; | |||
| } | |||
| tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); | |||
| tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); | |||
| tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); | |||
| } | |||
| if (op_desc->GetOutputsSize() == 0) { | |||
| tmp_compute_graph_info.output_format = { FORMAT_NULL }; | |||
| tmp_compute_graph_info.output_shape = { {0} }; | |||
| tmp_compute_graph_info.output_data_type = { DT_UNDEFINED }; | |||
| } | |||
| for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { | |||
| GeTensorDesc output_desc = op_desc->GetOutputDesc(j); | |||
| tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); | |||
| tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); | |||
| tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); | |||
| } | |||
| for (auto &tmp_task_desc : task_desc_info) { | |||
| // save op input and output info | |||
| auto op_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| prof_mgr.GetOpInputOutputInfo(op_desc, tmp_task_desc); | |||
| } | |||
| return SUCCESS; | |||
| @@ -233,15 +194,8 @@ Status NodeDoneCallback::ProfilingReport() { | |||
| return profiling_ret; | |||
| } | |||
| std::vector<ComputeGraphDescInfo> compute_graph_info; | |||
| profiling_ret = GetGraphDescInfo(node, model, compute_graph_info); | |||
| if (profiling_ret != RT_ERROR_NONE) { | |||
| GELOGE(profiling_ret, "Get graph info of node[%s] failed.", node->GetName().c_str()); | |||
| return profiling_ret; | |||
| } | |||
| auto &profiling_manager = ProfilingManager::Instance(); | |||
| profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info); | |||
| profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info); | |||
| return SUCCESS; | |||
| } | |||
| @@ -189,12 +189,11 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||
| uint32_t stream_id = 0; | |||
| rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||
| return FAILED; | |||
| GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); | |||
| (void)context.SaveProfilingGraphDescInfo(task_id, stream_id); | |||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | |||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | |||
| } | |||
| @@ -201,12 +201,11 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void( | |||
| uint32_t stream_id = 0; | |||
| rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||
| return FAILED; | |||
| GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0); | |||
| (void)context.SaveProfilingGraphDescInfo(task_id, stream_id); | |||
| auto callback = [=, &context]() { | |||
| GELOGD("Node[%s] callback start.", node_name_.c_str()); | |||
| RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | |||
| @@ -515,7 +515,7 @@ Status TaskContext::Synchronize() { | |||
| } | |||
| Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||
| uint32_t task_type, uint32_t block_dim) { | |||
| const std::string &task_type, uint32_t block_dim) { | |||
| if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | |||
| const NodeItem &node_item = GetNodeItem(); | |||
| auto op_desc = node_item.GetOpDesc(); | |||
| @@ -525,11 +525,11 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream | |||
| const HybridModel *model = graph_context->model; | |||
| GE_CHECK_NOTNULL(model); | |||
| std::string op_name = op_desc->GetName(); | |||
| std::string dynamic_model_name = model->GetModelName(); | |||
| TaskDescInfo tmp_task_desc_info; | |||
| tmp_task_desc_info.model_name = dynamic_model_name; | |||
| tmp_task_desc_info.op_name = op_name; | |||
| tmp_task_desc_info.op_name = op_desc->GetName(); | |||
| tmp_task_desc_info.op_type = op_desc->GetType(); | |||
| tmp_task_desc_info.block_dim = block_dim; | |||
| tmp_task_desc_info.task_type = task_type; | |||
| tmp_task_desc_info.task_id = task_id; | |||
| @@ -546,31 +546,5 @@ NodeState *TaskContext::GetNodeState() const { | |||
| return node_state_; | |||
| } | |||
| Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id) { | |||
| if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | |||
| const NodeItem &node_item = GetNodeItem(); | |||
| auto op_desc = node_item.GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| const GraphExecutionContext *graph_context = GetExecutionContext(); | |||
| GE_CHECK_NOTNULL(graph_context); | |||
| const HybridModel *model = graph_context->model; | |||
| GE_CHECK_NOTNULL(model); | |||
| std::string dynamic_model_name = model->GetModelName(); | |||
| auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID); | |||
| if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && | |||
| op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) { | |||
| ComputeGraphDescInfo tmp_compute_graph_info; | |||
| tmp_compute_graph_info.model_name = dynamic_model_name; | |||
| tmp_compute_graph_info.op_name = op_desc->GetName(); | |||
| tmp_compute_graph_info.op_type = op_desc->GetType(); | |||
| tmp_compute_graph_info.task_id = task_id; | |||
| tmp_compute_graph_info.stream_id = stream_id; | |||
| compute_graph_info.emplace_back(tmp_compute_graph_info); | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| } // namespace hybrid | |||
| } // namespace ge | |||
| @@ -113,13 +113,10 @@ class TaskContext { | |||
| void *handle_ = nullptr; | |||
| const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | |||
| Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, uint32_t task_type, uint32_t block_dim); | |||
| Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||
| const std::string &task_type, uint32_t block_dim); | |||
| void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | |||
| const std::vector<ComputeGraphDescInfo>& GetProfilingGraphDescInfo() const { return compute_graph_info; } | |||
| Status SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id); | |||
| void ClearProfilingGraphDescInfo() { compute_graph_info.clear(); } | |||
| private: | |||
| TaskContext(GraphExecutionContext *execution_context, | |||
| NodeState *node_state, | |||
| @@ -141,7 +138,6 @@ class TaskContext { | |||
| uint32_t task_id_ = 0; | |||
| uint32_t stream_id_ = 0; | |||
| std::vector<TaskDescInfo> task_desc_info; | |||
| std::vector<ComputeGraphDescInfo> compute_graph_info; | |||
| }; | |||
| } // namespace hybrid | |||
| } // namespace ge | |||
| @@ -45,40 +45,24 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { | |||
| return SUCCESS; | |||
| } | |||
| string model_name; | |||
| string op_name; | |||
| TaskDescInfo tmp_task_desc_info; | |||
| uint32_t model_id; | |||
| uint32_t block_dim; | |||
| if (op_task->GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) { | |||
| if (op_task->GetProfilingArgs(tmp_task_desc_info, model_id) != SUCCESS) { | |||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); | |||
| return ACL_ERROR_GE_PARAM_INVALID; | |||
| } | |||
| GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str()); | |||
| std::vector<TaskDescInfo> task_desc_info; | |||
| uint32_t task_id = 0; | |||
| uint32_t stream_id = 0; | |||
| auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGD("ProfilingReport of op[%s] model[%s] start.", | |||
| tmp_task_desc_info.op_name.c_str(), tmp_task_desc_info.model_name.c_str()); | |||
| TaskDescInfo tmp_task_desc_info; | |||
| tmp_task_desc_info.model_name = model_name; | |||
| tmp_task_desc_info.op_name = op_name; | |||
| tmp_task_desc_info.block_dim = block_dim; | |||
| tmp_task_desc_info.task_id = task_id; | |||
| tmp_task_desc_info.stream_id = stream_id; | |||
| tmp_task_desc_info.shape_type = shape_type; | |||
| tmp_task_desc_info.cur_iter_num = 0; | |||
| tmp_task_desc_info.task_type = op_task->GetTaskType(); | |||
| GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | |||
| task_desc_info.emplace_back(tmp_task_desc_info); | |||
| std::vector<ComputeGraphDescInfo> compute_graph_info; | |||
| std::vector<TaskDescInfo> task_desc_info; | |||
| task_desc_info.emplace_back(tmp_task_desc_info); | |||
| auto &profiling_manager = ProfilingManager::Instance(); | |||
| profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info); | |||
| profiling_manager.ReportProfilingData(model_id, task_desc_info); | |||
| return SUCCESS; | |||
| } | |||
| } // namespace | |||
| @@ -23,6 +23,7 @@ | |||
| #include "aicpu/common/aicpu_task_struct.h" | |||
| #include "common/dump/dump_manager.h" | |||
| #include "common/dump/dump_op.h" | |||
| #include "common/profiling/profiling_manager.h" | |||
| #include "common/formats/formats.h" | |||
| #include "common/math/math_util.h" | |||
| #include "framework/common/debug/log.h" | |||
| @@ -108,15 +109,29 @@ void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) { | |||
| model_id_ = model_id; | |||
| } | |||
| Status OpTask::GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, | |||
| uint32_t &block_dim) { | |||
| model_name = model_name_; | |||
| model_id = model_id_; | |||
| block_dim = block_dim_; | |||
| Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id) { | |||
| uint32_t task_id = 0; | |||
| uint32_t stream_id = 0; | |||
| auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Get task_id and stream_id failed ret: 0x%X.", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GE_CHECK_NOTNULL(op_desc_); | |||
| op_name = op_desc_->GetName(); | |||
| string op_name = op_desc_->GetName(); | |||
| GELOGD("Get profiling args of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | |||
| model_id = model_id_; | |||
| task_desc_info.model_name = model_name_; | |||
| task_desc_info.block_dim = block_dim_; | |||
| task_desc_info.task_id = task_id; | |||
| task_desc_info.stream_id = stream_id; | |||
| task_desc_info.op_name = op_name; | |||
| task_desc_info.op_type = op_desc_->GetType(); | |||
| auto &prof_mgr = ProfilingManager::Instance(); | |||
| prof_mgr.GetOpInputOutputInfo(op_desc_, task_desc_info); | |||
| return SUCCESS; | |||
| } | |||
| Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) { | |||
| return UNSUPPORTED; | |||
| } | |||
| @@ -153,7 +168,7 @@ Status OpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||
| return UNSUPPORTED; | |||
| } | |||
| uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; } | |||
| const std::string &OpTask::GetTaskType() const { return kTaskTypeInvalid; } | |||
| TbeOpTask::~TbeOpTask() { | |||
| if (sm_desc_ != nullptr) { | |||
| @@ -171,7 +186,7 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; } | |||
| const std::string &TbeOpTask::GetStubName() const { return stub_name_; } | |||
| uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } | |||
| const std::string &TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } | |||
| void TbeOpTask::SetHandle(void *handle) { | |||
| this->handle_ = handle; | |||
| @@ -834,7 +849,7 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam ¶m) { | |||
| return DoUpdateArgTable(param, false); | |||
| } | |||
| uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; } | |||
| const std::string &AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; } | |||
| void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | |||
| arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data()); | |||
| @@ -43,7 +43,7 @@ class OpTask { | |||
| const vector<GeTensorDesc> &output_desc); | |||
| virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | |||
| void SetModelArgs(std::string model_name, uint32_t model_id); | |||
| Status GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, uint32_t &block_dim); | |||
| Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); | |||
| const OpDescPtr &GetOpdesc() const {return op_desc_;} | |||
| Status OpenDump(rtStream_t stream); | |||
| virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0; | |||
| @@ -52,7 +52,7 @@ class OpTask { | |||
| std::vector<GeTensorDesc> &output_desc, | |||
| std::vector<DataBuffer> &output_buffers, | |||
| rtStream_t stream); | |||
| virtual uint32_t GetTaskType() const; | |||
| virtual const std::string &GetTaskType() const; | |||
| protected: | |||
| Status DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace); | |||
| @@ -88,7 +88,7 @@ class TbeOpTask : public OpTask { | |||
| size_t GetArgSize() const; | |||
| const std::string &GetStubName() const; | |||
| void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); | |||
| uint32_t GetTaskType() const override; | |||
| const std::string &GetTaskType() const override; | |||
| void SetHandle(void *handle); | |||
| private: | |||
| @@ -123,7 +123,7 @@ class AiCpuBaseTask : public OpTask { | |||
| ~AiCpuBaseTask() override; | |||
| UnknowShapeOpType GetUnknownType() const { return unknown_type_; } | |||
| Status UpdateArgTable(const SingleOpModelParam ¶m) override; | |||
| uint32_t GetTaskType() const override; | |||
| const std::string &GetTaskType() const override; | |||
| protected: | |||
| Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | |||
| @@ -57,9 +57,9 @@ const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; | |||
| const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | |||
| // profiling data | |||
// Task-type tags reported with profiling data. These are string tags (the
// numeric ids were retired); keeping both forms redefined the same names
// with different types, which cannot compile — the stale uint32_t trio
// has been removed.
const std::string kTaskTypeAicore = "AI_CORE";
const std::string kTaskTypeAicpu = "AI_CPU";
const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID";
| // Data cache, including data address and length | |||
| struct DataBuffer { | |||
| @@ -251,27 +251,19 @@ struct Options { | |||
// Profiling info of a single task, reported per op execution.
struct TaskDescInfo {
  std::string model_name;  // om name when available, otherwise graph name
  std::string op_name;
  std::string op_type;
  uint32_t block_dim;      // kernel block dim (0 for non-kernel tasks)
  uint32_t task_id;        // runtime task id
  uint32_t stream_id;      // runtime stream id
  std::string shape_type;  // e.g. "static" — set by the producing model path
  int64_t cur_iter_num;    // current iteration number (0 for single-op paths)
  // Fixed: task_type is a string tag (kTaskTypeAicore / kTaskTypeAicpu /
  // kTaskTypeInvalid); the uint32_t field conflicted with every assignment
  // site, which passes std::string values.
  std::string task_type;
};
// Profiling info of graph
// Per-op graph description reported to the profiler: op identity, task-type
// tag, input/output tensor metadata, and the runtime task/stream ids.
struct ComputeGraphDescInfo {
  std::string model_name;  // om name when available, otherwise graph name
  std::string op_name;
  std::string op_type;
  std::string task_type;   // task-type tag — NOTE(review): not set by every producer in view; confirm consumers tolerate empty
  std::vector<Format> input_format;
  std::vector<std::vector<int64_t>> input_shape;
  std::vector<DataType> input_data_type;
  std::vector<Format> output_format;
  std::vector<std::vector<int64_t>> output_shape;
  std::vector<DataType> output_data_type;
  uint32_t task_id;        // runtime task id; 0 when not recorded
  uint32_t stream_id;      // runtime stream id; 0 when not recorded
};
| struct OpDescInfo { | |||
| @@ -761,7 +761,7 @@ set(GENERATOR_TEST_FILES | |||
| ) | |||
| set(SINGLE_OP_TEST_FILES | |||
| #"single_op/single_op_model_unittest.cc" | |||
| "single_op/single_op_model_unittest.cc" | |||
| "single_op/single_op_manager_unittest.cc" | |||
| "single_op/stream_resource_unittest.cc" | |||
| "single_op/single_op_task_unittest.cc" | |||
| @@ -890,4 +890,11 @@ TEST_F(UtestDavinciModel, Sink_model_profile) { | |||
| model.SinkModelProfile(); | |||
| } | |||
// Smoke test: SinkTimeProfile should complete without crashing when the
// msprof reporter callback is stubbed and the input data is default-constructed.
TEST_F(UtestDavinciModel, Sink_time_profile) {
  ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport;
  DavinciModel model(0, nullptr);
  InputData current_data;
  model.SinkTimeProfile(current_data);
}
| } // namespace ge | |||
| @@ -40,6 +40,10 @@ class UtestSingleOpModel : public testing::Test { | |||
| void TearDown() {} | |||
| }; | |||
| //rt api stub | |||
| rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId) { | |||
| return RT_ERROR_NONE; | |||
| } | |||
| /* | |||
| TEST_F(UtestSingleOpModel, test_init_model) { | |||
| string model_data_str = "123456789"; | |||
| @@ -101,9 +105,9 @@ TEST_F(UtestSingleOpModel, test_set_inputs_and_outputs) { | |||
| std::mutex stream_mu_; | |||
| rtStream_t stream_ = nullptr; | |||
| SingleOp single_op(&stream_mu_, stream_); | |||
| ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS); | |||
| // SingleOp single_op(&stream_mu_, stream_); | |||
| // | |||
| // ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS); | |||
| } | |||
| /* | |||
| TEST_F(UtestSingleOpModel, test_build_kernel_task) { | |||
| @@ -148,7 +152,7 @@ TEST_F(UtestSingleOpModel, test_init) { | |||
| ASSERT_EQ(op_model.Init(), FAILED); | |||
| } | |||
| */ | |||
| /* | |||
| TEST_F(UtestSingleOpModel, test_parse_arg_table) { | |||
| string model_data_str = "123456789"; | |||
| SingleOpModel op_model("model", model_data_str.c_str(), model_data_str.size()); | |||
| @@ -173,3 +177,23 @@ TEST_F(UtestSingleOpModel, test_parse_arg_table) { | |||
| ASSERT_EQ(op.arg_table_[1].size(), 1); | |||
| ASSERT_EQ(op.arg_table_[1].front(), &arg_base[0]); | |||
| } | |||
| */ | |||
| TEST_F(UtestSingleOpModel, test_op_task_get_profiler_args) { | |||
| string name = "relu"; | |||
| string type = "relu"; | |||
| auto op_desc = std::make_shared<ge::OpDesc>(name, type); | |||
| op_desc->SetStreamId(0); | |||
| op_desc->SetId(0); | |||
| TbeOpTask task; | |||
| task.op_desc_ = op_desc; | |||
| task.model_name_ = "resnet_50"; | |||
| task.model_id_ = 1; | |||
| TaskDescInfo task_desc_info; | |||
| uint32_t model_id; | |||
| task.GetProfilingArgs(task_desc_info, model_id); | |||
| ASSERT_EQ(task_desc_info.model_name, "resnet_50"); | |||
| ASSERT_EQ(model_id, 1); | |||
| } | |||