@@ -20,6 +20,8 @@ | |||
#include "framework/common/debug/log.h" | |||
#include "framework/common/string_util.h" | |||
#include "graph/ge_context.h" | |||
#include "graph/utils/type_utils.h" | |||
#include "graph/types.h" | |||
#include "runtime/base.h" | |||
#include "graph/load/model_manager/davinci_model.h" | |||
@@ -31,12 +33,30 @@ const char *const kBpPoint = "bp_point"; | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
// Chunk size used by ReportData: payloads at least this long are handed to the reporter in
// pieces of kReportMaxLen bytes, followed by the remainder.
const size_t kReportMaxLen = 2048; | |||
const int32_t kMaxDeviceNum = 256; | |||
// First argument (indent width) of Json::dump when profiling records are serialized.
// NOTE(review): identifier looks like a misspelling of "kInterval"; renaming needs all uses updated.
const uint32_t kInteval = 2; | |||
const std::string kConfigNumsdev = "devNums"; | |||
const std::string kConfigDevIdList = "devIdList"; | |||
const std::string kProfStart = "prof_start"; | |||
const std::string kProfStop = "prof_stop"; | |||
const std::string kProfModelSubscribe = "prof_model_subscribe"; | |||
const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; | |||
// JSON keys of the per-task record assembled in ProfilingTaskDescInfo.
const std::string kModelName = "model_name"; | |||
const std::string kModelId = "model_id"; | |||
// NOTE(review): identifier looks like a misspelling of "kOpName"; the emitted key "op_name" is fine.
const std::string kOpNmae = "op_name"; | |||
const std::string kOptype = "op_type"; | |||
const std::string kBlockDim = "block_dims"; | |||
const std::string kTaskId = "task_id"; | |||
const std::string kStreamId = "stream_id"; | |||
const std::string kShapeType = "shape_type"; | |||
const std::string kCurIterNum = "cur_iter_num"; | |||
const std::string kTaskType = "task_type"; | |||
// JSON keys of the per-tensor entries appended in ProfilingOpInputOutInfo: kInput / kOutput
// name the two lists, the remaining keys describe one tensor inside a list.
const std::string kInput = "input"; | |||
const std::string kOutput = "output"; | |||
const std::string kFormat = "format"; | |||
const std::string kDataType = "data_type"; | |||
const std::string kShape = "shape"; | |||
const std::string kIdx = "idx"; | |||
#endif | |||
} // namespace | |||
@@ -206,118 +226,69 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf | |||
#endif | |||
} | |||
// Appends the input and output tensor descriptions of one task to its JSON record.
//
// For every input tensor an object {idx, format, data_type, shape} is pushed onto
// task_json[kInput]; output tensors are pushed onto task_json[kOutput] the same way.
// A list key is only created when the task has at least one tensor of that kind.
//
// task:      task description whose *_format / *_data_type / *_shape vectors are read.
// task_json: JSON record of the task; extended in place.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingOpInputOutInfo(
    const TaskDescInfo &task, Json &task_json) {
#ifdef DAVINCI_SUPPORT_PROFILING
  // format, data type and shape are parallel vectors. Never index past the shortest one, otherwise
  // a task description with inconsistent vectors would be read out of bounds.
  size_t input_num = task.input_format.size();
  input_num = (task.input_data_type.size() < input_num) ? task.input_data_type.size() : input_num;
  input_num = (task.input_shape.size() < input_num) ? task.input_shape.size() : input_num;
  if (input_num != task.input_format.size()) {
    GELOGE(FAILED, "Input desc size mismatch, format:%zu, data type:%zu, shape:%zu.", task.input_format.size(),
           task.input_data_type.size(), task.input_shape.size());
  }
  for (size_t i = 0; i < input_num; i++) {
    Json tmp_input;
    tmp_input[kIdx] = i;
    tmp_input[kFormat] = TypeUtils::FormatToSerialString(task.input_format[i]);
    tmp_input[kDataType] = TypeUtils::DataTypeToSerialString(task.input_data_type[i]);
    tmp_input[kShape] = task.input_shape[i];
    task_json[kInput] += tmp_input;
  }

  size_t output_num = task.output_format.size();
  output_num = (task.output_data_type.size() < output_num) ? task.output_data_type.size() : output_num;
  output_num = (task.output_shape.size() < output_num) ? task.output_shape.size() : output_num;
  if (output_num != task.output_format.size()) {
    GELOGE(FAILED, "Output desc size mismatch, format:%zu, data type:%zu, shape:%zu.", task.output_format.size(),
           task.output_data_type.size(), task.output_shape.size());
  }
  for (size_t i = 0; i < output_num; i++) {
    Json tmp_output;
    tmp_output[kIdx] = i;
    tmp_output[kFormat] = TypeUtils::FormatToSerialString(task.output_format[i]);
    tmp_output[kDataType] = TypeUtils::DataTypeToSerialString(task.output_data_type[i]);
    tmp_output[kShape] = task.output_shape[i];
    task_json[kOutput] += tmp_output;
  }
#endif
}
// Reports every task of a model to the profiler as one JSON record tagged "task_desc_info".
//
// Each record carries the model/op identification, the scheduling ids and the tensor lists
// added by ProfilingOpInputOutInfo. Reporting stops at the first record that cannot be
// serialized; records already sent are not withdrawn.
//
// model_id:       id written into every record.
// task_desc_info: tasks to report, one record each.
// device_id:      device the records are reported for.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo(
    uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) {
#ifdef DAVINCI_SUPPORT_PROFILING
  for (const auto &desc : task_desc_info) {
    Json record;
    record[kModelName] = desc.model_name;
    record[kModelId] = model_id;
    record[kOpNmae] = desc.op_name;
    record[kOptype] = desc.op_type;
    record[kBlockDim] = desc.block_dim;
    record[kTaskType] = desc.task_type;
    record[kTaskId] = desc.task_id;
    record[kStreamId] = desc.stream_id;
    record[kCurIterNum] = desc.cur_iter_num;
    record[kShapeType] = desc.shape_type;
    ProfilingOpInputOutInfo(desc, record);

    std::string payload;
    try {
      payload = record.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
    } catch (const std::exception &e) {
      GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
      return;
    } catch (...) {
      GELOGE(FAILED, "Failed to convert JSON to string.");
      return;
    }
    // Records are separated by a comma and a line break in the reported stream.
    payload.append(",\n");
    ReportData(device_id, payload, "task_desc_info");
  }
#endif
}
void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &data) { | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportData( | |||
const int32_t &device_id, const string &data, const string &tag_name) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
ReporterData reporter_data{}; | |||
int ret = -1; | |||
@@ -325,36 +296,38 @@ void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &d | |||
size_t index = data.size() / kReportMaxLen; | |||
if (index >= 1) { | |||
reporter_data.deviceId = device_id; | |||
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); | |||
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;); | |||
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size()); | |||
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;); | |||
for (size_t i = 0; i < index; ++i) { | |||
reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * i; | |||
reporter_data.dataLen = kReportMaxLen; | |||
cb_ret = CallMsprofReport(reporter_data); | |||
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); | |||
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret); | |||
return;); | |||
} | |||
reporter_data.dataLen = data.size() - kReportMaxLen * index; | |||
if (reporter_data.dataLen != 0) { | |||
reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * index; | |||
cb_ret = CallMsprofReport(reporter_data); | |||
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); | |||
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret); | |||
return;); | |||
} | |||
} else { | |||
reporter_data.deviceId = device_id; | |||
reporter_data.data = (unsigned char *)data.c_str(); | |||
reporter_data.dataLen = data.size(); | |||
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info")); | |||
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;); | |||
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size()); | |||
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;); | |||
cb_ret = CallMsprofReport(reporter_data); | |||
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;); | |||
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret); | |||
return;); | |||
} | |||
#endif | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData( | |||
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | |||
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) { | |||
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
int32_t logic_device_id = 0; | |||
rtError_t rt_ret = rtGetDevice(&logic_device_id); | |||
@@ -365,8 +338,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr | |||
GELOGD("current logic_device_id:%d", logic_device_id); | |||
GELOGD("start ProfilingTaskDescInfo."); | |||
ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id); | |||
GELOGD("start ProfilingGraphDescInfo."); | |||
ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id); | |||
GELOGD("Report profiling data for GE end."); | |||
#endif | |||
} | |||
@@ -813,6 +784,44 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMs | |||
static_cast<void *>(&reporter_data), sizeof(ReporterData)); | |||
} | |||
// Collects format, shape and data type of every input and output tensor of an op.
//
// The six tensor vectors of task_desc_info are overwritten. Tensor descriptions that are
// nullptr are skipped. A side (input or output) without any usable tensor description gets
// one placeholder entry: FORMAT_NULL, shape {0} and DT_UNDEFINED.
//
// op:             op description to read; a nullptr op yields placeholders on both sides.
// task_desc_info: receives the collected vectors.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetOpInputOutputInfo(
    const OpDescPtr &op, TaskDescInfo &task_desc_info) const {
  // Fill the destination vectors directly instead of building temporaries and copying them.
  auto &input_format = task_desc_info.input_format;
  auto &input_shape = task_desc_info.input_shape;
  auto &input_data_type = task_desc_info.input_data_type;
  auto &output_format = task_desc_info.output_format;
  auto &output_shape = task_desc_info.output_shape;
  auto &output_data_type = task_desc_info.output_data_type;
  input_format.clear();
  input_shape.clear();
  input_data_type.clear();
  output_format.clear();
  output_shape.clear();
  output_data_type.clear();

  if (op == nullptr) {
    // Public entry point: do not dereference a null op, fall through to the placeholders.
    GELOGE(FAILED, "Op desc is nullptr, report default input and output info.");
  } else {
    const size_t input_num = op->GetAllInputsSize();
    for (size_t i = 0; i < input_num; ++i) {
      const GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i);
      if (input_tensor_desc == nullptr) {
        continue;
      }
      input_format.emplace_back(input_tensor_desc->GetFormat());
      input_shape.emplace_back(input_tensor_desc->GetShape().GetDims());
      input_data_type.emplace_back(input_tensor_desc->GetDataType());
    }

    const size_t output_num = op->GetOutputsSize();
    for (size_t j = 0; j < output_num; ++j) {
      const GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j);
      if (output_tensor_desc == nullptr) {
        continue;
      }
      output_format.emplace_back(output_tensor_desc->GetFormat());
      output_shape.emplace_back(output_tensor_desc->GetShape().GetDims());
      output_data_type.emplace_back(output_tensor_desc->GetDataType());
    }
  }

  // Placeholders keep the reported record well-formed when a side has no tensor description.
  if (input_format.empty()) {
    input_format.push_back(FORMAT_NULL);
  }
  if (input_shape.empty()) {
    input_shape.push_back(std::vector<int64_t>{0});
  }
  if (input_data_type.empty()) {
    input_data_type.push_back(DT_UNDEFINED);
  }
  if (output_format.empty()) {
    output_format.push_back(FORMAT_NULL);
  }
  if (output_shape.empty()) {
    output_shape.push_back(std::vector<int64_t>{0});
  }
  if (output_data_type.empty()) {
    output_data_type.push_back(DT_UNDEFINED);
  }
}
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpPoint( | |||
std::string &fp_point, std::string &bp_point) { | |||
// Env or options mode, fp_point_/bp_point_ have initiliazed on profiling init | |||
@@ -54,6 +54,8 @@ namespace { | |||
} // namespace | |||
namespace ge { | |||
class OpDesc; | |||
using OpDescPtr = std::shared_ptr<OpDesc>; | |||
struct DeviceSubsInfo { | |||
uint64_t module; | |||
uint32_t subscribe_count; | |||
@@ -82,12 +84,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
bool ProfilingModelExecuteOn() const; | |||
// is_execute_profiling_ only used by ge option and env | |||
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } | |||
void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | |||
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info); | |||
void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info); | |||
void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | |||
const int32_t &device_id); | |||
void ProfilingGraphDescInfo(uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, | |||
const int32_t &device_id); | |||
void ProfilingOpInputOutInfo(const TaskDescInfo &task, Json &task_json); | |||
Status PluginInit() const; | |||
void PluginUnInit() const; | |||
Status CallMsprofReport(ReporterData &reporter_data) const; | |||
@@ -95,6 +95,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; } | |||
void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; } | |||
void GetFpBpPoint(std::string &fp_point, std::string &bp_point); | |||
void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const; | |||
void ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name); | |||
private: | |||
Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf); | |||
Status ParseOptions(const std::string &options); | |||
@@ -103,7 +105,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
Status ProfParseDeviceId(const std::map<std::string, std::string> &config_para, | |||
vector<int32_t> &device_list); | |||
uint64_t GetProfilingModule(); | |||
void GraphDescReport(const int32_t &device_id, const string &data); | |||
void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector<int32_t> &device_list); | |||
void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module); | |||
@@ -852,7 +852,7 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi | |||
// subgraph of dynamic graph no need to find index, has been found in parent graph | |||
if (IsSubGraphOfDynamicGraph(graph)) { | |||
GELOGI("Graph[%s] is subgraph of dynamic graph, no nned to find index.", graph->GetName().c_str()); | |||
GELOGI("Graph[%s] is subgraph of dynamic graph, no need to find index.", graph->GetName().c_str()); | |||
return SUCCESS; | |||
} | |||
@@ -92,6 +92,32 @@ const uint32_t kEndOfSequence = 0x0704000a; | |||
const uint32_t kEndOfSequenceNew = 507005; | |||
const int32_t kModelAbortNormal = 0x0704000e; | |||
const int32_t kModelAbortNormalNew = 507024; | |||
// First argument (indent width) of Json::dump in SinkModelProfile and SinkTimeProfile.
// NOTE(review): identifier looks like a misspelling of "kInterval".
const uint32_t kInteval = 2; | |||
// JSON keys of the model load record built in SinkModelProfile. kModelName and kModeleId are
// also written by SinkTimeProfile.
const char *const kModelName = "model_name"; | |||
// NOTE(review): identifier looks like a misspelling of "kModelId"; the emitted key "model_id" is fine.
const char *const kModeleId = "model_id"; | |||
const char *const kLoadStartTime = "load_start_time"; | |||
const char *const kLoadEndTime = "load_end_time"; | |||
// Keys of one fusion op entry appended to the kFusionOpInfo list of the model load record.
const char *const kFusionOpInfo = "fusion_op_info"; | |||
const char *const kFusionOpName = "fusion_op_name"; | |||
const char *const kOriginalOpNum = "origin_op_num"; | |||
const char *const kOriginalOpName = "origin_op_name"; | |||
const char *const kStreamId = "stream_id"; | |||
// Keys of the nested memory object stored under kFusionOpMemoryInfo.
const char *const kFusionOpMemoryInfo = "memory_info"; | |||
const char *const kInputSize = "input_size"; | |||
const char *const kOutputSize = "output_size"; | |||
const char *const kWeightSize = "weight_size"; | |||
const char *const kWorkSpaceSize = "workspace_size"; | |||
const char *const kTotalSize = "total_size"; | |||
const char *const kTaskCount = "task_count"; | |||
const char *const kTaskId = "task_id"; | |||
// JSON keys of the per-request timing record built in SinkTimeProfile.
const char* const kRequestId = "request_id"; | |||
const char* const kThreadId = "thread_id"; | |||
const char* const kInputBeginTime = "input_begin_time"; | |||
const char* const kInputEndTime = "input_end_time"; | |||
const char* const kInferBeginTime = "infer_begin_time"; | |||
const char* const kInferEndTime = "infer_end_time"; | |||
// NOTE(review): value is "output_start_time" while the sibling keys use "*_begin_time";
// confirm against the consumer of this record before changing it.
const char* const kOutputBeginTime = "output_start_time"; | |||
const char* const kOutputEndTime = "output_end_time"; | |||
inline bool IsDataOp(const std::string &node_type) { | |||
return (node_type == DATA_TYPE) || (node_type == AIPP_DATA_TYPE) || (node_type == ANN_DATA_TYPE); | |||
@@ -744,13 +770,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
} | |||
Status DavinciModel::ReportProfilingData() { | |||
std::vector<ComputeGraphDescInfo> compute_graph_desc_info; | |||
Status ret = GetComputeGraphInfo(compute_graph_desc_info); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "GetComputeGraphInfo failed."); | |||
return ret; | |||
} | |||
ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info); | |||
ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo()); | |||
GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); | |||
return SUCCESS; | |||
@@ -2202,173 +2222,101 @@ Status DavinciModel::InitModelProfile() { | |||
} | |||
// Reports the load information of this model to the profiler as a single JSON record.
//
// The record holds the model name and id, the load start/end time and one entry per element
// of profile_list_ (fusion op name, original op names, stream id, memory sizes, task count
// and task ids). It is reported under the tag "model_load_info_<model id>".
//
// Returns SUCCESS once the record has been handed to ProfilingManager::ReportData, FAILED when
// the record cannot be serialized (nothing is reported in that case).
Status DavinciModel::SinkModelProfile() {
  // profiling plugin must be registered
  auto &prof_mgr = ProfilingManager::Instance();

  // Model Header
  const std::string name = om_name_.empty() ? name_ : om_name_;
  const uint32_t model_id = this->Id();

  // Load Start/End Time
  const int64_t start_time = this->GetLoadBeginTime();
  const int64_t end_time = this->GetLoadEndTime();

  Json model_load_info;
  model_load_info[kModelName] = name;
  model_load_info[kModeleId] = model_id;
  model_load_info[kLoadStartTime] = start_time;
  model_load_info[kLoadEndTime] = end_time;

  // fusion op info
  using CIT = std::multimap<uint32_t, uint32_t>::const_iterator;
  using Range = std::pair<CIT, CIT>;
  for (const ProfileInfo &profile : profile_list_) {
    Json fusion_op_info;
    // op name after fusion
    const std::string &fusion_op_name = profile.fusion_info.op_name;

    // original op name before fusion
    const uint32_t op_num = static_cast<uint32_t>(profile.fusion_info.original_op_names.size());
    std::vector<std::string> original_name;
    original_name.reserve(op_num);
    for (uint32_t k = 0; k < op_num; k++) {
      original_name.emplace_back(profile.fusion_info.original_op_names[k]);
    }

    // stream id info, 0 when the fusion op is unknown to profiler_report_op_info_
    uint32_t stream_id = 0;
    const auto iter = profiler_report_op_info_.find(fusion_op_name);
    if (iter != profiler_report_op_info_.end()) {
      stream_id = iter->second.second;
    }

    fusion_op_info[kFusionOpName] = fusion_op_name;
    fusion_op_info[kOriginalOpNum] = op_num;
    fusion_op_info[kOriginalOpName] = original_name;
    fusion_op_info[kStreamId] = stream_id;
    // memory info
    fusion_op_info[kFusionOpMemoryInfo][kInputSize] = profile.memory_info.input_size;
    fusion_op_info[kFusionOpMemoryInfo][kOutputSize] = profile.memory_info.output_size;
    fusion_op_info[kFusionOpMemoryInfo][kWeightSize] = profile.memory_info.weight_size;
    fusion_op_info[kFusionOpMemoryInfo][kWorkSpaceSize] = profile.memory_info.workspace_size;
    fusion_op_info[kFusionOpMemoryInfo][kTotalSize] = profile.memory_info.total_size;
    // task info
    fusion_op_info[kTaskCount] = profile.task_count;
    std::vector<uint32_t> task_id;
    const Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index);
    for (CIT idx = task_range.first; idx != task_range.second; ++idx) {
      task_id.push_back(idx->second);
    }
    fusion_op_info[kTaskId] = task_id;

    model_load_info[kFusionOpInfo] += fusion_op_info;
  }

  // report model data tag name
  const std::string tag_name("model_load_info_" + std::to_string(this->Id()));
  std::string reported_data;
  try {
    reported_data = model_load_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
  } catch (const std::exception &e) {
    // Do not report an empty record and claim success when serialization failed.
    GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
    return FAILED;
  } catch (...) {
    GELOGE(FAILED, "Failed to convert JSON to string.");
    return FAILED;
  }
  // Records are separated by a comma and a line break in the reported stream.
  reported_data.append(",")
      .append("\n");
  prof_mgr.ReportData(device_id_, reported_data, tag_name);
  return SUCCESS;
}
// Reports the timing of one inference request to the profiler as a single JSON record.
//
// The record holds the model name and id, the request id, the data input thread id and the
// begin/end timestamps stored in time_info_. It is reported under the tag
// "model_time_info_<model id>_<data index>".
//
// current_data: input data of the request; its request_id and index are read.
//
// Returns SUCCESS once the record has been handed to ProfilingManager::ReportData, FAILED when
// the record cannot be serialized (nothing is reported in that case).
Status DavinciModel::SinkTimeProfile(const InputData &current_data) {
  // profiling plugin must be registered
  auto &prof_mgr = ProfilingManager::Instance();

  const std::string name = om_name_.empty() ? name_ : om_name_;
  Json model_time_info;
  model_time_info[kModelName] = name;
  model_time_info[kModeleId] = this->Id();
  model_time_info[kRequestId] = current_data.request_id;
  model_time_info[kThreadId] = GetDataInputTid();
  model_time_info[kInputBeginTime] = time_info_.processBeginTime;
  model_time_info[kInputEndTime] = time_info_.processEndTime;
  model_time_info[kInferBeginTime] = time_info_.inferenceBeginTime;
  model_time_info[kInferEndTime] = time_info_.inferenceEndTime;
  model_time_info[kOutputBeginTime] = time_info_.dumpBeginTime;
  model_time_info[kOutputEndTime] = time_info_.dumpEndTime;

  // report model data tag name
  std::string tag_name;
  tag_name.append("model_time_info_")
      .append(std::to_string(this->Id()))
      .append("_")
      .append(std::to_string(current_data.index));

  std::string reported_data;
  try {
    reported_data = model_time_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
  } catch (const std::exception &e) {
    // Do not report an empty record and claim success when serialization failed.
    GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
    return FAILED;
  } catch (...) {
    GELOGE(FAILED, "Failed to convert JSON to string.");
    return FAILED;
  }
  // Records are separated by a comma and a line break in the reported stream.
  reported_data.append(",")
      .append("\n");
  prof_mgr.ReportData(device_id_, reported_data, tag_name);
  return SUCCESS;
}
@@ -3069,13 +3017,15 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo | |||
task_desc_info.model_name = name_; | |||
} | |||
task_desc_info.op_name = op->GetName(); | |||
task_desc_info.op_type = op->GetType(); | |||
task_desc_info.block_dim = task_def.kernel().block_dim(); | |||
task_desc_info.task_id = task->GetTaskID(); | |||
task_desc_info.stream_id = task->GetStreamId(); | |||
task_desc_info.shape_type = "static"; | |||
task_desc_info.cur_iter_num = 0; | |||
// task type | |||
task_desc_info.task_type = kTaskTypeInvalid; | |||
auto &prof_mgr = ProfilingManager::Instance(); | |||
prof_mgr.GetOpInputOutputInfo(op, task_desc_info); | |||
auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||
if (model_task_type == RT_MODEL_TASK_KERNEL) { | |||
const domi::KernelDef &kernel_def = task_def.kernel(); | |||
@@ -3107,7 +3057,6 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo | |||
task_desc_info_.emplace_back(task_desc_info); | |||
} | |||
} | |||
return; | |||
} | |||
Status DavinciModel::DistributeTask() { | |||
@@ -4008,41 +3957,6 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea | |||
main_follow_stream_mapping_[main_stream_id].emplace_back(stream); | |||
} | |||
// Builds one ComputeGraphDescInfo for every entry returned by data_dumper_.GetAllOpDescInfo()
// and appends it to graph_desc_info. The output vector is never cleared here, so existing
// elements are kept. Always returns SUCCESS.
Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info) { | |||
auto &all_op_desc = data_dumper_.GetAllOpDescInfo(); | |||
for (auto &op_desc : all_op_desc) { | |||
ComputeGraphDescInfo compute_graph_info; | |||
// Use om_name_ as the model name when it is set; otherwise fall back to name_.
if (!om_name_.empty()) { | |||
compute_graph_info.model_name = om_name_; | |||
} else { | |||
compute_graph_info.model_name = name_; | |||
} | |||
// Single-element placeholders substituted below for any op field that is empty.
std::vector<Format> format = { FORMAT_NULL }; | |||
std::vector<std::vector<int64_t>> shape = { {0} }; | |||
std::vector<DataType> data_type = { DT_UNDEFINED }; | |||
compute_graph_info.op_name = op_desc.op_name; | |||
compute_graph_info.op_type = op_desc.op_type; | |||
compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format; | |||
compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape; | |||
compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type; | |||
compute_graph_info.output_format = op_desc.output_format.empty() ? format : op_desc.output_format; | |||
compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape; | |||
compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? data_type : op_desc.output_data_type; | |||
// task_id and stream_id stay 0 when profiler_report_op_info_ has no entry for this op name;
// otherwise they come from the stored (task_id, stream_id) pair.
uint32_t task_id = 0; | |||
uint32_t stream_id = 0; | |||
auto iter = profiler_report_op_info_.find(op_desc.op_name); | |||
if (iter != profiler_report_op_info_.end()) { | |||
task_id = iter->second.first; | |||
stream_id = iter->second.second; | |||
} | |||
compute_graph_info.task_id = task_id; | |||
compute_graph_info.stream_id = stream_id; | |||
graph_desc_info.emplace_back(compute_graph_info); | |||
} | |||
return SUCCESS; | |||
} | |||
void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) { | |||
if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) { | |||
tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_; | |||
@@ -840,9 +840,6 @@ class DavinciModel { | |||
Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); | |||
// get desc info of graph for profiling | |||
Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info); | |||
void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name); | |||
Status InitL1DataDumperArgs(); | |||
@@ -70,8 +70,6 @@ class NodeDoneCallback { | |||
Status PrepareConstInputs(const NodeItem &node_item); | |||
Status DumpDynamicNode(); | |||
Status ProfilingReport(); | |||
Status GetGraphDescInfo(const NodePtr node, const HybridModel *model, | |||
std::vector<ComputeGraphDescInfo> &compute_graph_info); | |||
Status GetTaskDescInfo(const NodePtr node, const HybridModel *model, | |||
std::vector<TaskDescInfo> &task_desc_info); | |||
GraphExecutionContext *graph_context_; | |||
@@ -159,51 +157,14 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel * | |||
} | |||
GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); | |||
auto &prof_mgr = ProfilingManager::Instance(); | |||
task_desc_info = context_->GetProfilingTaskDescInfo(); | |||
context_->ClearProfilingTaskDescInfo(); | |||
return SUCCESS; | |||
} | |||
Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel *model, | |||
std::vector<ComputeGraphDescInfo> &compute_graph_info) { | |||
GE_CHECK_NOTNULL(node); | |||
GE_CHECK_NOTNULL(model); | |||
GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str()); | |||
compute_graph_info = context_->GetProfilingGraphDescInfo(); | |||
context_->ClearProfilingGraphDescInfo(); | |||
auto op_desc = node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(op_desc); | |||
for (auto &tmp_compute_graph_info : compute_graph_info) { | |||
// default | |||
if (op_desc->GetAllInputsSize() == 0) { | |||
tmp_compute_graph_info.input_format = { FORMAT_NULL }; | |||
tmp_compute_graph_info.input_shape = { {0} }; | |||
tmp_compute_graph_info.input_data_type = { DT_UNDEFINED }; | |||
} | |||
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||
GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); | |||
if (input_desc == nullptr) { | |||
continue; | |||
} | |||
tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); | |||
tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); | |||
tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); | |||
} | |||
if (op_desc->GetOutputsSize() == 0) { | |||
tmp_compute_graph_info.output_format = { FORMAT_NULL }; | |||
tmp_compute_graph_info.output_shape = { {0} }; | |||
tmp_compute_graph_info.output_data_type = { DT_UNDEFINED }; | |||
} | |||
for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { | |||
GeTensorDesc output_desc = op_desc->GetOutputDesc(j); | |||
tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); | |||
tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); | |||
tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); | |||
} | |||
for (auto &tmp_task_desc : task_desc_info) { | |||
// save op input and output info | |||
auto op_desc = node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(op_desc); | |||
prof_mgr.GetOpInputOutputInfo(op_desc, tmp_task_desc); | |||
} | |||
return SUCCESS; | |||
@@ -233,15 +194,8 @@ Status NodeDoneCallback::ProfilingReport() { | |||
return profiling_ret; | |||
} | |||
std::vector<ComputeGraphDescInfo> compute_graph_info; | |||
profiling_ret = GetGraphDescInfo(node, model, compute_graph_info); | |||
if (profiling_ret != RT_ERROR_NONE) { | |||
GELOGE(profiling_ret, "Get graph info of node[%s] failed.", node->GetName().c_str()); | |||
return profiling_ret; | |||
} | |||
auto &profiling_manager = ProfilingManager::Instance(); | |||
profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info); | |||
profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info); | |||
return SUCCESS; | |||
} | |||
@@ -189,12 +189,11 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||
uint32_t stream_id = 0; | |||
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||
return FAILED; | |||
GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); | |||
(void)context.SaveProfilingGraphDescInfo(task_id, stream_id); | |||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | |||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | |||
} | |||
@@ -201,12 +201,11 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void( | |||
uint32_t stream_id = 0; | |||
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||
return FAILED; | |||
GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0); | |||
(void)context.SaveProfilingGraphDescInfo(task_id, stream_id); | |||
auto callback = [=, &context]() { | |||
GELOGD("Node[%s] callback start.", node_name_.c_str()); | |||
RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | |||
@@ -515,7 +515,7 @@ Status TaskContext::Synchronize() { | |||
} | |||
Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||
uint32_t task_type, uint32_t block_dim) { | |||
const std::string &task_type, uint32_t block_dim) { | |||
if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | |||
const NodeItem &node_item = GetNodeItem(); | |||
auto op_desc = node_item.GetOpDesc(); | |||
@@ -525,11 +525,11 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream | |||
const HybridModel *model = graph_context->model; | |||
GE_CHECK_NOTNULL(model); | |||
std::string op_name = op_desc->GetName(); | |||
std::string dynamic_model_name = model->GetModelName(); | |||
TaskDescInfo tmp_task_desc_info; | |||
tmp_task_desc_info.model_name = dynamic_model_name; | |||
tmp_task_desc_info.op_name = op_name; | |||
tmp_task_desc_info.op_name = op_desc->GetName(); | |||
tmp_task_desc_info.op_type = op_desc->GetType(); | |||
tmp_task_desc_info.block_dim = block_dim; | |||
tmp_task_desc_info.task_type = task_type; | |||
tmp_task_desc_info.task_id = task_id; | |||
@@ -546,31 +546,5 @@ NodeState *TaskContext::GetNodeState() const { | |||
return node_state_; | |||
} | |||
// Appends a ComputeGraphDescInfo for the current node to compute_graph_info, but only when
// ProfilingManager::Instance().ProfilingModelExecuteOn() is true and the op's
// ATTR_NAME_IMPLY_TYPE attribute reads back as domi::ImplyType::TVM. In every other case
// nothing is recorded. Returns SUCCESS on the paths visible here.
// NOTE(review): GE_CHECK_NOTNULL is a macro defined elsewhere; presumably it returns an
// error status when its argument is null — confirm.
Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id) { | |||
if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | |||
const NodeItem &node_item = GetNodeItem(); | |||
auto op_desc = node_item.GetOpDesc(); | |||
GE_CHECK_NOTNULL(op_desc); | |||
const GraphExecutionContext *graph_context = GetExecutionContext(); | |||
GE_CHECK_NOTNULL(graph_context); | |||
const HybridModel *model = graph_context->model; | |||
GE_CHECK_NOTNULL(model); | |||
std::string dynamic_model_name = model->GetModelName(); | |||
// Start from INVALID so a failed attribute lookup can never compare equal to TVM.
auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID); | |||
if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && | |||
op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) { | |||
ComputeGraphDescInfo tmp_compute_graph_info; | |||
tmp_compute_graph_info.model_name = dynamic_model_name; | |||
tmp_compute_graph_info.op_name = op_desc->GetName(); | |||
tmp_compute_graph_info.op_type = op_desc->GetType(); | |||
tmp_compute_graph_info.task_id = task_id; | |||
tmp_compute_graph_info.stream_id = stream_id; | |||
compute_graph_info.emplace_back(tmp_compute_graph_info); | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
} // namespace hybrid | |||
} // namespace ge |
@@ -113,13 +113,10 @@ class TaskContext { | |||
void *handle_ = nullptr; | |||
const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | |||
Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, uint32_t task_type, uint32_t block_dim); | |||
Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||
const std::string &task_type, uint32_t block_dim); | |||
void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | |||
const std::vector<ComputeGraphDescInfo>& GetProfilingGraphDescInfo() const { return compute_graph_info; } | |||
Status SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id); | |||
void ClearProfilingGraphDescInfo() { compute_graph_info.clear(); } | |||
private: | |||
TaskContext(GraphExecutionContext *execution_context, | |||
NodeState *node_state, | |||
@@ -141,7 +138,6 @@ class TaskContext { | |||
uint32_t task_id_ = 0; | |||
uint32_t stream_id_ = 0; | |||
std::vector<TaskDescInfo> task_desc_info; | |||
std::vector<ComputeGraphDescInfo> compute_graph_info; | |||
}; | |||
} // namespace hybrid | |||
} // namespace ge | |||
@@ -45,40 +45,24 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { | |||
return SUCCESS; | |||
} | |||
string model_name; | |||
string op_name; | |||
TaskDescInfo tmp_task_desc_info; | |||
uint32_t model_id; | |||
uint32_t block_dim; | |||
if (op_task->GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) { | |||
if (op_task->GetProfilingArgs(tmp_task_desc_info, model_id) != SUCCESS) { | |||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); | |||
return ACL_ERROR_GE_PARAM_INVALID; | |||
} | |||
GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str()); | |||
std::vector<TaskDescInfo> task_desc_info; | |||
uint32_t task_id = 0; | |||
uint32_t stream_id = 0; | |||
auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
GELOGD("ProfilingReport of op[%s] model[%s] start.", | |||
tmp_task_desc_info.op_name.c_str(), tmp_task_desc_info.model_name.c_str()); | |||
TaskDescInfo tmp_task_desc_info; | |||
tmp_task_desc_info.model_name = model_name; | |||
tmp_task_desc_info.op_name = op_name; | |||
tmp_task_desc_info.block_dim = block_dim; | |||
tmp_task_desc_info.task_id = task_id; | |||
tmp_task_desc_info.stream_id = stream_id; | |||
tmp_task_desc_info.shape_type = shape_type; | |||
tmp_task_desc_info.cur_iter_num = 0; | |||
tmp_task_desc_info.task_type = op_task->GetTaskType(); | |||
GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | |||
task_desc_info.emplace_back(tmp_task_desc_info); | |||
std::vector<ComputeGraphDescInfo> compute_graph_info; | |||
std::vector<TaskDescInfo> task_desc_info; | |||
task_desc_info.emplace_back(tmp_task_desc_info); | |||
auto &profiling_manager = ProfilingManager::Instance(); | |||
profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info); | |||
profiling_manager.ReportProfilingData(model_id, task_desc_info); | |||
return SUCCESS; | |||
} | |||
} // namespace | |||
@@ -23,6 +23,7 @@ | |||
#include "aicpu/common/aicpu_task_struct.h" | |||
#include "common/dump/dump_manager.h" | |||
#include "common/dump/dump_op.h" | |||
#include "common/profiling/profiling_manager.h" | |||
#include "common/formats/formats.h" | |||
#include "common/math/math_util.h" | |||
#include "framework/common/debug/log.h" | |||
@@ -108,15 +109,29 @@ void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) { | |||
model_id_ = model_id; | |||
} | |||
Status OpTask::GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, | |||
uint32_t &block_dim) { | |||
model_name = model_name_; | |||
model_id = model_id_; | |||
block_dim = block_dim_; | |||
Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id) { | |||
uint32_t task_id = 0; | |||
uint32_t stream_id = 0; | |||
auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Get task_id and stream_id failed ret: 0x%X.", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
GE_CHECK_NOTNULL(op_desc_); | |||
op_name = op_desc_->GetName(); | |||
string op_name = op_desc_->GetName(); | |||
GELOGD("Get profiling args of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | |||
model_id = model_id_; | |||
task_desc_info.model_name = model_name_; | |||
task_desc_info.block_dim = block_dim_; | |||
task_desc_info.task_id = task_id; | |||
task_desc_info.stream_id = stream_id; | |||
task_desc_info.op_name = op_name; | |||
task_desc_info.op_type = op_desc_->GetType(); | |||
auto &prof_mgr = ProfilingManager::Instance(); | |||
prof_mgr.GetOpInputOutputInfo(op_desc_, task_desc_info); | |||
return SUCCESS; | |||
} | |||
// Ignores both tensor-descriptor lists and always returns UNSUPPORTED.
Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) { | |||
return UNSUPPORTED; | |||
} | |||
@@ -153,7 +168,7 @@ Status OpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||
return UNSUPPORTED; | |||
} | |||
uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; } | |||
const std::string &OpTask::GetTaskType() const { return kTaskTypeInvalid; } | |||
TbeOpTask::~TbeOpTask() { | |||
if (sm_desc_ != nullptr) { | |||
@@ -171,7 +186,7 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; } | |||
const std::string &TbeOpTask::GetStubName() const { return stub_name_; } | |||
uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } | |||
const std::string &TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } | |||
void TbeOpTask::SetHandle(void *handle) { | |||
this->handle_ = handle; | |||
@@ -834,7 +849,7 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam ¶m) { | |||
return DoUpdateArgTable(param, false); | |||
} | |||
uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; } | |||
const std::string &AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; } | |||
void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | |||
arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data()); | |||
@@ -43,7 +43,7 @@ class OpTask { | |||
const vector<GeTensorDesc> &output_desc); | |||
virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | |||
void SetModelArgs(std::string model_name, uint32_t model_id); | |||
Status GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, uint32_t &block_dim); | |||
Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); | |||
const OpDescPtr &GetOpdesc() const {return op_desc_;} | |||
Status OpenDump(rtStream_t stream); | |||
virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0; | |||
@@ -52,7 +52,7 @@ class OpTask { | |||
std::vector<GeTensorDesc> &output_desc, | |||
std::vector<DataBuffer> &output_buffers, | |||
rtStream_t stream); | |||
virtual uint32_t GetTaskType() const; | |||
virtual const std::string &GetTaskType() const; | |||
protected: | |||
Status DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace); | |||
@@ -88,7 +88,7 @@ class TbeOpTask : public OpTask { | |||
size_t GetArgSize() const; | |||
const std::string &GetStubName() const; | |||
void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); | |||
uint32_t GetTaskType() const override; | |||
const std::string &GetTaskType() const override; | |||
void SetHandle(void *handle); | |||
private: | |||
@@ -123,7 +123,7 @@ class AiCpuBaseTask : public OpTask { | |||
~AiCpuBaseTask() override; | |||
UnknowShapeOpType GetUnknownType() const { return unknown_type_; } | |||
Status UpdateArgTable(const SingleOpModelParam ¶m) override; | |||
uint32_t GetTaskType() const override; | |||
const std::string &GetTaskType() const override; | |||
protected: | |||
Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | |||
@@ -57,9 +57,9 @@ const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; | |||
const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | |||
// profiling data | |||
const uint32_t kTaskTypeAicore = 0; | |||
const uint32_t kTaskTypeAicpu = 1; | |||
const uint32_t kTaskTypeInvalid = 0xFFFF; | |||
const std::string kTaskTypeAicore = "AI_CORE"; | |||
const std::string kTaskTypeAicpu = "AI_CPU"; | |||
const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID"; | |||
// Data cache, including data address and length | |||
struct DataBuffer { | |||
@@ -251,27 +251,19 @@ struct Options { | |||
struct TaskDescInfo { | |||
std::string model_name; | |||
std::string op_name; | |||
std::string op_type; | |||
uint32_t block_dim; | |||
uint32_t task_id; | |||
uint32_t stream_id; | |||
std::string shape_type; | |||
int64_t cur_iter_num; | |||
uint32_t task_type; | |||
}; | |||
// Profiling info of graph | |||
struct ComputeGraphDescInfo { | |||
std::string model_name; | |||
std::string op_name; | |||
std::string op_type; | |||
std::string task_type; | |||
std::vector<Format> input_format; | |||
std::vector<std::vector<int64_t>> input_shape; | |||
std::vector<DataType> input_data_type; | |||
std::vector<Format> output_format; | |||
std::vector<std::vector<int64_t>> output_shape; | |||
std::vector<DataType> output_data_type; | |||
uint32_t task_id; | |||
uint32_t stream_id; | |||
}; | |||
struct OpDescInfo { | |||
@@ -761,7 +761,7 @@ set(GENERATOR_TEST_FILES | |||
) | |||
set(SINGLE_OP_TEST_FILES | |||
#"single_op/single_op_model_unittest.cc" | |||
"single_op/single_op_model_unittest.cc" | |||
"single_op/single_op_manager_unittest.cc" | |||
"single_op/stream_resource_unittest.cc" | |||
"single_op/single_op_task_unittest.cc" | |||
@@ -890,4 +890,11 @@ TEST_F(UtestDavinciModel, Sink_model_profile) { | |||
model.SinkModelProfile(); | |||
} | |||
// Smoke test: installs MsprofReport as the profiling reporter callback, then calls
// SinkTimeProfile on a DavinciModel with a default-constructed InputData.
// The return value is not checked and the test contains no assertions.
// NOTE(review): the callback assigned on ProfilingManager::Instance() is not restored
// inside this test — confirm that later tests do not depend on its previous value.
TEST_F(UtestDavinciModel, Sink_time_profile) { | |||
ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport; | |||
DavinciModel model(0, nullptr); | |||
InputData current_data; | |||
model.SinkTimeProfile(current_data); | |||
} | |||
} // namespace ge |
@@ -40,6 +40,10 @@ class UtestSingleOpModel : public testing::Test { | |||
void TearDown() {} | |||
}; | |||
//rt api stub | |||
// Test stub: returns RT_ERROR_NONE without writing to taskId or streamId, so callers
// keep whatever values they stored in those variables before the call.
rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId) { | |||
return RT_ERROR_NONE; | |||
} | |||
/* | |||
TEST_F(UtestSingleOpModel, test_init_model) { | |||
string model_data_str = "123456789"; | |||
@@ -101,9 +105,9 @@ TEST_F(UtestSingleOpModel, test_set_inputs_and_outputs) { | |||
std::mutex stream_mu_; | |||
rtStream_t stream_ = nullptr; | |||
SingleOp single_op(&stream_mu_, stream_); | |||
ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS); | |||
// SingleOp single_op(&stream_mu_, stream_); | |||
// | |||
// ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS); | |||
} | |||
/* | |||
TEST_F(UtestSingleOpModel, test_build_kernel_task) { | |||
@@ -148,7 +152,7 @@ TEST_F(UtestSingleOpModel, test_init) { | |||
ASSERT_EQ(op_model.Init(), FAILED); | |||
} | |||
*/ | |||
/* | |||
TEST_F(UtestSingleOpModel, test_parse_arg_table) { | |||
string model_data_str = "123456789"; | |||
SingleOpModel op_model("model", model_data_str.c_str(), model_data_str.size()); | |||
@@ -173,3 +177,23 @@ TEST_F(UtestSingleOpModel, test_parse_arg_table) { | |||
ASSERT_EQ(op.arg_table_[1].size(), 1); | |||
ASSERT_EQ(op.arg_table_[1].front(), &arg_base[0]); | |||
} | |||
*/ | |||
// Checks that GetProfilingArgs copies the task's model name into the TaskDescInfo
// and reports the task's model id through the out-parameter.
TEST_F(UtestSingleOpModel, test_op_task_get_profiler_args) { | |||
string name = "relu"; | |||
string type = "relu"; | |||
auto op_desc = std::make_shared<ge::OpDesc>(name, type); | |||
op_desc->SetStreamId(0); | |||
op_desc->SetId(0); | |||
// Populate only the members the assertions below depend on.
TbeOpTask task; | |||
task.op_desc_ = op_desc; | |||
task.model_name_ = "resnet_50"; | |||
task.model_id_ = 1; | |||
TaskDescInfo task_desc_info; | |||
// NOTE(review): model_id is left uninitialised and the Status returned by
// GetProfilingArgs is discarded; if the call fails before assigning model_id, the
// ASSERT_EQ on it reads an indeterminate value. Consider initialising model_id and
// asserting the returned status.
uint32_t model_id; | |||
task.GetProfilingArgs(task_desc_info, model_id); | |||
ASSERT_EQ(task_desc_info.model_name, "resnet_50"); | |||
ASSERT_EQ(model_id, 1); | |||
} | |||