Browse Source

Convert profiling data reporting to JSON format

tags/v1.2.0
zhengyuanhua 3 years ago
parent
commit
696c7f4b8f
17 changed files with 314 additions and 449 deletions
  1. +121
    -112
      ge/common/profiling/profiling_manager.cc
  2. +6
    -5
      ge/common/profiling/profiling_manager.h
  3. +1
    -1
      ge/graph/build/task_generator.cc
  4. +94
    -180
      ge/graph/load/model_manager/davinci_model.cc
  5. +0
    -3
      ge/graph/load/model_manager/davinci_model.h
  6. +7
    -53
      ge/hybrid/executor/worker/execution_engine.cc
  7. +2
    -3
      ge/hybrid/node_executor/aicore/aicore_node_executor.cc
  8. +2
    -3
      ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
  9. +3
    -29
      ge/hybrid/node_executor/task_context.cc
  10. +2
    -6
      ge/hybrid/node_executor/task_context.h
  11. +7
    -23
      ge/single_op/single_op.cc
  12. +24
    -9
      ge/single_op/task/op_task.cc
  13. +4
    -4
      ge/single_op/task/op_task.h
  14. +5
    -13
      inc/framework/common/ge_types.h
  15. +1
    -1
      tests/ut/ge/CMakeLists.txt
  16. +7
    -0
      tests/ut/ge/graph/load/davinci_model_unittest.cc
  17. +28
    -4
      tests/ut/ge/single_op/single_op_model_unittest.cc

+ 121
- 112
ge/common/profiling/profiling_manager.cc View File

@@ -20,6 +20,8 @@
#include "framework/common/debug/log.h"
#include "framework/common/string_util.h"
#include "graph/ge_context.h"
#include "graph/utils/type_utils.h"
#include "graph/types.h"
#include "runtime/base.h"
#include "graph/load/model_manager/davinci_model.h"

@@ -31,12 +33,30 @@ const char *const kBpPoint = "bp_point";
#ifdef DAVINCI_SUPPORT_PROFILING
const size_t kReportMaxLen = 2048;
const int32_t kMaxDeviceNum = 256;
const uint32_t kInteval = 2;
const std::string kConfigNumsdev = "devNums";
const std::string kConfigDevIdList = "devIdList";
const std::string kProfStart = "prof_start";
const std::string kProfStop = "prof_stop";
const std::string kProfModelSubscribe = "prof_model_subscribe";
const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe";
const std::string kModelName = "model_name";
const std::string kModelId = "model_id";
const std::string kOpNmae = "op_name";
const std::string kOptype = "op_type";
const std::string kBlockDim = "block_dims";
const std::string kTaskId = "task_id";
const std::string kStreamId = "stream_id";
const std::string kShapeType = "shape_type";
const std::string kCurIterNum = "cur_iter_num";
const std::string kTaskType = "task_type";
const std::string kInput = "input";
const std::string kOutput = "output";
const std::string kFormat = "format";
const std::string kDataType = "data_type";
const std::string kShape = "shape";
const std::string kIdx = "idx";

#endif
} // namespace

@@ -206,118 +226,69 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf
#endif
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo(
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingOpInputOutInfo(
const TaskDescInfo &task, Json &task_json) {
#ifdef DAVINCI_SUPPORT_PROFILING
std::string data;
for (const auto &task : task_desc_info) {
std::string model_name = task.model_name;
std::string op_name = task.op_name;
uint32_t block_dim = task.block_dim;
uint32_t task_id = task.task_id;
uint32_t stream_id = task.stream_id;
std::string shape_type = task.shape_type;
int64_t cur_iter_num = task.cur_iter_num;
uint32_t task_type = task.task_type;
data = model_name.append(" ")
.append(op_name).append(" ")
.append(std::to_string(block_dim)).append(" ")
.append(std::to_string(task_id)).append(" ")
.append(std::to_string(stream_id)).append(" ")
.append(std::to_string(model_id)).append(" ")
.append(shape_type).append(" ")
.append(std::to_string(cur_iter_num)).append(" ")
.append(std::to_string(task_type)).append("\n");

ReporterData reporter_data{};
reporter_data.deviceId = device_id;
reporter_data.data = (unsigned char *)data.c_str();
reporter_data.dataLen = data.size();
int ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "task_desc_info", sizeof("task_desc_info"));
if (ret != EOK) {
GELOGE(ret, "Report data tag of task_desc_info memcpy error!");
return;
}

int32_t cb_ret = CallMsprofReport(reporter_data);
if (cb_ret != 0) {
GELOGE(cb_ret, "Reporter data of task_desc_info failed, ret:%d", cb_ret);
return;
}
for (size_t i = 0; i < task.input_format.size(); i++) {
Json tmp_input;
tmp_input[kIdx] = i;
Format format = task.input_format[i];
tmp_input[kFormat] = TypeUtils::FormatToSerialString(format);
DataType data_type = task.input_data_type[i];
tmp_input[kDataType] = TypeUtils::DataTypeToSerialString(data_type);
tmp_input[kShape] = task.input_shape[i];
task_json[kInput] += tmp_input;
}

for (size_t i = 0; i < task.output_format.size(); i++) {
Json tmp_output;
tmp_output[kIdx] = i;
Format format = task.output_format[i];
tmp_output[kFormat] = TypeUtils::FormatToSerialString(format);
DataType data_type = task.output_data_type[i];
tmp_output[kDataType] = TypeUtils::DataTypeToSerialString(data_type);
tmp_output[kShape] = task.output_shape[i];
task_json[kOutput] += tmp_output;
}

data.clear();
#endif
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo(
uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, const int32_t &device_id) {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo(
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) {
#ifdef DAVINCI_SUPPORT_PROFILING
std::string data;
for (const auto &graph : compute_graph_desc_info) {
data.append("model_name:")
.append(graph.model_name)
.append(" op_name:")
.append(graph.op_name)
.append(" op_type:")
.append(graph.op_type);
for (size_t i = 0; i < graph.input_format.size(); ++i) {
data.append(" input_id:")
.append(std::to_string(i))
.append(" input_format:")
.append(std::to_string(graph.input_format.at(i)))
.append(" input_data_type:")
.append(std::to_string(graph.input_data_type.at(i)))
.append(" input_shape:\"");
size_t input_shape_len = graph.input_shape.at(i).size();
if (input_shape_len == 0) {
data.append("");
} else if (input_shape_len == 1) {
data.append(std::to_string(graph.input_shape.at(i).at(0)));
} else {
for (size_t j = 0; j < input_shape_len - 1; ++j) {
data.append(std::to_string(graph.input_shape.at(i).at(j))).append(",");
}
data.append(std::to_string(graph.input_shape.at(i).at(input_shape_len - 1)));
}

data.append("\"");
}

for (size_t i = 0; i < graph.output_format.size(); ++i) {
data.append(" output_id:")
.append(std::to_string(i))
.append(" output_format:")
.append(std::to_string(graph.output_format.at(i)))
.append(" output_data_type:")
.append(std::to_string(graph.output_data_type.at(i)))
.append(" output_shape:\"");
size_t output_shape_len = graph.output_shape.at(i).size();
if (output_shape_len == 0) {
data.append("");
} else if (output_shape_len == 1) {
data.append(std::to_string(graph.output_shape.at(i).at(0)));
} else {
for (size_t j = 0; j < output_shape_len - 1; ++j) {
data.append(std::to_string(graph.output_shape.at(i).at(j))).append(",");
}
data.append(std::to_string(graph.output_shape.at(i).at(output_shape_len - 1)));
}
data.append("\"");
for (const auto &task : task_desc_info) {
Json task_info;
task_info[kModelName] = task.model_name;
task_info[kModelId] = model_id;
task_info[kOpNmae] = task.op_name;
task_info[kOptype] = task.op_type;
task_info[kBlockDim] = task.block_dim;
task_info[kTaskType] = task.task_type;
task_info[kTaskId] = task.task_id;
task_info[kStreamId] = task.stream_id;
task_info[kCurIterNum] = task.cur_iter_num;
task_info[kShapeType] = task.shape_type;
ProfilingOpInputOutInfo(task, task_info);

std::string reported_data;
try {
reported_data = task_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
} catch (std::exception &e) {
GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
return ;
} catch (...) {
GELOGE(FAILED, "Failed to convert JSON to string.");
return;
}

data.append(" model_id:").append(std::to_string(model_id));
data.append(" task_id:").append(std::to_string(graph.task_id));
data.append(" stream_id:").append(std::to_string(graph.stream_id));
data.append("\n");

GraphDescReport(device_id, data);
data.clear();
reported_data.append(",")
.append("\n");
ReportData(device_id, reported_data, "task_desc_info");
}
#endif
}

void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &data) {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportData(
const int32_t &device_id, const string &data, const string &tag_name) {
#ifdef DAVINCI_SUPPORT_PROFILING
ReporterData reporter_data{};
int ret = -1;
@@ -325,36 +296,38 @@ void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &d
size_t index = data.size() / kReportMaxLen;
if (index >= 1) {
reporter_data.deviceId = device_id;
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info"));
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;);
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size());
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;);
for (size_t i = 0; i < index; ++i) {
reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * i;
reporter_data.dataLen = kReportMaxLen;
cb_ret = CallMsprofReport(reporter_data);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret);
return;);
}
reporter_data.dataLen = data.size() - kReportMaxLen * index;
if (reporter_data.dataLen != 0) {
reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * index;
cb_ret = CallMsprofReport(reporter_data);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret);
return;);
}
} else {
reporter_data.deviceId = device_id;
reporter_data.data = (unsigned char *)data.c_str();
reporter_data.dataLen = data.size();
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info"));
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;);
ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size());
GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;);

cb_ret = CallMsprofReport(reporter_data);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;);
GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret);
return;);
}
#endif
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData(
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) {
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info) {
#ifdef DAVINCI_SUPPORT_PROFILING
int32_t logic_device_id = 0;
rtError_t rt_ret = rtGetDevice(&logic_device_id);
@@ -365,8 +338,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr
GELOGD("current logic_device_id:%d", logic_device_id);
GELOGD("start ProfilingTaskDescInfo.");
ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id);
GELOGD("start ProfilingGraphDescInfo.");
ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id);
GELOGD("Report profiling data for GE end.");
#endif
}
@@ -813,6 +784,44 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMs
static_cast<void *>(&reporter_data), sizeof(ReporterData));
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetOpInputOutputInfo(
const OpDescPtr &op, TaskDescInfo &task_desc_info) const {
std::vector<Format> input_format;
std::vector<std::vector<int64_t>> input_shape;
std::vector<DataType> input_data_type;
for (size_t i = 0; i < op->GetAllInputsSize(); ++i) {
GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i);
if (input_tensor_desc == nullptr) {
continue;
}
input_format.emplace_back(input_tensor_desc->GetFormat());
input_shape.emplace_back(input_tensor_desc->GetShape().GetDims());
input_data_type.emplace_back(input_tensor_desc->GetDataType());
}
std::vector<Format> output_format;
std::vector<std::vector<int64_t>> output_shape;
std::vector<DataType> output_data_type;
for (size_t j = 0; j < op->GetOutputsSize(); ++j) {
GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j);
if (output_tensor_desc == nullptr) {
continue;
}
output_format.emplace_back(output_tensor_desc->GetFormat());
output_shape.emplace_back(output_tensor_desc->GetShape().GetDims());
output_data_type.emplace_back(output_tensor_desc->GetDataType());
}

std::vector<Format> format_default = { FORMAT_NULL };
std::vector<std::vector<int64_t>> shape_default = { {0} };
std::vector<DataType> data_type_default = { DT_UNDEFINED };
task_desc_info.input_format = input_format.empty() ? format_default : input_format;
task_desc_info.input_shape = input_shape.empty() ? shape_default : input_shape;
task_desc_info.input_data_type = input_data_type.empty() ? data_type_default : input_data_type;
task_desc_info.output_format = output_format.empty() ? format_default : output_format;
task_desc_info.output_shape = output_shape.empty() ? shape_default : output_shape;
task_desc_info.output_data_type = output_data_type.empty() ? data_type_default : output_data_type;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpPoint(
std::string &fp_point, std::string &bp_point) {
// Env or options mode, fp_point_/bp_point_ have initiliazed on profiling init


+ 6
- 5
ge/common/profiling/profiling_manager.h View File

@@ -54,6 +54,8 @@ namespace {

} // namespace
namespace ge {
class OpDesc;
using OpDescPtr = std::shared_ptr<OpDesc>;
struct DeviceSubsInfo {
uint64_t module;
uint32_t subscribe_count;
@@ -82,12 +84,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
bool ProfilingModelExecuteOn() const;
// is_execute_profiling_ only used by ge option and env
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; }
void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info);
void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info);
void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
const int32_t &device_id);
void ProfilingGraphDescInfo(uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
const int32_t &device_id);
void ProfilingOpInputOutInfo(const TaskDescInfo &task, Json &task_json);
Status PluginInit() const;
void PluginUnInit() const;
Status CallMsprofReport(ReporterData &reporter_data) const;
@@ -95,6 +95,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; }
void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; }
void GetFpBpPoint(std::string &fp_point, std::string &bp_point);
void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const;
void ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name);
private:
Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf);
Status ParseOptions(const std::string &options);
@@ -103,7 +105,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
Status ProfParseDeviceId(const std::map<std::string, std::string> &config_para,
vector<int32_t> &device_list);
uint64_t GetProfilingModule();
void GraphDescReport(const int32_t &device_id, const string &data);
void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector<int32_t> &device_list);
void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module);



+ 1
- 1
ge/graph/build/task_generator.cc View File

@@ -852,7 +852,7 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi

// subgraph of dynamic graph no need to find index, has been found in parent graph
if (IsSubGraphOfDynamicGraph(graph)) {
GELOGI("Graph[%s] is subgraph of dynamic graph, no nned to find index.", graph->GetName().c_str());
GELOGI("Graph[%s] is subgraph of dynamic graph, no need to find index.", graph->GetName().c_str());
return SUCCESS;
}



+ 94
- 180
ge/graph/load/model_manager/davinci_model.cc View File

@@ -92,6 +92,32 @@ const uint32_t kEndOfSequence = 0x0704000a;
const uint32_t kEndOfSequenceNew = 507005;
const int32_t kModelAbortNormal = 0x0704000e;
const int32_t kModelAbortNormalNew = 507024;
const uint32_t kInteval = 2;
const char *const kModelName = "model_name";
const char *const kModeleId = "model_id";
const char *const kLoadStartTime = "load_start_time";
const char *const kLoadEndTime = "load_end_time";
const char *const kFusionOpInfo = "fusion_op_info";
const char *const kFusionOpName = "fusion_op_name";
const char *const kOriginalOpNum = "origin_op_num";
const char *const kOriginalOpName = "origin_op_name";
const char *const kStreamId = "stream_id";
const char *const kFusionOpMemoryInfo = "memory_info";
const char *const kInputSize = "input_size";
const char *const kOutputSize = "output_size";
const char *const kWeightSize = "weight_size";
const char *const kWorkSpaceSize = "workspace_size";
const char *const kTotalSize = "total_size";
const char *const kTaskCount = "task_count";
const char *const kTaskId = "task_id";
const char* const kRequestId = "request_id";
const char* const kThreadId = "thread_id";
const char* const kInputBeginTime = "input_begin_time";
const char* const kInputEndTime = "input_end_time";
const char* const kInferBeginTime = "infer_begin_time";
const char* const kInferEndTime = "infer_end_time";
const char* const kOutputBeginTime = "output_start_time";
const char* const kOutputEndTime = "output_end_time";

inline bool IsDataOp(const std::string &node_type) {
return (node_type == DATA_TYPE) || (node_type == AIPP_DATA_TYPE) || (node_type == ANN_DATA_TYPE);
@@ -744,13 +770,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
}

Status DavinciModel::ReportProfilingData() {
std::vector<ComputeGraphDescInfo> compute_graph_desc_info;
Status ret = GetComputeGraphInfo(compute_graph_desc_info);
if (ret != SUCCESS) {
GELOGE(ret, "GetComputeGraphInfo failed.");
return ret;
}
ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info);
ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo());
GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed.");

return SUCCESS;
@@ -2202,173 +2222,101 @@ Status DavinciModel::InitModelProfile() {
}

Status DavinciModel::SinkModelProfile() {
// profiling plugin must be registered
auto &prof_mgr = ProfilingManager::Instance();
ReporterData reporter_data{};
// report model data tag name
std::string tag_name("model_load_info_" + std::to_string(this->Id()));
GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK,
return FAILED, "Sink model tag memcpy error.");

// Model Header
std::string name = om_name_.empty() ? name_ : om_name_;
size_t name_len = name.size();
reporter_data.deviceId = device_id_;
reporter_data.data = (unsigned char *)&name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

reporter_data.data = (unsigned char *)name.c_str();
reporter_data.dataLen = name.size();
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

uint32_t model_id = this->Id();
reporter_data.data = (unsigned char *)&model_id;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// Load Start/End Time
int64_t start_time = this->GetLoadBeginTime();
reporter_data.data = (unsigned char *)&start_time;
reporter_data.dataLen = sizeof(int64_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

int64_t end_time = this->GetLoadEndTime();
reporter_data.data = (unsigned char *)&end_time;
reporter_data.dataLen = sizeof(int64_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

Json model_load_info;
model_load_info[kModelName] = name;
model_load_info[kModeleId] = model_id;
model_load_info[kLoadStartTime] = start_time;
model_load_info[kLoadEndTime] = end_time;
// fusion op info
using CIT = std::multimap<uint32_t, uint32_t>::const_iterator;
using Range = std::pair<CIT, CIT>;
for (const ProfileInfo &profile : profile_list_) {
// op name after fusion
Json fusion_op_info;
string fusion_op_name = profile.fusion_info.op_name;
int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size();
reporter_data.data = (unsigned char *)&fusion_op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

reporter_data.data = (unsigned char *)fusion_op_name.c_str();
reporter_data.dataLen = fusion_op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// original op name before fusion
uint32_t op_num = profile.fusion_info.original_op_names.size();
reporter_data.data = (unsigned char *)&op_num;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

vector<string> original_name;
for (uint32_t k = 0; k < op_num; k++) {
std::string op_name = profile.fusion_info.original_op_names[k];
int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size();
reporter_data.data = (unsigned char *)&op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
reporter_data.data = (unsigned char *)op_name.c_str();
reporter_data.dataLen = op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
}

// stream id info
original_name.emplace_back(profile.fusion_info.original_op_names[k]);
}
uint32_t stream_id = 0;
auto iter = profiler_report_op_info_.find(fusion_op_name);
if (iter != profiler_report_op_info_.end()) {
stream_id = iter->second.second;
}
reporter_data.data = (unsigned char *)&stream_id;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// memory info
reporter_data.data = (unsigned char *)&profile.memory_info;
reporter_data.dataLen = sizeof(profile.memory_info);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// task info
reporter_data.data = (unsigned char *)&profile.task_count;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

fusion_op_info[kFusionOpName] = fusion_op_name;
fusion_op_info[kOriginalOpNum] = op_num;
fusion_op_info[kOriginalOpName] = original_name;
fusion_op_info[kStreamId] = stream_id;
fusion_op_info[kFusionOpMemoryInfo][kInputSize] = profile.memory_info.input_size;
fusion_op_info[kFusionOpMemoryInfo][kOutputSize] = profile.memory_info.output_size;
fusion_op_info[kFusionOpMemoryInfo][kWeightSize] = profile.memory_info.weight_size;
fusion_op_info[kFusionOpMemoryInfo][kWorkSpaceSize] = profile.memory_info.workspace_size;
fusion_op_info[kFusionOpMemoryInfo][kTotalSize] = profile.memory_info.total_size;
fusion_op_info[kTaskCount] = profile.task_count;
vector<uint32_t> task_id;
Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index);
for (CIT idx = task_range.first; idx != task_range.second; ++idx) {
uint32_t task_id = idx->second;
reporter_data.data = (unsigned char *)&task_id;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
task_id.push_back(idx->second);
}
fusion_op_info[kTaskId] = task_id;
model_load_info[kFusionOpInfo] += fusion_op_info;
}

std::string tag_name("model_load_info_" + std::to_string(this->Id()));
std::string reported_data;
try {
reported_data = model_load_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
} catch (std::exception &e) {
GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
} catch (...) {
GELOGE(FAILED, "Failed to convert JSON to string.");
}
reported_data.append(",")
.append("\n");
prof_mgr.ReportData(device_id_, reported_data, tag_name);
return SUCCESS;
}

Status DavinciModel::SinkTimeProfile(const InputData &current_data) {
// profiling plugin must be registered
auto &prof_mgr = ProfilingManager::Instance();
ReporterData reporter_data{};

string name = om_name_.empty() ? name_ : om_name_;
Json model_time_info;
model_time_info[kModelName] = name;
model_time_info[kModeleId] = this->Id();
model_time_info[kRequestId] = current_data.request_id;
model_time_info[kThreadId] = GetDataInputTid();
model_time_info[kInputBeginTime] = time_info_.processBeginTime;
model_time_info[kInputEndTime] = time_info_.processEndTime;
model_time_info[kInferBeginTime] = time_info_.inferenceBeginTime;
model_time_info[kInferEndTime] = time_info_.inferenceEndTime;
model_time_info[kOutputBeginTime] = time_info_.dumpBeginTime;
model_time_info[kOutputEndTime] = time_info_.dumpEndTime;

// report model data tag name
std::string tag_name;
tag_name.append("model_time_info_")
.append(std::to_string(this->Id()))
.append("_")
.append(std::to_string(current_data.index));

GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK,
return FAILED, "Sink model tag memcpy error.");
// device id
reporter_data.deviceId = device_id_;

// Model Header
string name;
if (!om_name_.empty()) {
name = om_name_;
} else {
name = name_;
}
size_t name_len = name.size();
reporter_data.data = (unsigned char *)&name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

reporter_data.data = (unsigned char *)name.c_str();
reporter_data.dataLen = name.size();
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// request id
uint64_t request_id = current_data.request_id;
reporter_data.data = (unsigned char *)&request_id;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index);

// thread id
int32_t thread_id = GetDataInputTid();
reporter_data.data = (unsigned char *)&thread_id;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index);

// time info
time_info_.modelId = this->Id();
reporter_data.data = (unsigned char *)&time_info_;
reporter_data.dataLen = sizeof(struct timeInfo);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index);
.append(std::to_string(this->Id()))
.append("_")
.append(std::to_string(current_data.index));
std::string reported_data;
try {
reported_data = model_time_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
} catch (std::exception &e) {
GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
} catch (...) {
GELOGE(FAILED, "Failed to convert JSON to string.");
}
reported_data.append(",")
.append("\n");
prof_mgr.ReportData(device_id_, reported_data, tag_name);

return SUCCESS;
}
@@ -3069,13 +3017,15 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo
task_desc_info.model_name = name_;
}
task_desc_info.op_name = op->GetName();
task_desc_info.op_type = op->GetType();
task_desc_info.block_dim = task_def.kernel().block_dim();
task_desc_info.task_id = task->GetTaskID();
task_desc_info.stream_id = task->GetStreamId();
task_desc_info.shape_type = "static";
task_desc_info.cur_iter_num = 0;
// task type
task_desc_info.task_type = kTaskTypeInvalid;
auto &prof_mgr = ProfilingManager::Instance();
prof_mgr.GetOpInputOutputInfo(op, task_desc_info);
auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type());
if (model_task_type == RT_MODEL_TASK_KERNEL) {
const domi::KernelDef &kernel_def = task_def.kernel();
@@ -3107,7 +3057,6 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo
task_desc_info_.emplace_back(task_desc_info);
}
}
return;
}

Status DavinciModel::DistributeTask() {
@@ -4008,41 +3957,6 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea
main_follow_stream_mapping_[main_stream_id].emplace_back(stream);
}

Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info) {
auto &all_op_desc = data_dumper_.GetAllOpDescInfo();
for (auto &op_desc : all_op_desc) {
ComputeGraphDescInfo compute_graph_info;
if (!om_name_.empty()) {
compute_graph_info.model_name = om_name_;
} else {
compute_graph_info.model_name = name_;
}

std::vector<Format> format = { FORMAT_NULL };
std::vector<std::vector<int64_t>> shape = { {0} };
std::vector<DataType> data_type = { DT_UNDEFINED };
compute_graph_info.op_name = op_desc.op_name;
compute_graph_info.op_type = op_desc.op_type;
compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format;
compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape;
compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type;
compute_graph_info.output_format = op_desc.output_format.empty() ? format : op_desc.output_format;
compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape;
compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? data_type : op_desc.output_data_type;
uint32_t task_id = 0;
uint32_t stream_id = 0;
auto iter = profiler_report_op_info_.find(op_desc.op_name);
if (iter != profiler_report_op_info_.end()) {
task_id = iter->second.first;
stream_id = iter->second.second;
}
compute_graph_info.task_id = task_id;
compute_graph_info.stream_id = stream_id;
graph_desc_info.emplace_back(compute_graph_info);
}
return SUCCESS;
}

void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) {
if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) {
tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_;


+ 0
- 3
ge/graph/load/model_manager/davinci_model.h View File

@@ -840,9 +840,6 @@ class DavinciModel {

Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id);

// get desc info of graph for profiling
Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info);

void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name);

Status InitL1DataDumperArgs();


+ 7
- 53
ge/hybrid/executor/worker/execution_engine.cc View File

@@ -70,8 +70,6 @@ class NodeDoneCallback {
Status PrepareConstInputs(const NodeItem &node_item);
Status DumpDynamicNode();
Status ProfilingReport();
Status GetGraphDescInfo(const NodePtr node, const HybridModel *model,
std::vector<ComputeGraphDescInfo> &compute_graph_info);
Status GetTaskDescInfo(const NodePtr node, const HybridModel *model,
std::vector<TaskDescInfo> &task_desc_info);
GraphExecutionContext *graph_context_;
@@ -159,51 +157,14 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *
}

GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str());
auto &prof_mgr = ProfilingManager::Instance();
task_desc_info = context_->GetProfilingTaskDescInfo();
context_->ClearProfilingTaskDescInfo();

return SUCCESS;
}

Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel *model,
std::vector<ComputeGraphDescInfo> &compute_graph_info) {
GE_CHECK_NOTNULL(node);
GE_CHECK_NOTNULL(model);

GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str());
compute_graph_info = context_->GetProfilingGraphDescInfo();
context_->ClearProfilingGraphDescInfo();

auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
for (auto &tmp_compute_graph_info : compute_graph_info) {
// default
if (op_desc->GetAllInputsSize() == 0) {
tmp_compute_graph_info.input_format = { FORMAT_NULL };
tmp_compute_graph_info.input_shape = { {0} };
tmp_compute_graph_info.input_data_type = { DT_UNDEFINED };
}
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i);
if (input_desc == nullptr) {
continue;
}
tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat());
tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims());
tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType());
}

if (op_desc->GetOutputsSize() == 0) {
tmp_compute_graph_info.output_format = { FORMAT_NULL };
tmp_compute_graph_info.output_shape = { {0} };
tmp_compute_graph_info.output_data_type = { DT_UNDEFINED };
}
for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) {
GeTensorDesc output_desc = op_desc->GetOutputDesc(j);
tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat());
tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
}
for (auto &tmp_task_desc : task_desc_info) {
// save op input and output info
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
prof_mgr.GetOpInputOutputInfo(op_desc, tmp_task_desc);
}

return SUCCESS;
@@ -233,15 +194,8 @@ Status NodeDoneCallback::ProfilingReport() {
return profiling_ret;
}

std::vector<ComputeGraphDescInfo> compute_graph_info;
profiling_ret = GetGraphDescInfo(node, model, compute_graph_info);
if (profiling_ret != RT_ERROR_NONE) {
GELOGE(profiling_ret, "Get graph info of node[%s] failed.", node->GetName().c_str());
return profiling_ret;
}

auto &profiling_manager = ProfilingManager::Instance();
profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info);
profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info);
return SUCCESS;
}



+ 2
- 3
ge/hybrid/node_executor/aicore/aicore_node_executor.cc View File

@@ -189,12 +189,11 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
uint32_t stream_id = 0;
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Get task_id and stream_id failed.");
return FAILED;
GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim());
(void)context.SaveProfilingGraphDescInfo(task_id, stream_id);
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
}


+ 2
- 3
ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc View File

@@ -201,12 +201,11 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void(
uint32_t stream_id = 0;
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Get task_id and stream_id failed.");
return FAILED;
GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0);
(void)context.SaveProfilingGraphDescInfo(task_id, stream_id);
auto callback = [=, &context]() {
GELOGD("Node[%s] callback start.", node_name_.c_str());
RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start");


+ 3
- 29
ge/hybrid/node_executor/task_context.cc View File

@@ -515,7 +515,7 @@ Status TaskContext::Synchronize() {
}

Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id,
uint32_t task_type, uint32_t block_dim) {
const std::string &task_type, uint32_t block_dim) {
if (ProfilingManager::Instance().ProfilingModelExecuteOn()) {
const NodeItem &node_item = GetNodeItem();
auto op_desc = node_item.GetOpDesc();
@@ -525,11 +525,11 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream
const HybridModel *model = graph_context->model;
GE_CHECK_NOTNULL(model);

std::string op_name = op_desc->GetName();
std::string dynamic_model_name = model->GetModelName();
TaskDescInfo tmp_task_desc_info;
tmp_task_desc_info.model_name = dynamic_model_name;
tmp_task_desc_info.op_name = op_name;
tmp_task_desc_info.op_name = op_desc->GetName();
tmp_task_desc_info.op_type = op_desc->GetType();
tmp_task_desc_info.block_dim = block_dim;
tmp_task_desc_info.task_type = task_type;
tmp_task_desc_info.task_id = task_id;
@@ -546,31 +546,5 @@ NodeState *TaskContext::GetNodeState() const {
return node_state_;
}

// Records a graph-description profiling entry for the current node, but only
// when model-execute profiling is switched on and the op is a TVM (AI Core
// compiled) kernel; other imply types are intentionally skipped.
// @param task_id    runtime task id obtained after kernel launch.
// @param stream_id  runtime stream id obtained after kernel launch.
// @return SUCCESS always; GE_CHECK_NOTNULL macros return early on null inputs.
Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id) {
if (ProfilingManager::Instance().ProfilingModelExecuteOn()) {
const NodeItem &node_item = GetNodeItem();
auto op_desc = node_item.GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
const GraphExecutionContext *graph_context = GetExecutionContext();
GE_CHECK_NOTNULL(graph_context);
const HybridModel *model = graph_context->model;
GE_CHECK_NOTNULL(model);

std::string dynamic_model_name = model->GetModelName();
// Only TVM-implemented ops contribute graph-desc records; the attribute
// lookup may fail for ops without an imply type, which also skips them.
auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID);
if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) &&
op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) {
ComputeGraphDescInfo tmp_compute_graph_info;
tmp_compute_graph_info.model_name = dynamic_model_name;
tmp_compute_graph_info.op_name = op_desc->GetName();
tmp_compute_graph_info.op_type = op_desc->GetType();
tmp_compute_graph_info.task_id = task_id;
tmp_compute_graph_info.stream_id = stream_id;
// Accumulated per TaskContext; drained later via GetProfilingGraphDescInfo.
compute_graph_info.emplace_back(tmp_compute_graph_info);
}
}
return SUCCESS;
}

} // namespace hybrid
} // namespace ge

+ 2
- 6
ge/hybrid/node_executor/task_context.h View File

@@ -113,13 +113,10 @@ class TaskContext {
void *handle_ = nullptr;

const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; }
Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, uint32_t task_type, uint32_t block_dim);
Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id,
const std::string &task_type, uint32_t block_dim);
void ClearProfilingTaskDescInfo() { task_desc_info.clear(); }

const std::vector<ComputeGraphDescInfo>& GetProfilingGraphDescInfo() const { return compute_graph_info; }
Status SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id);
void ClearProfilingGraphDescInfo() { compute_graph_info.clear(); }

private:
TaskContext(GraphExecutionContext *execution_context,
NodeState *node_state,
@@ -141,7 +138,6 @@ class TaskContext {
uint32_t task_id_ = 0;
uint32_t stream_id_ = 0;
std::vector<TaskDescInfo> task_desc_info;
std::vector<ComputeGraphDescInfo> compute_graph_info;
};
} // namespace hybrid
} // namespace ge


+ 7
- 23
ge/single_op/single_op.cc View File

@@ -45,40 +45,24 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) {
return SUCCESS;
}

string model_name;
string op_name;
TaskDescInfo tmp_task_desc_info;
uint32_t model_id;
uint32_t block_dim;
if (op_task->GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) {
if (op_task->GetProfilingArgs(tmp_task_desc_info, model_id) != SUCCESS) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed");
return ACL_ERROR_GE_PARAM_INVALID;
}
GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str());
std::vector<TaskDescInfo> task_desc_info;
uint32_t task_id = 0;
uint32_t stream_id = 0;
auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Get task_id and stream_id failed.");
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGD("ProfilingReport of op[%s] model[%s] start.",
tmp_task_desc_info.op_name.c_str(), tmp_task_desc_info.model_name.c_str());

TaskDescInfo tmp_task_desc_info;
tmp_task_desc_info.model_name = model_name;
tmp_task_desc_info.op_name = op_name;
tmp_task_desc_info.block_dim = block_dim;
tmp_task_desc_info.task_id = task_id;
tmp_task_desc_info.stream_id = stream_id;
tmp_task_desc_info.shape_type = shape_type;
tmp_task_desc_info.cur_iter_num = 0;
tmp_task_desc_info.task_type = op_task->GetTaskType();
GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id);
task_desc_info.emplace_back(tmp_task_desc_info);

std::vector<ComputeGraphDescInfo> compute_graph_info;
std::vector<TaskDescInfo> task_desc_info;
task_desc_info.emplace_back(tmp_task_desc_info);

auto &profiling_manager = ProfilingManager::Instance();
profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info);
profiling_manager.ReportProfilingData(model_id, task_desc_info);
return SUCCESS;
}
} // namespace


+ 24
- 9
ge/single_op/task/op_task.cc View File

@@ -23,6 +23,7 @@
#include "aicpu/common/aicpu_task_struct.h"
#include "common/dump/dump_manager.h"
#include "common/dump/dump_op.h"
#include "common/profiling/profiling_manager.h"
#include "common/formats/formats.h"
#include "common/math/math_util.h"
#include "framework/common/debug/log.h"
@@ -108,15 +109,29 @@ void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) {
model_id_ = model_id;
}

Status OpTask::GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id,
uint32_t &block_dim) {
model_name = model_name_;
model_id = model_id_;
block_dim = block_dim_;
Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id) {
uint32_t task_id = 0;
uint32_t stream_id = 0;
auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Get task_id and stream_id failed ret: 0x%X.", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GE_CHECK_NOTNULL(op_desc_);
op_name = op_desc_->GetName();
string op_name = op_desc_->GetName();
GELOGD("Get profiling args of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id);
model_id = model_id_;
task_desc_info.model_name = model_name_;
task_desc_info.block_dim = block_dim_;
task_desc_info.task_id = task_id;
task_desc_info.stream_id = stream_id;
task_desc_info.op_name = op_name;
task_desc_info.op_type = op_desc_->GetType();
auto &prof_mgr = ProfilingManager::Instance();
prof_mgr.GetOpInputOutputInfo(op_desc_, task_desc_info);
return SUCCESS;
}

Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) {
return UNSUPPORTED;
}
@@ -153,7 +168,7 @@ Status OpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
return UNSUPPORTED;
}

uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; }
const std::string &OpTask::GetTaskType() const { return kTaskTypeInvalid; }

TbeOpTask::~TbeOpTask() {
if (sm_desc_ != nullptr) {
@@ -171,7 +186,7 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; }

const std::string &TbeOpTask::GetStubName() const { return stub_name_; }

uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; }
const std::string &TbeOpTask::GetTaskType() const { return kTaskTypeAicore; }

void TbeOpTask::SetHandle(void *handle) {
this->handle_ = handle;
@@ -834,7 +849,7 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam &param) {
return DoUpdateArgTable(param, false);
}

uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; }
const std::string &AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; }

void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) {
arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data());


+ 4
- 4
ge/single_op/task/op_task.h View File

@@ -43,7 +43,7 @@ class OpTask {
const vector<GeTensorDesc> &output_desc);
virtual Status UpdateArgTable(const SingleOpModelParam &param);
void SetModelArgs(std::string model_name, uint32_t model_id);
Status GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, uint32_t &block_dim);
Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id);
const OpDescPtr &GetOpdesc() const {return op_desc_;}
Status OpenDump(rtStream_t stream);
virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0;
@@ -52,7 +52,7 @@ class OpTask {
std::vector<GeTensorDesc> &output_desc,
std::vector<DataBuffer> &output_buffers,
rtStream_t stream);
virtual uint32_t GetTaskType() const;
virtual const std::string &GetTaskType() const;

protected:
Status DoUpdateArgTable(const SingleOpModelParam &param, bool keep_workspace);
@@ -88,7 +88,7 @@ class TbeOpTask : public OpTask {
size_t GetArgSize() const;
const std::string &GetStubName() const;
void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size);
uint32_t GetTaskType() const override;
const std::string &GetTaskType() const override;
void SetHandle(void *handle);

private:
@@ -123,7 +123,7 @@ class AiCpuBaseTask : public OpTask {
~AiCpuBaseTask() override;
UnknowShapeOpType GetUnknownType() const { return unknown_type_; }
Status UpdateArgTable(const SingleOpModelParam &param) override;
uint32_t GetTaskType() const override;
const std::string &GetTaskType() const override;

protected:
Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);


+ 5
- 13
inc/framework/common/ge_types.h View File

@@ -57,9 +57,9 @@ const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM";
const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement";

// profiling data
const uint32_t kTaskTypeAicore = 0;
const uint32_t kTaskTypeAicpu = 1;
const uint32_t kTaskTypeInvalid = 0xFFFF;
const std::string kTaskTypeAicore = "AI_CORE";
const std::string kTaskTypeAicpu = "AI_CPU";
const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID";

// Data cache, including data address and length
struct DataBuffer {
@@ -251,27 +251,19 @@ struct Options {
struct TaskDescInfo {
std::string model_name;
std::string op_name;
std::string op_type;
uint32_t block_dim;
uint32_t task_id;
uint32_t stream_id;
std::string shape_type;
int64_t cur_iter_num;
uint32_t task_type;
};

// Profiling info of graph
struct ComputeGraphDescInfo {
std::string model_name;
std::string op_name;
std::string op_type;
std::string task_type;
std::vector<Format> input_format;
std::vector<std::vector<int64_t>> input_shape;
std::vector<DataType> input_data_type;
std::vector<Format> output_format;
std::vector<std::vector<int64_t>> output_shape;
std::vector<DataType> output_data_type;
uint32_t task_id;
uint32_t stream_id;
};

struct OpDescInfo {


+ 1
- 1
tests/ut/ge/CMakeLists.txt View File

@@ -761,7 +761,7 @@ set(GENERATOR_TEST_FILES
)

set(SINGLE_OP_TEST_FILES
#"single_op/single_op_model_unittest.cc"
"single_op/single_op_model_unittest.cc"
"single_op/single_op_manager_unittest.cc"
"single_op/stream_resource_unittest.cc"
"single_op/single_op_task_unittest.cc"


+ 7
- 0
tests/ut/ge/graph/load/davinci_model_unittest.cc View File

@@ -890,4 +890,11 @@ TEST_F(UtestDavinciModel, Sink_model_profile) {
model.SinkModelProfile();
}

// Smoke test: SinkTimeProfile must run to completion on a freshly constructed
// model when the msprof reporter callback stub is installed. No assertions —
// the test guards against crashes/UB in the time-profiling sink path.
TEST_F(UtestDavinciModel, Sink_time_profile) {
ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport;
DavinciModel model(0, nullptr);
InputData current_data;
model.SinkTimeProfile(current_data);
}

} // namespace ge

+ 28
- 4
tests/ut/ge/single_op/single_op_model_unittest.cc View File

@@ -40,6 +40,10 @@ class UtestSingleOpModel : public testing::Test {
void TearDown() {}
};

//rt api stub
// Stub of the runtime API used by the single-op profiling code under test.
// The original stub returned RT_ERROR_NONE without writing the out-params,
// leaving *taskId/*streamId indeterminate for callers (undefined behavior
// when tests subsequently read them). Write deterministic values instead,
// and tolerate null pointers defensively.
rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId) {
  if (taskId != nullptr) {
    *taskId = 0U;  // deterministic stub value
  }
  if (streamId != nullptr) {
    *streamId = 0U;  // deterministic stub value
  }
  return RT_ERROR_NONE;
}
/*
TEST_F(UtestSingleOpModel, test_init_model) {
string model_data_str = "123456789";
@@ -101,9 +105,9 @@ TEST_F(UtestSingleOpModel, test_set_inputs_and_outputs) {

std::mutex stream_mu_;
rtStream_t stream_ = nullptr;
SingleOp single_op(&stream_mu_, stream_);
ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS);
// SingleOp single_op(&stream_mu_, stream_);
//
// ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS);
}
/*
TEST_F(UtestSingleOpModel, test_build_kernel_task) {
@@ -148,7 +152,7 @@ TEST_F(UtestSingleOpModel, test_init) {
ASSERT_EQ(op_model.Init(), FAILED);
}
*/
/*
TEST_F(UtestSingleOpModel, test_parse_arg_table) {
string model_data_str = "123456789";
SingleOpModel op_model("model", model_data_str.c_str(), model_data_str.size());
@@ -173,3 +177,23 @@ TEST_F(UtestSingleOpModel, test_parse_arg_table) {
ASSERT_EQ(op.arg_table_[1].size(), 1);
ASSERT_EQ(op.arg_table_[1].front(), &arg_base[0]);
}
*/
// Verifies OpTask::GetProfilingArgs copies the model identity (model_name_,
// model_id_) set on the task into the reported TaskDescInfo / out-param.
TEST_F(UtestSingleOpModel, test_op_task_get_profiler_args) {
string name = "relu";
string type = "relu";
auto op_desc = std::make_shared<ge::OpDesc>(name, type);
op_desc->SetStreamId(0);
op_desc->SetId(0);
TbeOpTask task;
task.op_desc_ = op_desc;
task.model_name_ = "resnet_50";
task.model_id_ = 1;
TaskDescInfo task_desc_info;
// Initialize to a sentinel so a failed call cannot leave an indeterminate
// value feeding the assertions below.
uint32_t model_id = 0;
// Check the status instead of discarding it: if GetProfilingArgs fails the
// outputs are meaningless and the later assertions would be misleading.
ASSERT_EQ(task.GetProfilingArgs(task_desc_info, model_id), SUCCESS);

ASSERT_EQ(task_desc_info.model_name, "resnet_50");
ASSERT_EQ(model_id, 1);
}



Loading…
Cancel
Save