profiling file to json

4 years ago · 696c7f4b8f
--- a/ge/common/profiling/profiling_manager.cc
+++ b/ge/common/profiling/profiling_manager.cc
@@ -20,6 +20,8 @@
 #include "framework/common/debug/log.h"
 #include "framework/common/string_util.h"
 #include "graph/ge_context.h"
 #include "graph/utils/type_utils.h"
 #include "graph/types.h"
 #include "runtime/base.h"
 #include "graph/load/model_manager/davinci_model.h"

@@ -31,12 +33,30 @@ const char *const kBpPoint = "bp_point";
 #ifdef DAVINCI_SUPPORT_PROFILING
 const size_t kReportMaxLen = 2048;
 const int32_t kMaxDeviceNum = 256;
 const uint32_t kInteval = 2;
 const std::string kConfigNumsdev = "devNums";
 const std::string kConfigDevIdList = "devIdList";
 const std::string kProfStart = "prof_start";
 const std::string kProfStop = "prof_stop";
 const std::string kProfModelSubscribe = "prof_model_subscribe";
 const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe";
 const std::string kModelName = "model_name";
 const std::string kModelId = "model_id";
 const std::string kOpNmae = "op_name";
 const std::string kOptype = "op_type";
 const std::string kBlockDim = "block_dims";
 const std::string kTaskId = "task_id";
 const std::string kStreamId = "stream_id";
 const std::string kShapeType = "shape_type";
 const std::string kCurIterNum = "cur_iter_num";
 const std::string kTaskType = "task_type";
 const std::string kInput = "input";
 const std::string kOutput = "output";
 const std::string kFormat = "format";
 const std::string kDataType = "data_type";
 const std::string kShape = "shape";
 const std::string kIdx = "idx";

 #endif
 }  // namespace

@@ -206,118 +226,69 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf
 #endif
 }

 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo(
    uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) {
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingOpInputOutInfo(
    const TaskDescInfo &task, Json &task_json) {
 #ifdef DAVINCI_SUPPORT_PROFILING
  std::string data;
  for (const auto &task : task_desc_info) {
    std::string model_name = task.model_name;
    std::string op_name = task.op_name;
    uint32_t block_dim = task.block_dim;
    uint32_t task_id = task.task_id;
    uint32_t stream_id = task.stream_id;
    std::string shape_type = task.shape_type;
    int64_t cur_iter_num = task.cur_iter_num;
    uint32_t task_type = task.task_type;
    data = model_name.append(" ")
                     .append(op_name).append(" ")
                     .append(std::to_string(block_dim)).append(" ")
                     .append(std::to_string(task_id)).append(" ")
                     .append(std::to_string(stream_id)).append(" ")
                     .append(std::to_string(model_id)).append(" ")
                     .append(shape_type).append(" ")
                     .append(std::to_string(cur_iter_num)).append(" ")
                     .append(std::to_string(task_type)).append("\n");

    ReporterData reporter_data{};
    reporter_data.deviceId = device_id;
    reporter_data.data = (unsigned char *)data.c_str();
    reporter_data.dataLen = data.size();
    int ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "task_desc_info", sizeof("task_desc_info"));
    if (ret != EOK) {
      GELOGE(ret, "Report data tag of task_desc_info memcpy error!");
      return;
    }

    int32_t cb_ret = CallMsprofReport(reporter_data);
    if (cb_ret != 0) {
      GELOGE(cb_ret, "Reporter data of task_desc_info failed, ret:%d", cb_ret);
      return;
    }
  for (size_t i = 0; i < task.input_format.size(); i++) {
    Json tmp_input;
    tmp_input[kIdx] = i;
    Format format = task.input_format[i];
    tmp_input[kFormat] = TypeUtils::FormatToSerialString(format);
    DataType data_type = task.input_data_type[i];
    tmp_input[kDataType] = TypeUtils::DataTypeToSerialString(data_type);
    tmp_input[kShape] = task.input_shape[i];
    task_json[kInput] += tmp_input;
  }

  for (size_t i = 0; i < task.output_format.size(); i++) {
    Json tmp_output;
    tmp_output[kIdx] = i;
    Format format = task.output_format[i];
    tmp_output[kFormat] =  TypeUtils::FormatToSerialString(format);
    DataType data_type = task.output_data_type[i];
    tmp_output[kDataType] = TypeUtils::DataTypeToSerialString(data_type);
    tmp_output[kShape] = task.output_shape[i];
    task_json[kOutput] += tmp_output;
  }

  data.clear();
 #endif
 }

 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo(
    uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, const int32_t &device_id) {
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo(
  uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) {
 #ifdef DAVINCI_SUPPORT_PROFILING
  std::string data;
  for (const auto &graph : compute_graph_desc_info) {
    data.append("model_name:")
        .append(graph.model_name)
        .append(" op_name:")
        .append(graph.op_name)
        .append(" op_type:")
        .append(graph.op_type);
    for (size_t i = 0; i < graph.input_format.size(); ++i) {
      data.append(" input_id:")
          .append(std::to_string(i))
          .append(" input_format:")
          .append(std::to_string(graph.input_format.at(i)))
          .append(" input_data_type:")
          .append(std::to_string(graph.input_data_type.at(i)))
          .append(" input_shape:\"");
      size_t input_shape_len = graph.input_shape.at(i).size();
      if (input_shape_len == 0) {
        data.append("");
      } else if (input_shape_len == 1) {
        data.append(std::to_string(graph.input_shape.at(i).at(0)));
      } else {
        for (size_t j = 0; j < input_shape_len - 1; ++j) {
          data.append(std::to_string(graph.input_shape.at(i).at(j))).append(",");
        }
        data.append(std::to_string(graph.input_shape.at(i).at(input_shape_len - 1)));
      }

      data.append("\"");
    }

    for (size_t i = 0; i < graph.output_format.size(); ++i) {
      data.append(" output_id:")
          .append(std::to_string(i))
          .append(" output_format:")
          .append(std::to_string(graph.output_format.at(i)))
          .append(" output_data_type:")
          .append(std::to_string(graph.output_data_type.at(i)))
          .append(" output_shape:\"");
      size_t output_shape_len = graph.output_shape.at(i).size();
      if (output_shape_len == 0) {
        data.append("");
      } else if (output_shape_len == 1) {
        data.append(std::to_string(graph.output_shape.at(i).at(0)));
      } else {
        for (size_t j = 0; j < output_shape_len - 1; ++j) {
          data.append(std::to_string(graph.output_shape.at(i).at(j))).append(",");
        }
        data.append(std::to_string(graph.output_shape.at(i).at(output_shape_len - 1)));
      }
      data.append("\"");
  for (const auto &task : task_desc_info) {
    Json task_info;
    task_info[kModelName] = task.model_name;
    task_info[kModelId] = model_id;
    task_info[kOpNmae] = task.op_name;
    task_info[kOptype] = task.op_type;
    task_info[kBlockDim] = task.block_dim;
    task_info[kTaskType] = task.task_type;
    task_info[kTaskId] = task.task_id;
    task_info[kStreamId] = task.stream_id;
    task_info[kCurIterNum] = task.cur_iter_num;
    task_info[kShapeType] = task.shape_type;
    ProfilingOpInputOutInfo(task, task_info);

    std::string reported_data;
    try {
      reported_data = task_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
    } catch (std::exception &e) {
      GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
      return ;
    } catch (...) {
      GELOGE(FAILED, "Failed to convert JSON to string.");
      return;
    }

    data.append(" model_id:").append(std::to_string(model_id));
    data.append(" task_id:").append(std::to_string(graph.task_id));
    data.append(" stream_id:").append(std::to_string(graph.stream_id));
    data.append("\n");

    GraphDescReport(device_id, data);
    data.clear();
    reported_data.append(",")
                 .append("\n");
    ReportData(device_id, reported_data, "task_desc_info");
  }
 #endif
 }

 void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &data) {
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportData(
    const int32_t &device_id, const string &data, const string &tag_name) {
 #ifdef DAVINCI_SUPPORT_PROFILING
  ReporterData reporter_data{};
  int ret = -1;
@@ -325,36 +296,38 @@ void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &d
  size_t index = data.size() / kReportMaxLen;
  if (index >= 1) {
    reporter_data.deviceId = device_id;
    ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info"));
    GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;);
    ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size());
    GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;);
    for (size_t i = 0; i < index; ++i) {
      reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * i;
      reporter_data.dataLen = kReportMaxLen;
      cb_ret = CallMsprofReport(reporter_data);
      GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;);
      GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret);
                      return;);
    }
    reporter_data.dataLen = data.size() - kReportMaxLen * index;
    if (reporter_data.dataLen != 0) {
      reporter_data.data = (unsigned char *)data.c_str() + kReportMaxLen * index;
      cb_ret = CallMsprofReport(reporter_data);
      GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;);
      GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret);
                      return;);
    }
  } else {
    reporter_data.deviceId = device_id;
    reporter_data.data = (unsigned char *)data.c_str();
    reporter_data.dataLen = data.size();
    ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, "graph_desc_info", sizeof("graph_desc_info"));
    GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag of graph_desc_info memcpy error!"); return;);
    ret = memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.size());
    GE_IF_BOOL_EXEC(ret != EOK, GELOGE(ret, "Report data tag [%s] memcpy error!", tag_name.c_str()); return;);

    cb_ret = CallMsprofReport(reporter_data);
    GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data of graph_desc_info failed, ret:%d", cb_ret); return;);
    GE_IF_BOOL_EXEC(cb_ret != 0, GELOGE(cb_ret, "Reporter data [%s] failed, ret:%d", tag_name.c_str(), cb_ret);
                    return;);
  }
 #endif
 }

 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData(
    uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
    const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) {
    uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info) {
 #ifdef DAVINCI_SUPPORT_PROFILING
  int32_t logic_device_id = 0;
  rtError_t rt_ret = rtGetDevice(&logic_device_id);
@@ -365,8 +338,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr
  GELOGD("current logic_device_id:%d", logic_device_id);
  GELOGD("start ProfilingTaskDescInfo.");
  ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id);
  GELOGD("start ProfilingGraphDescInfo.");
  ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id);
  GELOGD("Report profiling data for GE end.");
 #endif
 }
@@ -813,6 +784,44 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMs
      static_cast<void *>(&reporter_data), sizeof(ReporterData));
 }

 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetOpInputOutputInfo(
    const OpDescPtr &op, TaskDescInfo &task_desc_info) const {
  std::vector<Format> input_format;
  std::vector<std::vector<int64_t>> input_shape;
  std::vector<DataType> input_data_type;
  for (size_t i = 0; i < op->GetAllInputsSize(); ++i) {
    GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i);
    if (input_tensor_desc == nullptr) {
      continue;
    }
    input_format.emplace_back(input_tensor_desc->GetFormat());
    input_shape.emplace_back(input_tensor_desc->GetShape().GetDims());
    input_data_type.emplace_back(input_tensor_desc->GetDataType());
  }
  std::vector<Format> output_format;
  std::vector<std::vector<int64_t>> output_shape;
  std::vector<DataType> output_data_type;
  for (size_t j = 0; j < op->GetOutputsSize(); ++j) {
    GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j);
    if (output_tensor_desc == nullptr) {
      continue;
    }
    output_format.emplace_back(output_tensor_desc->GetFormat());
    output_shape.emplace_back(output_tensor_desc->GetShape().GetDims());
    output_data_type.emplace_back(output_tensor_desc->GetDataType());
  }

  std::vector<Format> format_default =  { FORMAT_NULL };
  std::vector<std::vector<int64_t>> shape_default = { {0} };
  std::vector<DataType> data_type_default = { DT_UNDEFINED };
  task_desc_info.input_format = input_format.empty() ? format_default : input_format;
  task_desc_info.input_shape = input_shape.empty() ? shape_default : input_shape;
  task_desc_info.input_data_type = input_data_type.empty() ? data_type_default : input_data_type;
  task_desc_info.output_format = output_format.empty() ? format_default : output_format;
  task_desc_info.output_shape = output_shape.empty() ? shape_default : output_shape;
  task_desc_info.output_data_type = output_data_type.empty() ? data_type_default : output_data_type;
 }

 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpPoint(
    std::string &fp_point, std::string &bp_point) {
  // Env or options mode, fp_point_/bp_point_ have initiliazed on profiling init
--- a/ge/common/profiling/profiling_manager.h
+++ b/ge/common/profiling/profiling_manager.h
@@ -54,6 +54,8 @@ namespace {

 }  // namespace
 namespace ge {
 class OpDesc;
 using OpDescPtr = std::shared_ptr<OpDesc>;
 struct DeviceSubsInfo {
  uint64_t module;
  uint32_t subscribe_count;
@@ -82,12 +84,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
  bool ProfilingModelExecuteOn() const;
  // is_execute_profiling_ only used by ge option and env
  bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; }
  void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
                           const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info);
  void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info);
  void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
                             const int32_t &device_id);
  void ProfilingGraphDescInfo(uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
                              const int32_t &device_id);
  void ProfilingOpInputOutInfo(const TaskDescInfo &task, Json &task_json);
  Status PluginInit() const;
  void PluginUnInit() const;
  Status CallMsprofReport(ReporterData &reporter_data) const;
@@ -95,6 +95,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
  void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; }
  void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; }
  void GetFpBpPoint(std::string &fp_point, std::string &bp_point);
  void GetOpInputOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const;
  void ReportData(const int32_t &device_id, const std::string &data, const std::string &tag_name);
 private:
  Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf);
  Status ParseOptions(const std::string &options);
@@ -103,7 +105,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
  Status ProfParseDeviceId(const std::map<std::string, std::string> &config_para,
                               vector<int32_t> &device_list);
  uint64_t GetProfilingModule();
  void GraphDescReport(const int32_t &device_id, const string &data);
  void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector<int32_t> &device_list);
  void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module);

--- a/ge/graph/build/task_generator.cc
+++ b/ge/graph/build/task_generator.cc
@@ -852,7 +852,7 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi

  // subgraph  of dynamic graph no need to find index, has been found in parent graph
  if (IsSubGraphOfDynamicGraph(graph)) {
    GELOGI("Graph[%s] is subgraph of dynamic graph, no nned to find index.", graph->GetName().c_str());
    GELOGI("Graph[%s] is subgraph of dynamic graph, no need to find index.", graph->GetName().c_str());
    return SUCCESS;
  }

--- a/ge/graph/load/model_manager/davinci_model.cc
+++ b/ge/graph/load/model_manager/davinci_model.cc
@@ -92,6 +92,32 @@ const uint32_t kEndOfSequence = 0x0704000a;
 const uint32_t kEndOfSequenceNew = 507005;
 const int32_t kModelAbortNormal = 0x0704000e;
 const int32_t kModelAbortNormalNew = 507024;
 const uint32_t kInteval = 2;
 const char *const kModelName = "model_name";
 const char *const kModeleId = "model_id";
 const char *const kLoadStartTime = "load_start_time";
 const char *const kLoadEndTime = "load_end_time";
 const char *const kFusionOpInfo = "fusion_op_info";
 const char *const kFusionOpName = "fusion_op_name";
 const char *const kOriginalOpNum = "origin_op_num";
 const char *const kOriginalOpName = "origin_op_name";
 const char *const kStreamId = "stream_id";
 const char *const kFusionOpMemoryInfo = "memory_info";
 const char *const kInputSize = "input_size";
 const char *const kOutputSize = "output_size";
 const char *const kWeightSize = "weight_size";
 const char *const kWorkSpaceSize = "workspace_size";
 const char *const kTotalSize = "total_size";
 const char *const kTaskCount = "task_count";
 const char *const kTaskId = "task_id";
 const char* const kRequestId = "request_id";
 const char* const kThreadId = "thread_id";
 const char* const kInputBeginTime = "input_begin_time";
 const char* const kInputEndTime = "input_end_time";
 const char* const kInferBeginTime = "infer_begin_time";
 const char* const kInferEndTime = "infer_end_time";
 const char* const kOutputBeginTime = "output_start_time";
 const char* const kOutputEndTime = "output_end_time";

 inline bool IsDataOp(const std::string &node_type) {
  return (node_type == DATA_TYPE) || (node_type == AIPP_DATA_TYPE) || (node_type == ANN_DATA_TYPE);
@@ -744,13 +770,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
 }

 Status DavinciModel::ReportProfilingData() {
  std::vector<ComputeGraphDescInfo> compute_graph_desc_info;
  Status ret = GetComputeGraphInfo(compute_graph_desc_info);
  if (ret != SUCCESS) {
    GELOGE(ret, "GetComputeGraphInfo failed.");
    return ret;
  }
  ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info);
  ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo());
  GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed.");

  return SUCCESS;
@@ -2202,173 +2222,101 @@ Status DavinciModel::InitModelProfile() {
 }

 Status DavinciModel::SinkModelProfile() {
  // profiling plugin must be registered
  auto &prof_mgr = ProfilingManager::Instance();
  ReporterData reporter_data{};
  // report model data tag name
  std::string tag_name("model_load_info_" + std::to_string(this->Id()));
  GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK,
                   return FAILED, "Sink model tag memcpy error.");

  // Model Header
  std::string name = om_name_.empty() ? name_ : om_name_;
  size_t name_len = name.size();
  reporter_data.deviceId = device_id_;
  reporter_data.data = (unsigned char *)&name_len;
  reporter_data.dataLen = sizeof(int32_t);
  GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                   "Reporter data fail, model id:%u.", this->Id());

  reporter_data.data = (unsigned char *)name.c_str();
  reporter_data.dataLen = name.size();
  GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                   "Reporter data fail, model id:%u.", this->Id());

  uint32_t model_id = this->Id();
  reporter_data.data = (unsigned char *)&model_id;
  reporter_data.dataLen = sizeof(uint32_t);
  GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                   "Reporter data fail, model id:%u.", this->Id());

  // Load Start/End Time
  int64_t start_time = this->GetLoadBeginTime();
  reporter_data.data = (unsigned char *)&start_time;
  reporter_data.dataLen = sizeof(int64_t);
  GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                   "Reporter data fail, model id:%u.", this->Id());

  int64_t end_time = this->GetLoadEndTime();
  reporter_data.data = (unsigned char *)&end_time;
  reporter_data.dataLen = sizeof(int64_t);
  GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                   "Reporter data fail, model id:%u.", this->Id());

  Json model_load_info;
  model_load_info[kModelName] = name;
  model_load_info[kModeleId] = model_id;
  model_load_info[kLoadStartTime] = start_time;
  model_load_info[kLoadEndTime] = end_time;
  // fusion op info
  using CIT = std::multimap<uint32_t, uint32_t>::const_iterator;
  using Range = std::pair<CIT, CIT>;
  for (const ProfileInfo &profile : profile_list_) {
    // op name after fusion
    Json fusion_op_info;
    string fusion_op_name = profile.fusion_info.op_name;
    int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size();
    reporter_data.data = (unsigned char *)&fusion_op_name_len;
    reporter_data.dataLen = sizeof(int32_t);
    GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                     "Reporter data fail, model id:%u.", this->Id());

    reporter_data.data = (unsigned char *)fusion_op_name.c_str();
    reporter_data.dataLen = fusion_op_name_len;
    GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                     "Reporter data fail, model id:%u.", this->Id());

    // original op name before fusion
    uint32_t op_num = profile.fusion_info.original_op_names.size();
    reporter_data.data = (unsigned char *)&op_num;
    reporter_data.dataLen = sizeof(int32_t);
    GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                     "Reporter data fail, model id:%u.", this->Id());

    vector<string> original_name;
    for (uint32_t k = 0; k < op_num; k++) {
      std::string op_name = profile.fusion_info.original_op_names[k];
      int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size();
      reporter_data.data = (unsigned char *)&op_name_len;
      reporter_data.dataLen = sizeof(int32_t);
      GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                       "Reporter data fail, model id:%u.", this->Id());
      reporter_data.data = (unsigned char *)op_name.c_str();
      reporter_data.dataLen = op_name_len;
      GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                       "Reporter data fail, model id:%u.", this->Id());
    }

    // stream id info
      original_name.emplace_back(profile.fusion_info.original_op_names[k]);
    }
    uint32_t stream_id = 0;
    auto iter = profiler_report_op_info_.find(fusion_op_name);
    if (iter != profiler_report_op_info_.end()) {
      stream_id = iter->second.second;
    }
    reporter_data.data = (unsigned char *)&stream_id;
    reporter_data.dataLen = sizeof(int32_t);
    GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                     "Reporter data fail, model id:%u.", this->Id());

    // memory info
    reporter_data.data = (unsigned char *)&profile.memory_info;
    reporter_data.dataLen = sizeof(profile.memory_info);
    GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                     "Reporter data fail, model id:%u.", this->Id());

    // task info
    reporter_data.data = (unsigned char *)&profile.task_count;
    reporter_data.dataLen = sizeof(uint32_t);
    GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                     "Reporter data fail, model id:%u.", this->Id());

    fusion_op_info[kFusionOpName] = fusion_op_name;
    fusion_op_info[kOriginalOpNum] = op_num;
    fusion_op_info[kOriginalOpName] = original_name;
    fusion_op_info[kStreamId] = stream_id;
    fusion_op_info[kFusionOpMemoryInfo][kInputSize] = profile.memory_info.input_size;
    fusion_op_info[kFusionOpMemoryInfo][kOutputSize] = profile.memory_info.output_size;
    fusion_op_info[kFusionOpMemoryInfo][kWeightSize] = profile.memory_info.weight_size;
    fusion_op_info[kFusionOpMemoryInfo][kWorkSpaceSize] = profile.memory_info.workspace_size;
    fusion_op_info[kFusionOpMemoryInfo][kTotalSize] = profile.memory_info.total_size;
    fusion_op_info[kTaskCount] = profile.task_count;
    vector<uint32_t> task_id;
    Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index);
    for (CIT idx = task_range.first; idx != task_range.second; ++idx) {
      uint32_t task_id = idx->second;
      reporter_data.data = (unsigned char *)&task_id;
      reporter_data.dataLen = sizeof(uint32_t);
      GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                       "Reporter data fail, model id:%u.", this->Id());
      task_id.push_back(idx->second);
    }
    fusion_op_info[kTaskId] = task_id;
    model_load_info[kFusionOpInfo] += fusion_op_info;
  }

  std::string tag_name("model_load_info_" + std::to_string(this->Id()));
  std::string reported_data;
  try {
    reported_data = model_load_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
  } catch (std::exception &e) {
    GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
  } catch (...) {
    GELOGE(FAILED, "Failed to convert JSON to string.");
  }
  reported_data.append(",")
               .append("\n");
  prof_mgr.ReportData(device_id_, reported_data, tag_name);
  return SUCCESS;
 }

 Status DavinciModel::SinkTimeProfile(const InputData &current_data) {
  // profiling plugin must be registered
  auto &prof_mgr = ProfilingManager::Instance();
  ReporterData reporter_data{};

  string name = om_name_.empty() ? name_ : om_name_;
  Json model_time_info;
  model_time_info[kModelName] = name;
  model_time_info[kModeleId] = this->Id();
  model_time_info[kRequestId] = current_data.request_id;
  model_time_info[kThreadId] = GetDataInputTid();
  model_time_info[kInputBeginTime] = time_info_.processBeginTime;
  model_time_info[kInputEndTime] = time_info_.processEndTime;
  model_time_info[kInferBeginTime] = time_info_.inferenceBeginTime;
  model_time_info[kInferEndTime] = time_info_.inferenceEndTime;
  model_time_info[kOutputBeginTime] = time_info_.dumpBeginTime;
  model_time_info[kOutputEndTime] = time_info_.dumpEndTime;

  // report model data tag name
  std::string tag_name;
  tag_name.append("model_time_info_")
      .append(std::to_string(this->Id()))
      .append("_")
      .append(std::to_string(current_data.index));

  GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK,
                   return FAILED, "Sink model tag memcpy error.");
  // device id
  reporter_data.deviceId = device_id_;

  // Model Header
  string name;
  if (!om_name_.empty()) {
    name = om_name_;
  } else {
    name = name_;
  }
  size_t name_len = name.size();
  reporter_data.data = (unsigned char *)&name_len;
  reporter_data.dataLen = sizeof(int32_t);
  GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                   "Reporter data fail, model id:%u.", this->Id());

  reporter_data.data = (unsigned char *)name.c_str();
  reporter_data.dataLen = name.size();
  GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                   "Reporter data fail, model id:%u.", this->Id());

  // request id
  uint64_t request_id = current_data.request_id;
  reporter_data.data = (unsigned char *)&request_id;
  reporter_data.dataLen = sizeof(uint32_t);
  GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                   "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index);

  // thread id
  int32_t thread_id = GetDataInputTid();
  reporter_data.data = (unsigned char *)&thread_id;
  reporter_data.dataLen = sizeof(int32_t);
  GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                   "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index);

  // time info
  time_info_.modelId = this->Id();
  reporter_data.data = (unsigned char *)&time_info_;
  reporter_data.dataLen = sizeof(struct timeInfo);
  GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
                   "Reporter data fail, model id:%u, data index:%u.", this->Id(), current_data.index);
    .append(std::to_string(this->Id()))
    .append("_")
    .append(std::to_string(current_data.index));
  std::string reported_data;
  try {
    reported_data = model_time_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
  } catch (std::exception &e) {
    GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
  } catch (...) {
    GELOGE(FAILED, "Failed to convert JSON to string.");
  }
  reported_data.append(",")
               .append("\n");
  prof_mgr.ReportData(device_id_, reported_data, tag_name);

  return SUCCESS;
 }
@@ -3069,13 +3017,15 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo
    task_desc_info.model_name = name_;
  }
  task_desc_info.op_name = op->GetName();
  task_desc_info.op_type = op->GetType();
  task_desc_info.block_dim = task_def.kernel().block_dim();
  task_desc_info.task_id = task->GetTaskID();
  task_desc_info.stream_id = task->GetStreamId();
  task_desc_info.shape_type = "static";
  task_desc_info.cur_iter_num = 0;
  // task type
  task_desc_info.task_type = kTaskTypeInvalid;
  auto &prof_mgr = ProfilingManager::Instance();
  prof_mgr.GetOpInputOutputInfo(op, task_desc_info);
  auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type());
  if (model_task_type == RT_MODEL_TASK_KERNEL) {
    const domi::KernelDef &kernel_def = task_def.kernel();
@@ -3107,7 +3057,6 @@ void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfo
      task_desc_info_.emplace_back(task_desc_info);
    }
  }
  return;
 }

 Status DavinciModel::DistributeTask() {
@@ -4008,41 +3957,6 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea
  main_follow_stream_mapping_[main_stream_id].emplace_back(stream);
 }

 Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info) {
  auto &all_op_desc = data_dumper_.GetAllOpDescInfo();
  for (auto &op_desc : all_op_desc) {
    ComputeGraphDescInfo compute_graph_info;
    if (!om_name_.empty()) {
      compute_graph_info.model_name = om_name_;
    } else {
      compute_graph_info.model_name = name_;
    }

    std::vector<Format> format =  { FORMAT_NULL };
    std::vector<std::vector<int64_t>> shape = { {0} };
    std::vector<DataType> data_type = { DT_UNDEFINED };
    compute_graph_info.op_name = op_desc.op_name;
    compute_graph_info.op_type = op_desc.op_type;
    compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format;
    compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape;
    compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type;
    compute_graph_info.output_format = op_desc.output_format.empty() ? format :  op_desc.output_format;
    compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape;
    compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? data_type : op_desc.output_data_type;
    uint32_t task_id = 0;
    uint32_t stream_id = 0;
    auto iter = profiler_report_op_info_.find(op_desc.op_name);
    if (iter != profiler_report_op_info_.end()) {
      task_id = iter->second.first;
      stream_id = iter->second.second;
    }
    compute_graph_info.task_id = task_id;
    compute_graph_info.stream_id = stream_id;
    graph_desc_info.emplace_back(compute_graph_info);
  }
  return SUCCESS;
 }

 void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) {
  if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) {
    tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_;
--- a/ge/graph/load/model_manager/davinci_model.h
+++ b/ge/graph/load/model_manager/davinci_model.h
@@ -840,9 +840,6 @@ class DavinciModel {

  Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id);

  // get desc info of graph for profiling
  Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info);

  void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name);

  Status InitL1DataDumperArgs();
--- a/ge/hybrid/executor/worker/execution_engine.cc
+++ b/ge/hybrid/executor/worker/execution_engine.cc
@@ -70,8 +70,6 @@ class NodeDoneCallback {
  Status PrepareConstInputs(const NodeItem &node_item);
  Status DumpDynamicNode();
  Status ProfilingReport();
  Status GetGraphDescInfo(const NodePtr node, const HybridModel *model,
                          std::vector<ComputeGraphDescInfo> &compute_graph_info);
  Status GetTaskDescInfo(const NodePtr node, const HybridModel *model,
                         std::vector<TaskDescInfo> &task_desc_info);
  GraphExecutionContext *graph_context_;
@@ -159,51 +157,14 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *
  }

  GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str());
  auto &prof_mgr = ProfilingManager::Instance();
  task_desc_info = context_->GetProfilingTaskDescInfo();
  context_->ClearProfilingTaskDescInfo();

  return SUCCESS;
 }

 Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel *model,
                                          std::vector<ComputeGraphDescInfo> &compute_graph_info) {
  GE_CHECK_NOTNULL(node);
  GE_CHECK_NOTNULL(model);

  GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str());
  compute_graph_info = context_->GetProfilingGraphDescInfo();
  context_->ClearProfilingGraphDescInfo();

  auto op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);
  for (auto &tmp_compute_graph_info : compute_graph_info) {
    // default
    if (op_desc->GetAllInputsSize() == 0) {
      tmp_compute_graph_info.input_format = { FORMAT_NULL };
      tmp_compute_graph_info.input_shape = { {0} };
      tmp_compute_graph_info.input_data_type = { DT_UNDEFINED };
    }
    for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
      GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i);
      if (input_desc == nullptr) {
        continue;
      }
      tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat());
      tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims());
      tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType());
    }

    if (op_desc->GetOutputsSize() == 0) {
      tmp_compute_graph_info.output_format = { FORMAT_NULL };
      tmp_compute_graph_info.output_shape = { {0} };
      tmp_compute_graph_info.output_data_type = { DT_UNDEFINED };
    }
    for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) {
      GeTensorDesc output_desc = op_desc->GetOutputDesc(j);
      tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat());
      tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
      tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
    }
  for (auto &tmp_task_desc : task_desc_info) {
    // save op input and output info
    auto op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    prof_mgr.GetOpInputOutputInfo(op_desc, tmp_task_desc);
  }

  return SUCCESS;
@@ -233,15 +194,8 @@ Status NodeDoneCallback::ProfilingReport() {
    return profiling_ret;
  }

  std::vector<ComputeGraphDescInfo> compute_graph_info;
  profiling_ret = GetGraphDescInfo(node, model, compute_graph_info);
  if (profiling_ret != RT_ERROR_NONE) {
    GELOGE(profiling_ret, "Get graph info of node[%s] failed.", node->GetName().c_str());
    return profiling_ret;
  }

  auto &profiling_manager = ProfilingManager::Instance();
  profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info);
  profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info);
  return SUCCESS;
 }

--- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
@@ -189,12 +189,11 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
    uint32_t stream_id = 0;
    rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(rt_ret, "Get task_id and stream_id failed.");
      return FAILED;
      GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
    (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim());
    (void)context.SaveProfilingGraphDescInfo(task_id, stream_id);
    RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
    RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
  }
--- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
+++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
@@ -201,12 +201,11 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void(
  uint32_t stream_id = 0;
  rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(rt_ret, "Get task_id and stream_id failed.");
    return FAILED;
    GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
  (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0);
  (void)context.SaveProfilingGraphDescInfo(task_id, stream_id);
  auto callback = [=, &context]() {
    GELOGD("Node[%s] callback start.", node_name_.c_str());
    RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start");
--- a/ge/hybrid/node_executor/task_context.cc
+++ b/ge/hybrid/node_executor/task_context.cc
@@ -515,7 +515,7 @@ Status TaskContext::Synchronize() {
 }

 Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t  stream_id,
                                              uint32_t task_type, uint32_t block_dim) {
                                              const std::string &task_type, uint32_t block_dim) {
  if (ProfilingManager::Instance().ProfilingModelExecuteOn()) {
    const NodeItem &node_item = GetNodeItem();
    auto op_desc = node_item.GetOpDesc();
@@ -525,11 +525,11 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t  stream
    const HybridModel *model = graph_context->model;
    GE_CHECK_NOTNULL(model);

    std::string op_name = op_desc->GetName();
    std::string dynamic_model_name = model->GetModelName();
    TaskDescInfo tmp_task_desc_info;
    tmp_task_desc_info.model_name = dynamic_model_name;
    tmp_task_desc_info.op_name = op_name;
    tmp_task_desc_info.op_name = op_desc->GetName();
    tmp_task_desc_info.op_type = op_desc->GetType();
    tmp_task_desc_info.block_dim = block_dim;
    tmp_task_desc_info.task_type = task_type;
    tmp_task_desc_info.task_id = task_id;
@@ -546,31 +546,5 @@ NodeState *TaskContext::GetNodeState() const {
  return node_state_;
 }

 Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id) {
  if (ProfilingManager::Instance().ProfilingModelExecuteOn()) {
    const NodeItem &node_item = GetNodeItem();
    auto op_desc = node_item.GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    const GraphExecutionContext *graph_context = GetExecutionContext();
    GE_CHECK_NOTNULL(graph_context);
    const HybridModel *model = graph_context->model;
    GE_CHECK_NOTNULL(model);

    std::string dynamic_model_name = model->GetModelName();
    auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID);
    if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) &&
        op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) {
      ComputeGraphDescInfo tmp_compute_graph_info;
      tmp_compute_graph_info.model_name = dynamic_model_name;
      tmp_compute_graph_info.op_name = op_desc->GetName();
      tmp_compute_graph_info.op_type = op_desc->GetType();
      tmp_compute_graph_info.task_id = task_id;
      tmp_compute_graph_info.stream_id = stream_id;
      compute_graph_info.emplace_back(tmp_compute_graph_info);
    }
  }
  return SUCCESS;
 }

 }  // namespace hybrid
 }  // namespace ge
--- a/ge/hybrid/node_executor/task_context.h
+++ b/ge/hybrid/node_executor/task_context.h
@@ -113,13 +113,10 @@ class TaskContext {
  void *handle_ = nullptr;

  const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; }
  Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, uint32_t task_type, uint32_t block_dim);
  Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id,
                                   const std::string &task_type, uint32_t block_dim);
  void ClearProfilingTaskDescInfo() { task_desc_info.clear(); }

  const std::vector<ComputeGraphDescInfo>& GetProfilingGraphDescInfo() const { return compute_graph_info; }
  Status SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id);
  void ClearProfilingGraphDescInfo() { compute_graph_info.clear(); }

 private:
  TaskContext(GraphExecutionContext *execution_context,
              NodeState *node_state,
@@ -141,7 +138,6 @@ class TaskContext {
  uint32_t task_id_ = 0;
  uint32_t stream_id_ = 0;
  std::vector<TaskDescInfo> task_desc_info;
  std::vector<ComputeGraphDescInfo> compute_graph_info;
 };
 }  // namespace hybrid
 }  // namespace ge
--- a/ge/single_op/single_op.cc
+++ b/ge/single_op/single_op.cc
@@ -45,40 +45,24 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) {
    return SUCCESS;
  }

  string model_name;
  string op_name;
  TaskDescInfo tmp_task_desc_info;
  uint32_t model_id;
  uint32_t block_dim;
  if (op_task->GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) {
  if (op_task->GetProfilingArgs(tmp_task_desc_info, model_id) != SUCCESS) {
    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed");
    return ACL_ERROR_GE_PARAM_INVALID;
  }
  GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str());
  std::vector<TaskDescInfo> task_desc_info;
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(rt_ret, "Get task_id and stream_id failed.");
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  GELOGD("ProfilingReport of op[%s] model[%s] start.",
         tmp_task_desc_info.op_name.c_str(), tmp_task_desc_info.model_name.c_str());

  TaskDescInfo tmp_task_desc_info;
  tmp_task_desc_info.model_name = model_name;
  tmp_task_desc_info.op_name = op_name;
  tmp_task_desc_info.block_dim = block_dim;
  tmp_task_desc_info.task_id = task_id;
  tmp_task_desc_info.stream_id = stream_id;
  tmp_task_desc_info.shape_type = shape_type;
  tmp_task_desc_info.cur_iter_num = 0;
  tmp_task_desc_info.task_type = op_task->GetTaskType();
  GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id);
  task_desc_info.emplace_back(tmp_task_desc_info);

  std::vector<ComputeGraphDescInfo> compute_graph_info;
  std::vector<TaskDescInfo> task_desc_info;
  task_desc_info.emplace_back(tmp_task_desc_info);

  auto &profiling_manager = ProfilingManager::Instance();
  profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info);
  profiling_manager.ReportProfilingData(model_id, task_desc_info);
  return SUCCESS;
 }
 }  // namespace
--- a/ge/single_op/task/op_task.cc
+++ b/ge/single_op/task/op_task.cc
@@ -23,6 +23,7 @@
 #include "aicpu/common/aicpu_task_struct.h"
 #include "common/dump/dump_manager.h"
 #include "common/dump/dump_op.h"
 #include "common/profiling/profiling_manager.h"
 #include "common/formats/formats.h"
 #include "common/math/math_util.h"
 #include "framework/common/debug/log.h"
@@ -108,15 +109,29 @@ void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) {
  model_id_ = model_id;
 }

 Status OpTask::GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id,
                                uint32_t &block_dim) {
  model_name = model_name_;
  model_id = model_id_;
  block_dim = block_dim_;
 Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id) {
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Get task_id and stream_id failed ret: 0x%X.", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  GE_CHECK_NOTNULL(op_desc_);
  op_name = op_desc_->GetName();
  string op_name = op_desc_->GetName();
  GELOGD("Get profiling args of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id);
  model_id = model_id_;
  task_desc_info.model_name = model_name_;
  task_desc_info.block_dim = block_dim_;
  task_desc_info.task_id = task_id;
  task_desc_info.stream_id = stream_id;
  task_desc_info.op_name = op_name;
  task_desc_info.op_type = op_desc_->GetType();
  auto &prof_mgr = ProfilingManager::Instance();
  prof_mgr.GetOpInputOutputInfo(op_desc_, task_desc_info);
  return SUCCESS;
 }

 Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) {
  return UNSUPPORTED;
 }
@@ -153,7 +168,7 @@ Status OpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
  return UNSUPPORTED;
 }

 uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; }
 const std::string &OpTask::GetTaskType() const { return kTaskTypeInvalid; }

 TbeOpTask::~TbeOpTask() {
  if (sm_desc_ != nullptr) {
@@ -171,7 +186,7 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; }

 const std::string &TbeOpTask::GetStubName() const { return stub_name_; }

 uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; }
 const std::string &TbeOpTask::GetTaskType() const { return kTaskTypeAicore; }

 void TbeOpTask::SetHandle(void *handle) {
  this->handle_ = handle;
@@ -834,7 +849,7 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam &param) {
  return DoUpdateArgTable(param, false);
 }

 uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; }
 const std::string &AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; }

 void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) {
  arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data());
--- a/ge/single_op/task/op_task.h
+++ b/ge/single_op/task/op_task.h
@@ -43,7 +43,7 @@ class OpTask {
                               const vector<GeTensorDesc> &output_desc);
  virtual Status UpdateArgTable(const SingleOpModelParam &param);
  void SetModelArgs(std::string model_name, uint32_t model_id);
  Status GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, uint32_t &block_dim);
  Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id);
  const OpDescPtr &GetOpdesc() const {return op_desc_;}
  Status OpenDump(rtStream_t stream);
  virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0;
@@ -52,7 +52,7 @@ class OpTask {
                              std::vector<GeTensorDesc> &output_desc,
                              std::vector<DataBuffer> &output_buffers,
                              rtStream_t stream);
  virtual uint32_t GetTaskType() const;
  virtual const std::string &GetTaskType() const;

 protected:
  Status DoUpdateArgTable(const SingleOpModelParam &param, bool keep_workspace);
@@ -88,7 +88,7 @@ class TbeOpTask : public OpTask {
  size_t GetArgSize() const;
  const std::string &GetStubName() const;
  void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size);
  uint32_t GetTaskType() const override;
  const std::string &GetTaskType() const override;
  void SetHandle(void *handle);

 private:
@@ -123,7 +123,7 @@ class AiCpuBaseTask : public OpTask {
  ~AiCpuBaseTask() override;
  UnknowShapeOpType GetUnknownType() const { return unknown_type_; }
  Status UpdateArgTable(const SingleOpModelParam &param) override;
  uint32_t GetTaskType() const override;
  const std::string &GetTaskType() const override;

 protected:
  Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);
--- a/inc/framework/common/ge_types.h
+++ b/inc/framework/common/ge_types.h
@@ -57,9 +57,9 @@ const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM";
 const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement";

 // profiling data
 const uint32_t kTaskTypeAicore = 0;
 const uint32_t kTaskTypeAicpu = 1;
 const uint32_t kTaskTypeInvalid = 0xFFFF;
 const std::string kTaskTypeAicore = "AI_CORE";
 const std::string kTaskTypeAicpu = "AI_CPU";
 const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID";

 // Data cache, including data address and length
 struct DataBuffer {
@@ -251,27 +251,19 @@ struct Options {
 struct TaskDescInfo {
  std::string model_name;
  std::string op_name;
  std::string op_type;
  uint32_t block_dim;
  uint32_t task_id;
  uint32_t stream_id;
  std::string shape_type;
  int64_t cur_iter_num;
  uint32_t task_type;
 };

 // Profiling info of graph
 struct ComputeGraphDescInfo {
  std::string model_name;
  std::string op_name;
  std::string op_type;
  std::string task_type;
  std::vector<Format> input_format;
  std::vector<std::vector<int64_t>> input_shape;
  std::vector<DataType> input_data_type;
  std::vector<Format> output_format;
  std::vector<std::vector<int64_t>> output_shape;
  std::vector<DataType> output_data_type;
  uint32_t task_id;
  uint32_t stream_id;
 };

 struct OpDescInfo {
--- a/tests/ut/ge/CMakeLists.txt
+++ b/tests/ut/ge/CMakeLists.txt
@@ -761,7 +761,7 @@ set(GENERATOR_TEST_FILES
 )

 set(SINGLE_OP_TEST_FILES
    #"single_op/single_op_model_unittest.cc"
    "single_op/single_op_model_unittest.cc"
    "single_op/single_op_manager_unittest.cc"
    "single_op/stream_resource_unittest.cc"
    "single_op/single_op_task_unittest.cc"
--- a/tests/ut/ge/graph/load/davinci_model_unittest.cc
+++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc
@@ -890,4 +890,11 @@ TEST_F(UtestDavinciModel, Sink_model_profile) {
  model.SinkModelProfile();
 }

 TEST_F(UtestDavinciModel, Sink_time_profile) {
  ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport;
  DavinciModel model(0, nullptr);
  InputData current_data;
  model.SinkTimeProfile(current_data);
 }

 }  // namespace ge
--- a/tests/ut/ge/single_op/single_op_model_unittest.cc
+++ b/tests/ut/ge/single_op/single_op_model_unittest.cc
@@ -40,6 +40,10 @@ class UtestSingleOpModel : public testing::Test {
  void TearDown() {}
 };

 //rt api stub
 rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId) {
  return RT_ERROR_NONE;
 }
 /*
 TEST_F(UtestSingleOpModel, test_init_model) {
  string model_data_str = "123456789";
@@ -101,9 +105,9 @@ TEST_F(UtestSingleOpModel, test_set_inputs_and_outputs) {

  std::mutex stream_mu_;
  rtStream_t stream_ = nullptr;
  SingleOp single_op(&stream_mu_, stream_);

  ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS);
 //  SingleOp single_op(&stream_mu_, stream_);
 //
 //  ASSERT_EQ(model.SetInputsAndOutputs(single_op), SUCCESS);
 }
 /*
 TEST_F(UtestSingleOpModel, test_build_kernel_task) {
@@ -148,7 +152,7 @@ TEST_F(UtestSingleOpModel, test_init) {
  ASSERT_EQ(op_model.Init(), FAILED);
 }
 */

 /*
 TEST_F(UtestSingleOpModel, test_parse_arg_table) {
  string model_data_str = "123456789";
  SingleOpModel op_model("model", model_data_str.c_str(), model_data_str.size());
@@ -173,3 +177,23 @@ TEST_F(UtestSingleOpModel, test_parse_arg_table) {
  ASSERT_EQ(op.arg_table_[1].size(), 1);
  ASSERT_EQ(op.arg_table_[1].front(), &arg_base[0]);
 }
 */
 TEST_F(UtestSingleOpModel, test_op_task_get_profiler_args) {
  string name = "relu";
  string type = "relu";
  auto op_desc = std::make_shared<ge::OpDesc>(name, type);
  op_desc->SetStreamId(0);
  op_desc->SetId(0);
  TbeOpTask task;
  task.op_desc_ = op_desc;
  task.model_name_ = "resnet_50";
  task.model_id_ = 1;
  TaskDescInfo task_desc_info;
  uint32_t model_id;
  task.GetProfilingArgs(task_desc_info, model_id);

  ASSERT_EQ(task_desc_info.model_name, "resnet_50");
  ASSERT_EQ(model_id, 1);
 }