diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc
index e21bcb25..6e01ee87 100644
--- a/ge/common/profiling/profiling_manager.cc
+++ b/ge/common/profiling/profiling_manager.cc
@@ -21,6 +21,7 @@
 #include "framework/common/string_util.h"
 #include "graph/ge_context.h"
 #include "runtime/base.h"
+#include "graph/load/new_model_manager/davinci_model.h"
 
 namespace {
 const char *const kJobID = "jobID";
@@ -39,10 +40,12 @@ const std::string kConfigNumsdev = "devNums";
 const std::string kConfigDevIdList = "devIdList";
 const std::string kProfStart = "prof_start";
 const std::string kProfStop = "prof_stop";
+const std::string kProfModelSubscribe = "prof_model_subscribe";
+const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe";
 }  // namespace
 
 namespace ge {
-ProfilingManager::ProfilingManager() {}
+ProfilingManager::ProfilingManager() : subscribe_count_(0) {}
 
 ProfilingManager::~ProfilingManager() {}
 
@@ -54,6 +57,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager &ProfilingMana
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options) {
 #ifdef DAVINCI_SUPPORT_PROFILING
   vector<int32_t>().swap(device_id_);
+  subscribe_count_ = 0;
   job_id_ = options.job_id;
 
   GELOGI("ProfilingManager::Init  job_id:%s", job_id_.c_str());
@@ -382,7 +386,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf
 }
 
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo(
-    const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) {
+    uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) {
 #ifdef DAVINCI_SUPPORT_PROFILING
   Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter();
   if (reporter == nullptr) {
@@ -401,7 +405,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
                      .append(op_name).append(" ")
                      .append(std::to_string(block_dim).append(" ")
                      .append(std::to_string(task_id)).append(" ")
-                     .append(std::to_string(stream_id)).append("\n"));
+                     .append(std::to_string(stream_id)).append(" ")
+                     .append(std::to_string(model_id)).append("\n"));
 
     Msprof::Engine::ReporterData reporter_data{};
     reporter_data.deviceId = device_id;
@@ -425,7 +430,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
 }
 
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo(
-    const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, const int32_t &device_id) {
+    uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, const int32_t &device_id) {
 #ifdef DAVINCI_SUPPORT_PROFILING
   Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter();
   GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return;);
@@ -483,6 +488,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
       data.append("\"");
     }
 
+    data.append(" model_id:").append(std::to_string(model_id));
+
     data.append("\n");
 
     Msprof::Engine::ReporterData reporter_data{};
@@ -537,7 +544,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUn
 }
 
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData(
-    const std::vector<TaskDescInfo> &task_desc_info, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) {
+    uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
+    const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
+    bool check_device) {
 #ifdef DAVINCI_SUPPORT_PROFILING
   int32_t logic_device_id = 0;
   rtError_t rt_ret = rtGetDevice(&logic_device_id);
@@ -546,7 +555,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr
     return;
   }
   GELOGI("current logic_device_id:%d", logic_device_id);
-  if (!is_acl_api_mode_) {
+  if (check_device) {
     auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id);
     if (ret == device_id_.end()) {
       GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed.");
@@ -554,9 +563,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr
     }
   }
   GELOGI("start ProfilingTaskDescInfo.");
-  ProfilingTaskDescInfo(task_desc_info, logic_device_id);
+  ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id);
   GELOGI("start ProfilingGraphDescInfo.");
-  ProfilingGraphDescInfo(compute_graph_desc_info, logic_device_id);
+  ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id);
   GELOGI("Report profiling data for GE end.");
 #endif
 }
@@ -581,6 +590,105 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t ProfilingManager::GetP
   return module;
 }
 
+// Maintains subs_dev_module_, the per-device table of model subscriptions.
+//   prof_type == kProfModelSubscribe:   add one reference for device_id and record module.
+//   prof_type == kProfModelUnsubscribe: drop one reference; the count never goes below zero.
+// Any other prof_type only produces an info log.
+void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module) {
+#ifdef DAVINCI_SUPPORT_PROFILING
+  if (prof_type == kProfModelSubscribe) {
+    // operator[] value-initializes a missing entry, so an unknown device starts with count 0.
+    DeviceSubsInfo &dev_info = subs_dev_module_[device_id];
+    // An idle entry must not keep the module of an earlier subscription. While the device is
+    // still subscribed the module bits are merged so the final stop covers everything started.
+    dev_info.module = (dev_info.subscribe_count == 0) ? module : (dev_info.module | module);
+    dev_info.subscribe_count++;
+  } else if (prof_type == kProfModelUnsubscribe) {
+    // Entries stay in the table with count 0 after the last unsubscribe.
+    auto iter = subs_dev_module_.find(device_id);
+    if ((iter != subs_dev_module_.end()) && (iter->second.subscribe_count > 0)) {
+      iter->second.subscribe_count--;
+    }
+  } else {
+    GELOGI("No need to update device_id module map.");
+  }
+#endif
+}
+
+// Subscribes one loaded model to profiling. `model` must point to a DavinciModel; `module` is the
+// profiling switch mask handed to the runtime. Returns FAILED when model is null or start-up fails.
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfModelSubscribe(
+    uint64_t module, void *model) {
+#ifdef DAVINCI_SUPPORT_PROFILING
+  std::lock_guard<std::mutex> lock(mutex_);
+  auto davinci_model = static_cast<DavinciModel *>(model);
+  if (davinci_model == nullptr) {
+    GELOGE(FAILED, "Model to subscribe is nullptr.");
+    return FAILED;
+  }
+  // The first subscriber that asks for model-load profiling registers the framework engine.
+  const bool init_engine = (subscribe_count_ == 0) && ((module & PROF_MODEL_LOAD_MASK) == PROF_MODEL_LOAD_MASK);
+  if (init_engine && (Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_) != SUCCESS)) {
+    GELOGE(FAILED, "Register profiling engine failed.");
+    return FAILED;
+  }
+  GE_IF_BOOL_EXEC(init_engine, GELOGI("Prof subscribe: model load profiling on."));
+  uint32_t device[1] = {davinci_model->GetDeviceId()};
+  if (rtProfilerStart(module, 1, device) != RT_ERROR_NONE) {
+    GELOGE(FAILED, "Runtime profiler start failed.");
+    GE_IF_BOOL_EXEC(init_engine, (void)Msprof::Engine::UnInit(GE_PROFILING_MODULE));
+    return FAILED;
+  }
+  // Counted only after the runtime accepted the request, so a failed start leaves no state behind.
+  subscribe_count_++;
+  UpdateSubscribeDeviceModuleMap(kProfModelSubscribe, device[0], module);
+  // Report profiling data
+  Status p_ret = davinci_model->ReportProfilingData(false);
+  if (p_ret != SUCCESS) {
+    GELOGE(p_ret, "Report profiling data failed.");
+    return p_ret;
+  }
+#endif
+  return SUCCESS;
+}
+
+// Cancels one model's profiling subscription. `model` must point to a DavinciModel. The device's runtime
+// profiler stops with its last subscriber; the engine is unregistered when no subscription remains.
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfModelUnsubscribe(void *model) {
+#ifdef DAVINCI_SUPPORT_PROFILING
+  std::lock_guard<std::mutex> lock(mutex_);
+  if (subscribe_count_ == 0) {
+    GELOGW("The profiler has not been subscribed, you do not need to cannel the subscription.");
+    return SUCCESS;
+  }
+  auto davinci_model = static_cast<DavinciModel *>(model);
+  if (davinci_model == nullptr) {
+    GELOGE(FAILED, "Model to unsubscribe is nullptr.");
+    return FAILED;
+  }
+  uint32_t device[1] = {davinci_model->GetDeviceId()};
+  auto iter = subs_dev_module_.find(device[0]);
+  if ((iter == subs_dev_module_.end()) || (iter->second.subscribe_count == 0)) {
+    GELOGW("The profiler has not been subscribed on device %u.", device[0]);
+    return SUCCESS;  // nothing subscribed on this device: the global count stays untouched
+  }
+  if ((iter->second.subscribe_count == 1) && (rtProfilerStop(iter->second.module, 1, device) != RT_ERROR_NONE)) {
+    GELOGE(FAILED, "Runtime profiler stop failed.");
+    return FAILED;
+  }
+  UpdateSubscribeDeviceModuleMap(kProfModelUnsubscribe, device[0], iter->second.module);
+  subscribe_count_--;
+  if (subscribe_count_ == 0) {
+    int32_t ret = Msprof::Engine::UnInit(GE_PROFILING_MODULE);
+    if (ret != SUCCESS) {
+      GELOGE(FAILED, "Profiling plugin uninit failed, ret:%d", ret);
+      return FAILED;  // report a GE status instead of the raw plugin code
+    }
+  }
+#endif
+  return SUCCESS;
+}
+
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfInit(uint64_t module) {
 #ifdef DAVINCI_SUPPORT_PROFILING
   std::lock_guard<std::mutex> lock(mutex_);
@@ -748,6 +856,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
     device_id_ptr[i] = static_cast<uint32_t>(device_list[i]);
   }
   GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num);
+
   rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get());
   if (rt_ret != RT_ERROR_NONE) {
     GELOGE(FAILED, "Runtime profiler config proc failed.");
diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h
index 8fb59216..66cefc32 100755
--- a/ge/common/profiling/profiling_manager.h
+++ b/ge/common/profiling/profiling_manager.h
@@ -39,6 +39,10 @@ namespace {
   const std::string GE_PROFILING_MODULE = "Framework";
 }  // namespace
 namespace ge {
+struct DeviceSubsInfo {  // per-device bookkeeping for model-subscribed profiling
+  uint64_t module;           // profiling module value recorded when the device was subscribed
+  uint32_t subscribe_count;  // number of subscriptions currently counted for the device
+};
 // register Plugin
 class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY PluginImpl : public Msprof::Engine::PluginIntf {
  public:
@@ -73,6 +77,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
   ge::Status InitFromOptions(const Options &options);
   ge::Status InitFromAclCfg(const std::string &config);
   ge::Status StartProfiling(int32_t iter, int32_t device_id);
+  void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module);
+  ge::Status ProfModelSubscribe(uint64_t module, void *model);
+  ge::Status ProfModelUnsubscribe(void *model);
   ge::Status ProfInit(uint64_t module);
   ge::Status ProfFinalize();
   ge::Status ProfStartProfiling(uint64_t module, const std::map<std::string, std::string> &config_para);
@@ -84,13 +91,16 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
   bool ProfilingModelLoadOn() const { return is_load_profiling_; }
   bool ProfilingModelExecuteOn() const;
   bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // only used  by command pattern
+  bool IsAclApiMode() const { return is_acl_api_mode_; }  // read access to is_acl_api_mode_
   int32_t GetOpTraceIterNum() const { return op_trace_iter_num_; }
-  void ReportProfilingData(const std::vector<TaskDescInfo> &task_desc_info,
-                           const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info);
+  void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
+                           const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
+                           bool check_device);
   void Report(const int32_t &device_id, const string &data, Msprof::Engine::Reporter &reporter,
               Msprof::Engine::ReporterData &reporter_data);
-  void ProfilingTaskDescInfo(const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id);
-  void ProfilingGraphDescInfo(const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
+  void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
+                             const int32_t &device_id);
+  void ProfilingGraphDescInfo(uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
                               const int32_t &device_id);
   void SetProfilingConfig(const string &profiling_cfg);
   vector<int32_t> GetProfilingDeviceId() const { return  device_id_; }
@@ -122,6 +132,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
   string task_trace_conf_;
   const ProfilingEngineImpl engine_;
   map<int32_t, uint64_t> device_id_module_map_; // key: device_id, value: profiling on module
+  map<uint32_t, DeviceSubsInfo> subs_dev_module_; // key: device_id, value: profiling on module
+  uint32_t subscribe_count_;
   std::mutex mutex_;
 };
 }  // namespace ge
diff --git a/ge/common/types.cc b/ge/common/types.cc
index 0d10f8b3..7ae0daa3 100755
--- a/ge/common/types.cc
+++ b/ge/common/types.cc
@@ -54,6 +54,7 @@ const std::map<std::string, std::string> PROFILE_COMPONENT_MAP{
     {"runtime", RTS_PROFILE},
 };
 const std::string PROFILE_CONFIG = "config";
+const std::string PROFILE_MODEL_ID = "modelId";
 
 REGISTER_OPTYPE_DEFINE(DATA, "Data");
 REGISTER_OPTYPE_DEFINE(AIPPDATA, "AippData");
diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc
index ad2879c2..967bf420 100755
--- a/ge/executor/ge_executor.cc
+++ b/ge/executor/ge_executor.cc
@@ -1062,6 +1062,19 @@ Status GeExecutor::ReleaseSingleOpResource(void *stream) {
   return SingleOpManager::GetInstance().ReleaseResource(stream);
 }
 
+// Writes the device id of the loaded model `model_id` to `device_id`; FAILED if it is not loaded.
+Status GeExecutor::GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id) {
+  auto model_manager = ModelManager::GetInstance();
+  GE_CHECK_NOTNULL(model_manager);
+  auto davinci_model = model_manager->GetModel(model_id);
+  if (davinci_model == nullptr) {
+    GELOGE(FAILED, "Model id: %u is invalid or model is not loaded.", model_id);
+    return FAILED;
+  }
+  device_id = davinci_model->GetDeviceId();
+  return SUCCESS;
+}
+
 Status GeExecutor::GetBatchInfoSize(uint32_t model_id, size_t &shape_count) {
   std::vector<std::vector<int64_t>> batch_info;
   int32_t dynamic_type = static_cast<int32_t>(FIXED);
diff --git a/ge/graph/load/new_model_manager/data_dumper.h b/ge/graph/load/new_model_manager/data_dumper.h
index 2acb963b..46ead310 100755
--- a/ge/graph/load/new_model_manager/data_dumper.h
+++ b/ge/graph/load/new_model_manager/data_dumper.h
@@ -86,6 +86,7 @@ class DataDumper {
   void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; }
   const DumpProperties &GetDumpProperties() const { return dump_properties_; }
   bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const;
+  const std::vector<OpDescInfo> &GetAllOpDescInfo() const { return op_desc_info_; }  // all saved op info
 
   // Dump exception info
   Status DumpExceptionInput(const OpDescInfo &op_desc_info, const string &dump_file);
diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc
index 5ac825cc..f310e18e 100755
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -258,7 +258,6 @@ Status DavinciModel::Assign(const GeModelPtr &ge_model) {
 ///
 void DavinciModel::Shrink() {
   ge_model_.reset();  // delete object.
-  op_list_.clear();
 }
 
 Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
@@ -653,18 +652,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
     GE_IF_BOOL_EXEC(IsBroadCastOpData(node),
                     (void)ge::AttrUtils::SetStr(op_desc, VAR_ATTR_VAR_IS_BROADCAST, "var_is_restore"););
   }
-  // for profiling
-  op_name_map_ = compute_graph->GetGraphOpName();
-
-  vector<string> op_name;
-  GE_IF_BOOL_EXEC(ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_TASK_INDEX_OP_NAME, op_name),
-                  GELOGI("get str of task_index_op_name"));
-  if (op_name_map_.empty()) {
-    for (size_t idx = 0; idx < op_name.size(); idx++) {
-      op_name_map_[idx] = op_name[idx];
-    }
-    GELOGI("Infer profiling: op_name_size(%zu)", op_name.size());
-  }
 
   GE_CHK_STATUS_RET(InitNodes(compute_graph), "Init nodes failed");
 
@@ -700,15 +687,13 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
   }
 
   // collect profiling for ge
-  if (ProfilingManager::Instance().ProfilingModelLoadOn()) {
-    std::vector<ComputeGraphDescInfo> compute_graph_desc_info;
-    Status ret1 = GetComputeGraphInfo(compute_graph, compute_graph_desc_info);
-    if (ret1 != SUCCESS) {
-      GELOGE(ret1, "GetComputeGraphInfo failed.");
-      return ret1;
+  auto &profiling_manager = ProfilingManager::Instance();
+  if (profiling_manager.ProfilingModelLoadOn()) {
+    Status p_ret = ReportProfilingData(!profiling_manager.IsAclApiMode());
+    if (p_ret != SUCCESS) {
+      GELOGE(p_ret, "Report profiling data failed.");
+      return p_ret;
     }
-    ProfilingManager::Instance().ReportProfilingData(GetTaskDescInfo(), compute_graph_desc_info);
-    GE_CHK_STATUS(SinkModelProfile(), "Sink model profile failed.");
   }
 
   Shrink();
@@ -716,6 +701,20 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
   return ret;
 }
 
+// Reports this model's task and graph descriptions to the ProfilingManager, then clears op_list_.
+Status DavinciModel::ReportProfilingData(bool check_device) {
+  std::vector<ComputeGraphDescInfo> graph_desc_list;
+  const Status status = GetComputeGraphInfo(graph_desc_list);
+  if (status != SUCCESS) {
+    GELOGE(status, "GetComputeGraphInfo failed.");
+    return status;
+  }
+  ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), graph_desc_list, check_device);
+  GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed.");
+  op_list_.clear();
+  return SUCCESS;
+}
+
 ///
 /// @ingroup ge
 /// @brief Travel all nodes and determine if destruction is required.
@@ -2909,34 +2908,25 @@ Status DavinciModel::DistributeTask() {
         SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs());
       }
     }
-    // get op_name by task_index
-    if (task->GetCtx() != nullptr) {
-      auto iter = op_name_map_.find(task_index);
-      if (iter == op_name_map_.end()) {
-        continue;
-      }
-
-      // else task index is found in op_name_map_
-      TaskDescInfo task_desc_info;
-      string op_name = op_name_map_[task_index];
-      if (!om_name_.empty()) {
-        task_desc_info.model_name = om_name_;
-      } else {
-        task_desc_info.model_name = name_;
-      }
-      task_desc_info.op_name = op_name;
-      task_desc_info.block_dim = model_task_def->task(task_index).kernel().block_dim();
-      task_desc_info.task_id = task->GetTaskID();
-      task_desc_info.stream_id = task->GetStreamId();
-      task_desc_info_.emplace_back(task_desc_info);
-      if (flag) {
-        if (task->GetSktTaskID() != 0xFFFFFFFF) {
-          TaskDescInfo task_desc_info;
-          string op_name = "super_kernel_" + to_string(task_index);
-          task_desc_info.op_name = op_name;
-          task_desc_info.task_id = task->GetSktTaskID();
-          task_desc_info_.emplace_back(task_desc_info);
-        }
+    // Load task info for profiling
+    TaskDescInfo task_desc_info;
+    if (!om_name_.empty()) {
+      task_desc_info.model_name = om_name_;
+    } else {
+      task_desc_info.model_name = name_;
+    }
+    task_desc_info.op_name = op->GetName();
+    task_desc_info.block_dim = model_task_def->task(task_index).kernel().block_dim();
+    task_desc_info.task_id = task->GetTaskID();
+    task_desc_info.stream_id = task->GetStreamId();
+    task_desc_info_.emplace_back(task_desc_info);
+    if (flag) {
+      if (task->GetSktTaskID() != 0xFFFFFFFF) {
+        TaskDescInfo task_desc_info;
+        string op_name = "super_kernel_" + to_string(task_index);
+        task_desc_info.op_name = op_name;
+        task_desc_info.task_id = task->GetSktTaskID();
+        task_desc_info_.emplace_back(task_desc_info);
       }
     }
   }
@@ -3826,50 +3816,31 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea
   main_follow_stream_mapping_[main_stream_id].emplace_back(stream);
 }
 
-Status DavinciModel::GetComputeGraphInfo(const ComputeGraphPtr &graph, vector<ComputeGraphDescInfo> &graph_desc_info) {
+Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info) {
   GELOGI("GetComputeGraphInfo start.");
-  for (auto &node : graph->GetAllNodes()) {
+  auto &all_op_desc = data_dumper_.GetAllOpDescInfo();  // op info held by the data dumper
+  for (auto &op_desc : all_op_desc) {  // one ComputeGraphDescInfo is appended per entry
     ComputeGraphDescInfo compute_graph_info;
-    auto op_desc = node->GetOpDesc();
-    if (op_desc == nullptr) {
-      GELOGE(PARAM_INVALID, "op_desc is nullptr.");
-      return PARAM_INVALID;
+    if (!om_name_.empty()) {  // use om_name_ when it is set, otherwise fall back to name_
+      compute_graph_info.model_name = om_name_;
+    } else {
+      compute_graph_info.model_name = name_;
     }
+    compute_graph_info.op_name = op_desc.op_name;
+    compute_graph_info.op_type = op_desc.op_type;
+    compute_graph_info.input_format = op_desc.input_format;
+    compute_graph_info.input_shape = op_desc.input_shape;
+    compute_graph_info.input_data_type = op_desc.input_data_type;
+    compute_graph_info.output_format = op_desc.output_format;
+    compute_graph_info.output_shape = op_desc.output_shape;
+    compute_graph_info.output_data_type = op_desc.output_data_type;
 
-    auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID);
-    if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) &&
-        op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) {
-      if (!om_name_.empty()) {
-        compute_graph_info.model_name = om_name_;
-      } else {
-        compute_graph_info.model_name = name_;
-      }
-      compute_graph_info.op_name = op_desc->GetName();
-      compute_graph_info.op_type = op_desc->GetType();
-
-      for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
-        GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i);
-        if (input_desc == nullptr) {
-          continue;
-        }
-        compute_graph_info.input_format.emplace_back(input_desc->GetFormat());
-        compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims());
-        compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType());
-      }
-
-      for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) {
-        GeTensorDesc output_desc = op_desc->GetOutputDesc(j);
-        compute_graph_info.output_format.emplace_back(output_desc.GetFormat());
-        compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
-        compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
-      }
-
-      graph_desc_info.emplace_back(compute_graph_info);
-    }
+    graph_desc_info.emplace_back(compute_graph_info);
   }
   GELOGI("GetComputeGraphInfo end.");
   return SUCCESS;
 }
+
 void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) {
   if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) {
     tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_;
diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h
index 964057a4..ccf6ff25 100755
--- a/ge/graph/load/new_model_manager/davinci_model.h
+++ b/ge/graph/load/new_model_manager/davinci_model.h
@@ -439,6 +439,8 @@ class DavinciModel {
 
   Status SinkTimeProfile(const InputData &current_data);
 
+  Status ReportProfilingData(bool check_device = true);
+
   void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) {
     data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id);
   }
@@ -830,7 +832,7 @@ class DavinciModel {
   Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id);
 
   // get desc info of graph for profiling
-  Status GetComputeGraphInfo(const ComputeGraphPtr &graph, vector<ComputeGraphDescInfo> &graph_desc_info);
+  Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info);
 
   void SetDataDumperArgs(const ComputeGraphPtr &compute_graph);
 
@@ -949,7 +951,6 @@ class DavinciModel {
   std::map<std::string, uint32_t> used_tbe_handle_map_;
 
   // for profiling task and graph info
-  std::map<uint32_t, std::string> op_name_map_;
   std::vector<TaskDescInfo> task_desc_info_;
 
   int64_t maxDumpOpNum_;
diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc
index ec111c3d..a286ff5c 100755
--- a/ge/graph/load/new_model_manager/model_manager.cc
+++ b/ge/graph/load/new_model_manager/model_manager.cc
@@ -43,6 +43,8 @@ const std::string kCmdTypeProfInit = "prof_init";
 const std::string kCmdTypeProfFinalize = "prof_finalize";
 const std::string kCmdTypeProfStart = "prof_start";
 const std::string kCmdTypeProfStop = "prof_stop";
+const std::string kCmdTypeProfModelSubscribe = "prof_model_subscribe";
+const std::string kCmdTypeProfModelUnsubscribe = "prof_model_cancel_subscribe";
 const char *const kBatchLoadBuf = "batchLoadsoFrombuf";
 const char *const kDeleteCustOp = "deleteCustOp";
 struct CustAicpuSoBuf {
@@ -334,11 +336,9 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
 
     GELOGI("Parse model %u success.", model_id);
 
-    if (ProfilingManager::Instance().ProfilingModelLoadOn()) {
-      davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 +
-                                                       timespec.tv_nsec));  // 1000 ^ 3 converts second to nanosecond
-      davinci_model->SetProfileTime(MODEL_LOAD_END);
-    }
+    davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 +
+                                                     timespec.tv_nsec));  // 1000 ^ 3 converts second to nanosecond
+    davinci_model->SetProfileTime(MODEL_LOAD_END);
   } while (0);
 
   GE_CHK_RT(rtDeviceReset(static_cast<int32_t>(GetContext().DeviceId())));
@@ -565,7 +565,9 @@ Status ModelManager::HandleCommand(const Command &command) {
       {kCmdTypeProfile, HandleProfileCommand}, {kCmdTypeDump, HandleDumpCommand},
       {kCmdTypeProfiling, HandleAclProfilingCommand}, {kCmdTypeProfInit, HandleProfInitCommand},
       {kCmdTypeProfFinalize, HandleProfFinalizeCommand}, {kCmdTypeProfStart, HandleProfStartCommand},
-      {kCmdTypeProfStop, HandleProfStopCommand}};
+      {kCmdTypeProfStop, HandleProfStopCommand},
+      {kCmdTypeProfModelSubscribe, HandleProfModelSubscribeCommand},
+      {kCmdTypeProfModelUnsubscribe, HandleProfModelUnsubscribeCommand}};
 
   auto iter = cmds.find(command.cmd_type);
   if (iter == cmds.end()) {
@@ -591,6 +593,77 @@ Status ModelManager::HandleAclProfilingCommand(const Command &command) {
   return SUCCESS;
 }
 
+// Resolves the DavinciModel addressed by a command whose first two parameters are the PROFILE_MODEL_ID
+// key and a decimal model id. Fails when the id is malformed or no such model is loaded.
+Status ModelManager::GetModelByCmd(const Command &command,
+                                   std::shared_ptr<DavinciModel> &davinci_model) {
+  if (command.cmd_params.size() < kCmdParSize) {
+    GELOGE(PARAM_INVALID, "When the cmd_type is '%s', the size of cmd_params must larger than 2.",
+        command.cmd_type.c_str());
+    return PARAM_INVALID;
+  }
+  if (command.cmd_params[0] != PROFILE_MODEL_ID) {
+    GELOGE(FAILED, "The model_id parameter is not found in the command.");
+    return FAILED;
+  }
+  const std::string &value = command.cmd_params[1];
+  long long model_id = -1;
+  size_t parsed_len = 0;
+  try {
+    model_id = std::stoll(value, &parsed_len);
+  } catch (std::invalid_argument &) {
+    GELOGE(PARAM_INVALID, "Model id: %s is invalid.", value.c_str());
+    return PARAM_INVALID;
+  } catch (std::out_of_range &) {
+    GELOGE(PARAM_INVALID, "Model id: %s is out of range.", value.c_str());
+    return PARAM_INVALID;
+  } catch (...) {
+    GELOGE(FAILED, "Model id: %s cannot change to int.", value.c_str());
+    return FAILED;
+  }
+  // Model ids are uint32_t: reject trailing characters, negative values and anything above 0xFFFFFFFF.
+  const bool id_valid = (parsed_len == value.size()) && (model_id >= 0) && (model_id <= 0xFFFFFFFFLL);
+  auto model_manager = ModelManager::GetInstance();
+  GE_CHECK_NOTNULL(model_manager);
+  davinci_model = id_valid ? model_manager->GetModel(static_cast<uint32_t>(model_id)) : nullptr;
+  if (davinci_model == nullptr) {
+    GELOGE(FAILED, "Model id: %s is invalid or model is not loaded.", value.c_str());
+    return FAILED;
+  }
+  return SUCCESS;
+}
+
+// Handles "prof_model_subscribe": resolves the model named in the command and subscribes it.
+Status ModelManager::HandleProfModelSubscribeCommand(const Command &command) {
+  std::shared_ptr<DavinciModel> target_model;
+  const Status lookup_ret = GetModelByCmd(command, target_model);
+  if (lookup_ret != SUCCESS) {
+    return lookup_ret;
+  }
+  // ProfilingManager takes the model as an opaque pointer.
+  void *model_handle = static_cast<void *>(target_model.get());
+  const Status subscribe_ret = ProfilingManager::Instance().ProfModelSubscribe(command.module_index, model_handle);
+  if (subscribe_ret != SUCCESS) {
+    GELOGE(FAILED, "Handle prof model subscribe failed.");
+    return FAILED;
+  }
+  return SUCCESS;
+}
+
+// Handles "prof_model_cancel_subscribe": resolves the model named in the command and unsubscribes it.
+Status ModelManager::HandleProfModelUnsubscribeCommand(const Command &command) {
+  std::shared_ptr<DavinciModel> target_model;
+  const Status lookup_ret = GetModelByCmd(command, target_model);
+  if (lookup_ret != SUCCESS) {
+    return lookup_ret;
+  }
+  void *model_handle = static_cast<void *>(target_model.get());
+  if (ProfilingManager::Instance().ProfModelUnsubscribe(model_handle) != SUCCESS) {
+    GELOGE(FAILED, "Handle prof model unsubscribe failed.");
+    return FAILED;
+  }
+  return SUCCESS;
+}
+
 Status ModelManager::HandleProfInitCommand(const Command &command) {
   uint64_t module_index = command.module_index;
   if (ProfilingManager::Instance().ProfInit(module_index) != SUCCESS) {
@@ -973,11 +1046,9 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
 
     GELOGI("Parse model %u success.", model_id);
 
-    if (ProfilingManager::Instance().ProfilingModelLoadOn()) {
-      davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 +
-                                                       timespec.tv_nsec));  // 1000 ^ 3 converts second to nanosecond
-      davinci_model->SetProfileTime(MODEL_LOAD_END);
-    }
+    davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 +
+                                                     timespec.tv_nsec));  // 1000 ^ 3 converts second to nanosecond
+    davinci_model->SetProfileTime(MODEL_LOAD_END);
 
     GE_IF_BOOL_EXEC(ret == SUCCESS, device_count++);
     return SUCCESS;
diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h
index d6a89d6b..8d46e578 100755
--- a/ge/graph/load/new_model_manager/model_manager.h
+++ b/ge/graph/load/new_model_manager/model_manager.h
@@ -158,10 +158,15 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
   static ge::Status HandleAclProfilingCommand(const Command &command);
   static ge::Status HandleProfileCommand(const Command &command);
   static ge::Status HandleDumpCommand(const Command &command);
+  static ge::Status HandleProfModelSubscribeCommand(const Command &command);
+  static ge::Status HandleProfModelUnsubscribeCommand(const Command &command);
   static ge::Status HandleProfInitCommand(const Command &command);
   static ge::Status HandleProfFinalizeCommand(const Command &command);
   static ge::Status HandleProfStartCommand(const Command &command);
   static ge::Status HandleProfStopCommand(const Command &command);
+
+  static ge::Status GetModelByCmd(const Command &command,
+                                  std::shared_ptr<DavinciModel> &davinci_model);
   ///
   /// @ingroup domi_ome
   /// @brief get model memory usage
diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc
index d230b949..e275150a 100755
--- a/ge/hybrid/executor/worker/execution_engine.cc
+++ b/ge/hybrid/executor/worker/execution_engine.cc
@@ -259,7 +259,9 @@ Status NodeDoneCallback::ProfilingReport() {
     return profiling_ret;
   }
 
-  ProfilingManager::Instance().ReportProfilingData(task_desc_info, compute_graph_info);
+  auto &profiling_manager = ProfilingManager::Instance();
+  profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info,
+                                        !profiling_manager.IsAclApiMode());
   return SUCCESS;
 }
 
diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h
index 0644b0f2..038b1cf6 100644
--- a/inc/framework/common/types.h
+++ b/inc/framework/common/types.h
@@ -70,6 +70,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFIL
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_STOP_VALUE;
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::map<std::string, std::string> PROFILE_COMPONENT_MAP;
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_CONFIG;
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_MODEL_ID;
 
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASKS;
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR;
@@ -567,10 +568,10 @@ enum ModelCheckType {
 /// @brief dynamic input type
 ///
 enum DynamicInputType {
-    FIXED = 0,   // default mode
-    DYNAMIC_BATCH = 1,
-    DYNAMIC_IMAGE = 2,
-    DYNAMIC_DIMS = 3
+  FIXED = 0,  // default mode
+  DYNAMIC_BATCH = 1,
+  DYNAMIC_IMAGE = 2,
+  DYNAMIC_DIMS = 3
 };
 
 ///
diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h
index ba90fd03..17dbf928 100644
--- a/inc/framework/executor/ge_executor.h
+++ b/inc/framework/executor/ge_executor.h
@@ -38,14 +38,14 @@ class DynamicSingleOp;
 struct RunModelData {
   uint32_t index;  // Data index
   uint32_t modelId;
-  std::vector<DataBuffer> blobs;      // All input/output data buffer
-  uint32_t timestamp;                 // Data creation time
-  uint32_t timeout;                   // Processing timeout
-  uint64_t request_id = 0;            // Request ID
-  uint64_t dynamic_batch_size = 0;    // Dynamic batch size scene, set dynamic size, not supported by default:0
-  uint64_t dynamic_image_height = 0;  // Dynamic image size scene, set image height, not supported by default:0
-  uint64_t dynamic_image_width = 0;   // Dynamic image size scene, set image width, not supported by default:0
-  std::vector<uint64_t> dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty
+  std::vector<DataBuffer> blobs;       // All input/output data buffers
+  uint32_t timestamp;                  // Data creation time
+  uint32_t timeout;                    // Processing timeout
+  uint64_t request_id = 0;             // Request ID
+  uint64_t dynamic_batch_size = 0;     // Dynamic batch size scenario: the batch size to set; default 0 (not supported)
+  uint64_t dynamic_image_height = 0;   // Dynamic image size scenario: the image height to set; default 0 (not supported)
+  uint64_t dynamic_image_width = 0;    // Dynamic image size scenario: the image width to set; default 0 (not supported)
+  std::vector<uint64_t> dynamic_dims;  // Dynamic dims scenario: the dims to set; default empty (not supported)
 };
 
 class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
@@ -264,14 +264,14 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
   static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream,
                                         DynamicSingleOp **single_op);
 
-  static ge::Status ExecuteAsync(DynamicSingleOp *executor,
-                                 const std::vector<GeTensorDesc> &input_desc,
-                                 const std::vector<DataBuffer> &inputs,
-                                 std::vector<GeTensorDesc> &output_desc,
+  static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc,
+                                 const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc,
                                  std::vector<DataBuffer> &outputs);
 
   static ge::Status ReleaseSingleOpResource(void *stream);
 
+  static ge::Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id);
+
   ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count);
   ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);
   ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims,