From 10662d550ff5a712d04c503a662b2e9d0aa74363 Mon Sep 17 00:00:00 2001
From: zhou_chao1993 <zhouchao46@huawei.com>
Date: Wed, 3 Mar 2021 11:05:35 +0800
Subject: [PATCH] dynamic shape over flow

---
 ge/CMakeLists.txt                             |   2 +
 ge/common/dump/dump_manager.cc                |   8 +-
 ge/common/dump/dump_op.cc                     |   6 +-
 ge/common/dump/dump_properties.h              |   2 +-
 ge/common/dump/opdebug_register.cc            | 148 ++++++++++++++++++
 ge/common/dump/opdebug_register.h             |  44 ++++++
 ge/executor/CMakeLists.txt                    |   1 +
 ge/graph/load/model_manager/data_dumper.h     |  53 +++----
 ge/graph/load/model_manager/davinci_model.cc  |  74 ++-------
 ge/graph/load/model_manager/davinci_model.h   |   4 +-
 .../executor/hybrid_model_async_executor.cc   |  40 +++++
 .../executor/hybrid_model_async_executor.h    |   7 +
 ge/hybrid/executor/worker/execution_engine.cc |   6 +-
 ge/hybrid/model/hybrid_model.h                |   4 +
 .../aicore/aicore_node_executor.cc            |  21 +++
 .../aicore/aicore_node_executor.h             |   1 +
 .../compiledsubgraph/known_node_executor.cc   |   2 +-
 ge/hybrid/node_executor/task_context.cc       |   8 +
 ge/hybrid/node_executor/task_context.h        |   5 +
 ge/single_op/task/op_task.cc                  |  26 ++-
 tests/depends/runtime/src/runtime_stub.cc     |   4 +
 tests/ut/ge/CMakeLists.txt                    |   2 +
 .../ut/ge/common/opdebug_register_unittest.cc |  51 ++++++
 23 files changed, 393 insertions(+), 126 deletions(-)
 create mode 100644 ge/common/dump/opdebug_register.cc
 create mode 100644 ge/common/dump/opdebug_register.h
 create mode 100644 tests/ut/ge/common/opdebug_register_unittest.cc

diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt
index 93c88cbf..8977ad85 100755
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -103,6 +103,7 @@ set(TRAIN_SRC_LIST
     "common/profiling/profiling_manager.cc"
     "common/dump/dump_manager.cc"
     "common/dump/dump_properties.cc"
+    "common/dump/opdebug_register.cc"
     "common/dump/dump_op.cc"
     "common/profiling/ge_profiling.cc"
     "common/profiling/ge_runner_profiling.cc"
@@ -427,6 +428,7 @@ set(INFER_SRC_LIST
     "common/dump/dump_properties.cc"
     "common/dump/dump_manager.cc"
     "common/dump/dump_op.cc"
+    "common/dump/opdebug_register.cc"
     "common/dump/dump_server.cc"
     "common/helper/model_cache_helper.cc"
     "ge_local_engine/engine/host_cpu_engine.cc"
diff --git a/ge/common/dump/dump_manager.cc b/ge/common/dump/dump_manager.cc
index 74324059..a659d9c6 100644
--- a/ge/common/dump/dump_manager.cc
+++ b/ge/common/dump/dump_manager.cc
@@ -104,8 +104,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const DumpProperties &DumpManager::GetDumpProperties(
   uint64_t session_id) {
   std::lock_guard<std::mutex> lock(mutex_);
-  // If session_id is not found in dump_properties_map_, operator[] will insert one.
-  return dump_properties_map_[session_id];
+  const auto found = dump_properties_map_.find(session_id);
+  if (found == dump_properties_map_.end()) {
+    static DumpProperties default_properties;  // shared fallback so an unknown session never inserts into the map
+    return default_properties;
+  }
+  return found->second;
 }
 
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpManager::AddDumpProperties(
diff --git a/ge/common/dump/dump_op.cc b/ge/common/dump/dump_op.cc
index 5c768e22..0becbdc8 100755
--- a/ge/common/dump/dump_op.cc
+++ b/ge/common/dump/dump_op.cc
@@ -219,9 +219,9 @@ Status DumpOp::LaunchDumpOp() {
   op_mapping_info.set_dump_path(dump_path);
   op_mapping_info.set_flag(kAicpuLoadFlag);
   op_mapping_info.set_dump_step(dump_properties_.GetDumpStep());
-  if (!dynamic_model_name_.empty()) {
+  op_mapping_info.set_model_id(dynamic_model_id_);
+  if (!dynamic_model_name_.empty() && dump_properties_.IsDumpOpen()) {
     op_mapping_info.set_model_name(dynamic_model_name_);
-    op_mapping_info.set_model_id(dynamic_model_id_);
   }
   SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
   GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(),
@@ -253,7 +253,7 @@ Status DumpOp::LaunchDumpOp() {
     }
     op_mapping_info.mutable_task()->Add(std::move(task));
   }
-  if (dump_properties_.GetDumpMode() == kDumpAll) {
+  if (dump_properties_.GetDumpMode() == kDumpAll || dump_properties_.IsOpDebugOpen()) {
     auto ret = DumpOutput(task);
     if (ret != SUCCESS) {
       GELOGE(ret, "Dump output failed when in dumping all");
diff --git a/ge/common/dump/dump_properties.h b/ge/common/dump/dump_properties.h
index 67f8c00e..8c064d58 100644
--- a/ge/common/dump/dump_properties.h
+++ b/ge/common/dump/dump_properties.h
@@ -81,11 +81,11 @@ class DumpProperties {
 
   const std::string &GetEnableDumpDebug() const {return enable_dump_debug_;}
 
+
  private:
   void CopyFrom(const DumpProperties &other);
 
   void SetDumpDebugOptions();
-
   std::string enable_dump_;
   std::string enable_dump_debug_;
 
diff --git a/ge/common/dump/opdebug_register.cc b/ge/common/dump/opdebug_register.cc
new file mode 100644
index 00000000..340b89e5
--- /dev/null
+++ b/ge/common/dump/opdebug_register.cc
@@ -0,0 +1,154 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "opdebug_register.h"
+
+namespace {
+const size_t kOpDebugMemorySize = 2048UL;
+const size_t kDebugP2pSize = 8UL;
+}  // namespace
+namespace ge {
+namespace {
+// Frees a buffer obtained from rtMalloc and resets the pointer; a null pointer is ignored.
+// A failing rtFree is only logged because every caller is on a best-effort cleanup path.
+void FreeDeviceMem(void *&addr) {
+  if (addr == nullptr) {
+    return;
+  }
+  const rtError_t rt_ret = rtFree(addr);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGW("rtFree failed, ret: 0x%X", rt_ret);
+  }
+  addr = nullptr;
+}
+}  // namespace
+
+OpdebugRegister::~OpdebugRegister() {}
+
+// Allocates the op debug buffers, registers overflow detection for model_handle with the runtime and
+// hands the resulting task id, stream id and p2p address to data_dumper.
+// Returns SUCCESS, or the failing status after releasing any buffer allocated by this call.
+Status OpdebugRegister::RegisterDebugForModel(rtModel_t model_handle, uint32_t op_debug_mode, DataDumper &data_dumper) {
+  GELOGD("Start to register debug for model in overflow");
+  auto ret = MallocMemForOpdebug();
+  if (ret != SUCCESS) {
+    GELOGE(ret, "Malloc memory for opdebug in model overflow failed ,ret:0x%X", ret);
+    return ret;
+  }
+  uint32_t debug_stream_id = 0;
+  uint32_t debug_task_id = 0;
+  auto rt_ret = rtDebugRegister(model_handle, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "rtDebugRegister error, ret: 0x%X", rt_ret);
+    // The callers only unregister after a successful registration, so release the buffers here.
+    FreeDeviceMem(op_debug_addr_);
+    FreeDeviceMem(p2p_debug_addr_);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  GELOGD("debug_task_id:%u, debug_stream_id:%u in model overflow", debug_task_id, debug_stream_id);
+  data_dumper.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, true);
+  return SUCCESS;
+}
+
+// Unregisters overflow detection for model_handle (skipped when it is null) and frees the debug buffers.
+// Runtime failures are logged as warnings and do not stop the cleanup.
+void OpdebugRegister::UnregisterDebugForModel(rtModel_t model_handle) {
+  if (model_handle != nullptr) {
+    GELOGD("start to call rtDebugUnRegister in model overflow.");
+    const rtError_t rt_ret = rtDebugUnRegister(model_handle);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGW("rtDebugUnRegister failed, ret: 0x%X", rt_ret);
+    }
+  }
+
+  FreeDeviceMem(op_debug_addr_);
+  FreeDeviceMem(p2p_debug_addr_);
+}
+
+// Stream-level counterpart of RegisterDebugForModel. The runtime registration call is only compiled when
+// ONLY_COMPILE_OPEN_SRC is defined; otherwise the task id and stream id are saved as 0.
+// Returns SUCCESS, or the failing status after releasing any buffer allocated by this call.
+Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_debug_mode, DataDumper &data_dumper) {
+  GELOGD("Start to register debug for stream in stream overflow");
+  auto ret = MallocMemForOpdebug();
+  if (ret != SUCCESS) {
+    GELOGE(ret, "Malloc memory for opdebug in stream overflow ,ret:0x%X", ret);
+    return ret;
+  }
+
+  uint32_t debug_stream_id = 0;
+  uint32_t debug_task_id = 0;
+#ifdef ONLY_COMPILE_OPEN_SRC
+  auto rt_ret = rtDebugRegisterForStream(stream, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "rtDebugRegisterForStream error, ret: 0x%X", rt_ret);
+    // The callers only unregister after a successful registration, so release the buffers here.
+    FreeDeviceMem(op_debug_addr_);
+    FreeDeviceMem(p2p_debug_addr_);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+#endif
+  GELOGD("debug_task_id:%u, debug_stream_id:%u in stream overflow.", debug_task_id, debug_stream_id);
+  data_dumper.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, true);
+  return SUCCESS;
+}
+
+// Stream-level counterpart of UnregisterDebugForModel; the runtime unregistration call is only compiled
+// when ONLY_COMPILE_OPEN_SRC is defined. The debug buffers are always freed.
+void OpdebugRegister::UnregisterDebugForStream(rtStream_t stream) {
+#ifdef ONLY_COMPILE_OPEN_SRC
+  if (stream != nullptr) {
+    GELOGD("start call rtDebugUnRegisterForStream in unknown shape over flow.");
+    const rtError_t rt_ret = rtDebugUnRegisterForStream(stream);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGW("rtDebugUnRegisterForStream failed, ret: 0x%X", rt_ret);
+    }
+  }
+#endif
+
+  FreeDeviceMem(op_debug_addr_);
+  FreeDeviceMem(p2p_debug_addr_);
+}
+
+// Allocates op_debug_addr_ and p2p_debug_addr_ and copies the address of the former into the latter.
+// On any failure the buffers allocated so far are freed again, so nothing is left allocated on error.
+Status OpdebugRegister::MallocMemForOpdebug() {
+  rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+
+  uint64_t debug_addrs_tmp = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr_));
+  // For data dump, aicpu needs the pointer to pointer that save the real debug address.
+  rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
+    FreeDeviceMem(op_debug_addr_);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "rtMemcpy to p2p_addr error: 0x%X", rt_ret);
+    FreeDeviceMem(op_debug_addr_);
+    FreeDeviceMem(p2p_debug_addr_);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+
+  return SUCCESS;
+}
+
+}  // namespace ge
\ No newline at end of file
diff --git a/ge/common/dump/opdebug_register.h b/ge/common/dump/opdebug_register.h
new file mode 100644
index 00000000..1826287d
--- /dev/null
+++ b/ge/common/dump/opdebug_register.h
@@ -0,0 +1,58 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef GE_COMMON_DUMP_OPDEBUG_REGISTER_H_
+#define GE_COMMON_DUMP_OPDEBUG_REGISTER_H_
+
+#include <map>
+#include "common/debug/ge_log.h"
+#include "common/debug/log.h"
+#include "graph/load/model_manager/data_dumper.h"
+
+namespace ge {
+// Owns the two device buffers used for op debug (overflow detection) and wraps the runtime calls that
+// register and unregister it for a model or for a stream.
+// The destructor does not release anything: each successful Register* call must be paired with the
+// matching Unregister* call.
+class OpdebugRegister {
+ public:
+  OpdebugRegister() = default;
+  ~OpdebugRegister();
+
+  // The object holds raw device pointers that must be freed exactly once, so copying is disabled.
+  OpdebugRegister(const OpdebugRegister &) = delete;
+  OpdebugRegister &operator=(const OpdebugRegister &) = delete;
+
+  // Allocates the debug buffers, registers op debug for model_handle and saves the resulting task id,
+  // stream id and p2p address into data_dumper. Returns SUCCESS or the status of the failing step.
+  Status RegisterDebugForModel(rtModel_t model_handle, uint32_t op_debug_mode, DataDumper &data_dumper);
+  // Unregisters op debug for model_handle (skipped when null) and frees the debug buffers.
+  void UnregisterDebugForModel(rtModel_t model_handle);
+
+  // Same as RegisterDebugForModel, but registers op debug for a single stream.
+  Status RegisterDebugForStream(rtStream_t stream, uint32_t op_debug_mode, DataDumper &data_dumper);
+  // Same as UnregisterDebugForModel, but for a stream registered with RegisterDebugForStream.
+  void UnregisterDebugForStream(rtStream_t stream);
+
+ private:
+  // Allocates op_debug_addr_ and p2p_debug_addr_ and copies the address of the former into the latter.
+  Status MallocMemForOpdebug();
+
+  void *op_debug_addr_ = nullptr;   // buffer passed to the runtime debug registration call
+  void *p2p_debug_addr_ = nullptr;  // buffer holding the address of op_debug_addr_, given to the DataDumper
+};
+}  // namespace ge
+#endif  // GE_COMMON_DUMP_OPDEBUG_REGISTER_H_
diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt
index 31cbad7a..04654f99 100644
--- a/ge/executor/CMakeLists.txt
+++ b/ge/executor/CMakeLists.txt
@@ -17,6 +17,7 @@ set(SRC_LIST
     "../common/dump/dump_properties.cc"
     "../common/dump/dump_manager.cc"
     "../common/dump/dump_op.cc"
+    "../common/dump/opdebug_register.cc"
     "../common/profiling/ge_profiling.cc"
     "../graph/load/graph_loader.cc"
     "../graph/execute/graph_execute.cc"
diff --git a/ge/graph/load/model_manager/data_dumper.h b/ge/graph/load/model_manager/data_dumper.h
index 8e612688..fbe70cf0 100755
--- a/ge/graph/load/model_manager/data_dumper.h
+++ b/ge/graph/load/model_manager/data_dumper.h
@@ -36,21 +36,9 @@
 namespace ge {
 class DataDumper {
  public:
-  explicit DataDumper(const RuntimeParam &rsh)
-      : model_name_(),
-        model_id_(0),
-        runtime_param_(rsh),
-        dev_mem_load_(nullptr),
-        dev_mem_unload_(nullptr),
-        op_list_(),
-        input_map_(),
-        load_flag_(false),
-        device_id_(0),
-        global_step_(0),
-        loop_per_iter_(0),
-        loop_cond_(0),
-        compute_graph_(nullptr),
-        ref_info_() {}
+  // A reference member must not bind to a temporary (it would dangle), so bind to a function-local static instead.
+  DataDumper() : runtime_param_([]() -> const RuntimeParam & { static const RuntimeParam kDef{}; return kDef; }()) {}
+  explicit DataDumper(const RuntimeParam &rsh) : runtime_param_(rsh) {}
 
   ~DataDumper();
 
@@ -105,10 +93,10 @@ class DataDumper {
   // for inference data dump
   std::string om_name_;
 
-  uint32_t model_id_;
+  uint32_t model_id_ = 0;
   const RuntimeParam &runtime_param_;
-  void *dev_mem_load_;
-  void *dev_mem_unload_;
+  void *dev_mem_load_ = nullptr;
+  void *dev_mem_unload_ = nullptr;
 
   struct InnerDumpInfo;
   struct InnerInputMapping;
@@ -119,16 +107,15 @@ class DataDumper {
   uint32_t end_graph_stream_id_ = 0;
   bool is_end_graph_ = false;
   std::multimap<std::string, InnerInputMapping> input_map_;  // release after DavinciModel::Init
-  bool load_flag_;
-  uint32_t device_id_;
-  uintptr_t global_step_;
-  uintptr_t loop_per_iter_;
-  uintptr_t loop_cond_;
-  ComputeGraphPtr compute_graph_;  // release after DavinciModel::Init
-  std::map<OpDescPtr, void *> ref_info_;  // release after DavinciModel::Init
+  bool load_flag_ = false;
+  uint32_t device_id_ = 0;
+  uintptr_t global_step_ = 0;
+  uintptr_t loop_per_iter_ = 0;
+  uintptr_t loop_cond_ = 0;
+  ComputeGraphPtr compute_graph_ = nullptr;  // release after DavinciModel::Init
+  std::map<OpDescPtr, void *> ref_info_;     // release after DavinciModel::Init
   void *l1_fusion_addr_ = nullptr;
 
-
   uint32_t op_debug_task_id_ = 0;
   uint32_t op_debug_stream_id_ = 0;
   void *op_debug_addr_ = nullptr;
@@ -144,20 +131,16 @@ class DataDumper {
   Status DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task);
   Status DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task);
   Status DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Input &input, size_t i,
-                       const std::string &node_name_index);
+                      const std::string &node_name_index);
   Status ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info);
   void SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, aicpu::dump::OpMappingInfo &op_mapping_info);
   void SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void *op_debug_addr,
                            aicpu::dump::OpMappingInfo &op_mapping_info);
   Status ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info);
-  Status GenerateInput(aicpu::dump::Input &input,
-                       const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
-                       const uintptr_t &addr,
-                       size_t index);
-  Status GenerateOutput(aicpu::dump::Output &output,
-                        const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
-                        const uintptr_t &addr,
-                        size_t index);
+  Status GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
+                       const uintptr_t &addr, size_t index);
+  Status GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
+                        const uintptr_t &addr, size_t index);
   void GenerateOpBuffer(const int64_t &size, aicpu::dump::Task &task);
 };
 struct DataDumper::InnerDumpInfo {
diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc
index b7bb97ce..c2ba4bf4 100755
--- a/ge/graph/load/model_manager/davinci_model.cc
+++ b/ge/graph/load/model_manager/davinci_model.cc
@@ -232,6 +232,8 @@ DavinciModel::~DavinciModel() {
 
       FreeP2PMem();
 
+      OpDebugUnRegister();
+
       if (l1_fusion_addr_ != nullptr) {
         GE_CHK_RT(rtFree(l1_fusion_addr_));
       }
@@ -242,8 +244,6 @@ DavinciModel::~DavinciModel() {
       }
     }
 
-    OpDebugUnRegister();
-
     ReleaseTask();
     CleanTbeHandle();
 
@@ -568,77 +568,21 @@ Status DavinciModel::SetTSDevice() {
 }
 
 Status DavinciModel::OpDebugRegister() {
-  bool is_op_debug = false;
-  (void)ge::AttrUtils::GetBool(ge_model_, ATTR_OP_DEBUG_FLAG, is_op_debug);
-  GELOGD("The value of op debug in ge_model is %d.", is_op_debug);
-  if (is_op_debug) {
-    debug_reg_mutex_.lock();
-    rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
-      return RT_ERROR_TO_GE_STATUS(rt_ret);
-    }
-
-    uint64_t debug_addrs_tmp = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr_));
-
-    // For data dump, aicpu needs the pointer to pointer that save the real debug address.
-    rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
-      return RT_ERROR_TO_GE_STATUS(rt_ret);
-    }
-    rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "rtMemcpy to p2p_addr error: 0x%X", rt_ret);
-      return RT_ERROR_TO_GE_STATUS(rt_ret);
-    }
-
-    uint32_t op_debug_mode = 0;
-    (void)ge::AttrUtils::GetInt(ge_model_, ATTR_OP_DEBUG_MODE, op_debug_mode);
-    GELOGD("The value of op_debug_mode in ge_model_ is %u.", op_debug_mode);
-    uint32_t debug_task_id = 0;
-    uint32_t debug_stream_id = 0;
-    rt_ret = rtDebugRegister(rt_model_handle_, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "rtDebugRegister error, ret: 0x%X", rt_ret);
-      return RT_ERROR_TO_GE_STATUS(rt_ret);
+  if (GetDumpProperties().IsOpDebugOpen()) {
+    const uint32_t debug_mode = GetDumpProperties().GetOpDebugMode();  // mode requested by the dump properties
+    const auto status = opdebug_register_.RegisterDebugForModel(rt_model_handle_, debug_mode, data_dumper_);
+    if (status != SUCCESS) {
+      GELOGE(status, "Register known shape op debug failed, ret: 0x%X", status);
+      return status;
     }
-    GELOGI("debug_task_id:%d, debug_stream_id:%u", debug_task_id, debug_stream_id);
     is_op_debug_reg_ = true;
-
-    data_dumper_.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, is_op_debug);
   }
-
   return SUCCESS;
 }
 
 void DavinciModel::OpDebugUnRegister() {
   if (is_op_debug_reg_) {
-    debug_reg_mutex_.unlock();
-    rtError_t rt_ret = RT_ERROR_NONE;
-    if (rt_model_handle_ != nullptr) {
-      GELOGD("start call debug_unregister.");
-      rt_ret = rtDebugUnRegister(rt_model_handle_);
-      if (rt_ret != RT_ERROR_NONE) {
-        GELOGW("rtDebugUnRegister failed, ret: 0x%X", rt_ret);
-      }
-    }
-
-    if (op_debug_addr_ != nullptr) {
-      rt_ret = rtFree(op_debug_addr_);
-      if (rt_ret != RT_ERROR_NONE) {
-        GELOGW("rtFree failed, ret: 0x%X", rt_ret);
-      }
-      op_debug_addr_ = nullptr;
-    }
-
-    if (p2p_debug_addr_ != nullptr) {
-      rt_ret = rtFree(p2p_debug_addr_);
-      if (rt_ret != RT_ERROR_NONE) {
-        GELOGW("rtFree failed, ret: 0x%X", rt_ret);
-      }
-      p2p_debug_addr_ = nullptr;
-    }
+    opdebug_register_.UnregisterDebugForModel(rt_model_handle_);
     is_op_debug_reg_ = false;
   }
   return;
diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h
index 4e29a4f4..70c0f687 100755
--- a/ge/graph/load/model_manager/davinci_model.h
+++ b/ge/graph/load/model_manager/davinci_model.h
@@ -29,6 +29,7 @@
 #include "common/helper/om_file_helper.h"
 #include "common/opskernel/ge_task_info.h"
 #include "common/properties_manager.h"
+#include "common/dump/opdebug_register.h"
 #include "common/types.h"
 #include "framework/common/util.h"
 #include "graph/debug/ge_attr_define.h"
@@ -984,6 +985,7 @@ class DavinciModel {
   int64_t maxDumpOpNum_;
   // for data dump
   DataDumper data_dumper_;
+  OpdebugRegister opdebug_register_;
   uint64_t iterator_count_;
   bool is_l1_fusion_enable_;
   map<OpDescPtr, void *> saved_task_addrs_;  // release after DavinciModel::Init
@@ -1021,8 +1023,6 @@ class DavinciModel {
   // for op debug
   mutex debug_reg_mutex_;
   bool is_op_debug_reg_ = false;
-  void *op_debug_addr_ = nullptr;
-  void *p2p_debug_addr_ = nullptr;
   bool is_online_infer_dynamic_ = false;
   bool is_getnext_sink_dynamic_ = false;
   vector<int32_t> cur_dynamic_dims_;
diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc
index 7d163130..b6c4dc9e 100644
--- a/ge/hybrid/executor/hybrid_model_async_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_async_executor.cc
@@ -85,6 +85,10 @@ Status HybridModelAsyncExecutor::Stop() {
     ret = future_.get();
   }
 
+  if (is_op_debug_reg_) {
+    op_debug_register_.UnregisterDebugForStream(stream_);
+  }
+
   if (stream_ != nullptr) {
     GE_CHK_RT(rtStreamDestroy(stream_));
     stream_ = nullptr;
@@ -101,6 +105,7 @@ Status HybridModelAsyncExecutor::Init() {
   executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_));
   GE_CHECK_NOTNULL(executor_);
   GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine");
+  GE_CHK_STATUS_RET(DumpOpDebug(),"Dump op debug failed in hybrid engine");
 
   GELOGI("HybridModel stage nums:%zu", model_->GetRootGraphItem()->NumGroups());
   if (model_->GetRootGraphItem()->NumGroups() >= kMinimumPiplineStages) {
@@ -508,5 +513,41 @@ Status HybridModelAsyncExecutor::Execute(const vector<GeTensor> &inputs, vector<
 
   return SUCCESS;
 }
+
+// Enables stream-level op debug (overflow detection) for the hybrid model when the execution context's
+// dump properties request it, then loads the dump info through data_dumper_.
+// Returns SUCCESS when op debug is disabled or fully set up, otherwise the status of the failing step.
+Status HybridModelAsyncExecutor::DumpOpDebug() {
+  const DumpProperties &dump_properties = executor_->GetContext()->dump_properties;
+  if (!dump_properties.IsOpDebugOpen()) {
+    return SUCCESS;
+  }
+
+  GELOGD("Opdebug is open in hybrid engine");
+  uint32_t op_debug_mode = dump_properties.GetOpDebugMode();
+  // RegisterDebugForStream returns a ge::Status, so it is checked and propagated as one rather than being
+  // run through the rtError_t conversion of GE_CHK_RT_RET.
+  GE_CHK_STATUS_RET(op_debug_register_.RegisterDebugForStream(stream_, op_debug_mode, data_dumper_),
+                    "Register op debug for stream failed in hybrid engine");
+  is_op_debug_reg_ = true;
+  data_dumper_.SetDumpProperties(dump_properties);
+  data_dumper_.SetModelName(model_->GetModelName());
+  data_dumper_.SetModelId(model_->GetModelId());
+  data_dumper_.SetDeviceId(model_->GetDeviceId());
+
+  // Looks up a variable by node name and returns its data address, or nullptr when it does not exist.
+  const auto variable_addr = [this](const std::string &name) -> void * {
+    TensorValue *variable = model_->GetVariable(name);
+    return (variable == nullptr) ? nullptr : const_cast<void *>(variable->GetData());
+  };
+  void *global_step = variable_addr(NODE_NAME_GLOBAL_STEP);
+  void *loop_per_iter = variable_addr(NODE_NAME_FLOWCTRL_LOOP_PER_ITER);
+  void *loop_cond = variable_addr(NODE_NAME_FLOWCTRL_LOOP_COND);
+  data_dumper_.SetLoopAddr(global_step, loop_per_iter, loop_cond);
+  GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "LoadDumpInfo failed in hybrid engine");
+  GELOGD("Dump op debug SUCCESS in hybrid engine");
+  return SUCCESS;
+}
+
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h
index 4790248b..69d8a3f4 100644
--- a/ge/hybrid/executor/hybrid_model_async_executor.h
+++ b/ge/hybrid/executor/hybrid_model_async_executor.h
@@ -21,7 +21,9 @@
 #include <future>
 #include "external/ge/ge_api_error_codes.h"
 #include "external/ge/ge_api_types.h"
+#include "common/dump/opdebug_register.h"
 #include "graph/load/model_manager/data_inputer.h"
+#include "graph/load/model_manager/data_dumper.h"
 #include "hybrid/executor/hybrid_model_executor.h"
 #include "hybrid/executor/hybrid_model_pipeline_executor.h"
 #include "runtime/stream.h"
@@ -77,6 +79,8 @@ class HybridModelAsyncExecutor {
 
   Status PrepareInputs(const InputData &current_data, HybridModelExecutor::ExecuteArgs &args);
 
+  Status DumpOpDebug();
+
   std::mutex mu_;
   HybridModel *model_;
   uint32_t device_id_ = 0U;
@@ -94,6 +98,9 @@ class HybridModelAsyncExecutor {
   std::vector<bool> is_input_dynamic_;
   std::shared_ptr<ModelListener> listener_;
   string om_name_;
+  DataDumper data_dumper_;
+  bool is_op_debug_reg_ = false;
+  OpdebugRegister op_debug_register_;
 };
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc
index 63d9126b..673c82dd 100755
--- a/ge/hybrid/executor/worker/execution_engine.cc
+++ b/ge/hybrid/executor/worker/execution_engine.cc
@@ -266,9 +266,9 @@ Status NodeDoneCallback::OnNodeDone() {
   RECORD_CALLBACK_EVENT(graph_context_, context_->GetNodeName(), "[Compute] End");
   RECORD_CALLBACK_EVENT(graph_context_, context_->GetNodeName(), "[Callback] Start");
 
-  auto dump_path = context_->GetDumpProperties().GetDumpPath();
-  if (!dump_path.empty()) {
-    GELOGI("Start to dump dynamic shape,dump_path is %s", dump_path.c_str());
+  const DumpProperties &dump_properties = context_->GetDumpProperties();
+  if (dump_properties.IsDumpOpen() || context_->IsOverFlow()) {
+    GELOGI("Start to dump dynamic shape op");
     GE_CHK_STATUS_RET(DumpDynamicNode(), "Failed to dump dynamic node");
   }
 
diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h
index 500f0472..3e5bd635 100644
--- a/ge/hybrid/model/hybrid_model.h
+++ b/ge/hybrid/model/hybrid_model.h
@@ -61,6 +61,10 @@ class HybridModel {
     device_id_ = device_id;
   }
 
+  uint32_t GetDeviceId() const {  // const so the accessor can also be used through a const HybridModel
+    return device_id_;
+  }
+
   void SetModelId(uint32_t model_id) {
     model_id_ = model_id;
   }
diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
index 1640ad3b..119db0af 100755
--- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
@@ -17,6 +17,7 @@
 #include "aicore_node_executor.h"
 #include "framework/common/taskdown_common.h"
 #include "hybrid/executor/hybrid_execution_context.h"
+#include "external/runtime/rt_error_codes.h"
 
 namespace ge {
 namespace hybrid {
@@ -189,6 +190,7 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
     }
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start");
     GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream()));
+    GE_CHK_STATUS_RET_NOLOG(CheckOverflow(context));
     // save profiling data
     uint32_t task_id = 0;
     uint32_t stream_id = 0;
@@ -259,6 +261,29 @@ void AiCoreNodeTask::SetWorkspaceSizes(const vector<int64_t> &workspace_sizes) {
   workspace_sizes_ = workspace_sizes;
 }
 
+// When op debug is enabled, synchronizes the task's stream and marks the context as overflowed if the
+// runtime reports an AiCore overflow. Only a synchronize error other than overflow is returned as failure.
+Status AiCoreNodeTask::CheckOverflow(TaskContext &context) {
+  if (!context.GetDumpProperties().IsOpDebugOpen()) {
+    GELOGD("Opdebug is not open in hybrid engine");
+    return SUCCESS;
+  }
+
+  GELOGD("Op %s is doing overflow check in hybrid engine", context.GetNodeName());
+  const auto sync_ret = rtStreamSynchronize(context.GetStream());
+  if (sync_ret == ACL_ERROR_RT_AICORE_OVER_FLOW) {
+    // Overflow is reported through the context flag, not as an execution failure.
+    context.SetOverFlow(true);
+    GELOGW("Dynamic shape op %s is over flow", context.GetNodeName());
+    return SUCCESS;
+  }
+  if (sync_ret != RT_ERROR_NONE) {
+    GELOGE(sync_ret, "rtstreamsynchronize failed");
+    return RT_ERROR_TO_GE_STATUS(sync_ret);
+  }
+  return SUCCESS;
+}
+
 TaskCompilerFactory &TaskCompilerFactory::GetInstance() {
   static TaskCompilerFactory instance;
   return instance;
diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.h b/ge/hybrid/node_executor/aicore/aicore_node_executor.h
index 2095b41d..c352764d 100755
--- a/ge/hybrid/node_executor/aicore/aicore_node_executor.h
+++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.h
@@ -62,6 +62,7 @@ class AiCoreNodeTask : public NodeTask {
   const vector<int64_t> &GetWorkspaceSizes() const;
   void SetWorkspaceSizes(const vector<int64_t> &workspace_sizes);
  private:
+  Status CheckOverflow(TaskContext &context);
   std::vector<std::unique_ptr<AiCoreOpTask>> tasks_;
   std::vector<int64_t> workspace_sizes_;
 };
diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc
index 1d6e814b..cf5ac851 100755
--- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc
+++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc
@@ -124,7 +124,7 @@ Status KnownNodeTask::Init(TaskContext &context) {
   }
   if (!load_flag_) {
     auto dump_properties = context.GetDumpProperties();
-    if (dump_properties.IsDumpOpen()) {
+    if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) {
       davinci_model_->SetDumpProperties(dump_properties);
       void *global_step = nullptr;
       TensorValue *varible_global_step = context.GetVariable(NODE_NAME_GLOBAL_STEP);
diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc
index 08cce30c..84dd8fd8 100644
--- a/ge/hybrid/node_executor/task_context.cc
+++ b/ge/hybrid/node_executor/task_context.cc
@@ -350,6 +350,14 @@ void TaskContext::SetStreamId(uint32_t stream_id) {
   stream_id_ = stream_id;
 }
 
+void TaskContext::SetOverFlow(bool is_over_flow) {  // set by AiCoreNodeTask::CheckOverflow on AiCore overflow
+  is_over_flow_ = is_over_flow;
+}
+
+bool TaskContext::IsOverFlow() {  // read in NodeDoneCallback::OnNodeDone to decide whether to dump the node
+  return is_over_flow_;
+}
+
 Status TaskContext::AllocateWorkspace(size_t size, void **buffer, void *ori_addr) {
   GE_CHECK_NOTNULL(buffer);
   if (ori_addr == nullptr) {
diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h
index 645c1234..e00c5048 100644
--- a/ge/hybrid/node_executor/task_context.h
+++ b/ge/hybrid/node_executor/task_context.h
@@ -65,6 +65,7 @@ class TaskContext {
   int64_t GetSessionId() const;
   uint64_t GetIterationNumber() const;
 
+
   void NodeDone();
   void OnError(Status error);
 
@@ -106,6 +107,9 @@ class TaskContext {
   uint32_t GetStreamId() const;
   void SetStreamId(uint32_t stream_id);
 
+  void SetOverFlow(bool is_over_flow);
+  bool IsOverFlow();
+
   Status Synchronize();
 
   bool IsForceInferShape() const;
@@ -138,6 +142,7 @@ class TaskContext {
   uint32_t task_id_ = 0;
   uint32_t stream_id_ = 0;
   std::vector<TaskDescInfo> task_desc_info;
+  bool is_over_flow_ = false;
 };
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc
index 80c16968..f754af28 100755
--- a/ge/single_op/task/op_task.cc
+++ b/ge/single_op/task/op_task.cc
@@ -491,21 +491,18 @@ Status AiCpuBaseTask::UpdateOutputShape(vector<GeTensorDesc> &output_desc) {
   }
   GELOGD("Start to update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape.");
 
-  GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_->GetExtInfo(),
-                         aicpu_ext_handle_->GetExtInfoLen(),
-                         ext_info_addr_dev_,
-                         aicpu_ext_handle_->GetExtInfoLen(),
-                         RT_MEMCPY_DEVICE_TO_HOST));
+  GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_->GetExtInfo(), aicpu_ext_handle_->GetExtInfoLen(), ext_info_addr_dev_,
+                         aicpu_ext_handle_->GetExtInfoLen(), RT_MEMCPY_DEVICE_TO_HOST));
 
   for (size_t i = 0; i < num_outputs_; ++i) {
     GeShape shape;
     DataType data_type;
     aicpu_ext_handle_->GetOutputShapeAndType(i, shape, data_type);
-    GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]),
-                      "AiCpuCCTask Update [%zu]th output shape failed.", i);
+    GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]), "AiCpuCCTask Update [%zu]th output shape failed.",
+                      i);
     if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) {
-      GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]),
-                        "AiCpuCCTask Update [%zu]th output desc failed.", i);
+      GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), "AiCpuCCTask Update [%zu]th output desc failed.",
+                        i);
     }
   }
   GELOGD("Update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape finished.");
@@ -697,10 +694,10 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) {
       const auto &shape_hbm = out_shape_hbm_[i];
 
       uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t);
-      std::unique_ptr<int64_t[]> shape_addr(new(std::nothrow) int64_t[dim_num]());
+      std::unique_ptr<int64_t[]> shape_addr(new (std::nothrow) int64_t[dim_num]());
       GE_CHECK_NOTNULL(shape_addr);
-      GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size,
-                             shape_hbm, result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST));
+      GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm,
+                             result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST));
 
       for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) {
         shape_dims.emplace_back(shape_addr[dim_idx]);
@@ -711,13 +708,14 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) {
     GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), output_desc[i]),
                       "AiCpuTask update [%zu]th output shape failed.", i);
     if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) {
-      GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]),
-                        "AiCpuTask update [%zu]th output desc failed.", i);
+      GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), "AiCpuTask update [%zu]th output desc failed.",
+                        i);
     }
   }
   return SUCCESS;
 }
 
+
 Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc,
                                                     vector<DataBuffer> &outputs,
                                                     rtStream_t stream) {
diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc
index e6a7d66b..440b98e7 100644
--- a/tests/depends/runtime/src/runtime_stub.cc
+++ b/tests/depends/runtime/src/runtime_stub.cc
@@ -431,3 +431,7 @@ rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId)
 {
  return RT_ERROR_NONE;
 }
+
+rtError_t rtDebugRegisterForStream(rtStream_t stream, uint32_t flag, const void *addr, uint32_t *streamId, uint32_t *taskId) {
+  return RT_ERROR_NONE;  // test stub: always succeeds and leaves *streamId / *taskId untouched
+}
diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt
index 91b756cc..f87b09aa 100755
--- a/tests/ut/ge/CMakeLists.txt
+++ b/tests/ut/ge/CMakeLists.txt
@@ -162,6 +162,7 @@ set(COMMON_SRC_FILES
     "${GE_CODE_DIR}/ge/common/dump/dump_properties.cc"
     "${GE_CODE_DIR}/ge/common/helper/model_helper.cc"
     "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc"
+    "${GE_CODE_DIR}/ge/common/dump/opdebug_register.cc"
     "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc"
     "${GE_CODE_DIR}/ge/model/ge_root_model.cc"
     "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc"
@@ -733,6 +734,7 @@ set(MULTI_PARTS_TEST_FILES
     "graph/transop_util_unittest.cc"
     "common/datatype_transfer_unittest.cc"
     "common/dump_manager_unittest.cc"
+    "common/opdebug_register_unittest.cc"
     "common/format_transfer_unittest.cc"
     "common/format_transfer_transpose_unittest.cc"
     "common/format_transfer_nchw_5d_unittest.cc"
diff --git a/tests/ut/ge/common/opdebug_register_unittest.cc b/tests/ut/ge/common/opdebug_register_unittest.cc
new file mode 100644
index 00000000..fcdaddaf
--- /dev/null
+++ b/tests/ut/ge/common/opdebug_register_unittest.cc
@@ -0,0 +1,51 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "common/dump/opdebug_register.h"
+#include "common/debug/log.h"
+#include "common/ge_inner_error_codes.h"
+
+namespace ge {
+class UTEST_opdebug_register : public testing::Test {  // gtest fixture for the OpdebugRegister tests
+ protected:
+  void SetUp() override {}     // nothing to prepare: each test builds its own objects
+  void TearDown() override {}  // nothing to release
+};
+
+TEST_F(UTEST_opdebug_register, register_debug_for_model_success) {
+  OpdebugRegister opdebug_register;
+  rtModel_t model_handle = reinterpret_cast<rtModel_t>(0x111);  // arbitrary non-null dummy handle
+  uint32_t op_debug_mode = 1;
+  DataDumper data_dumper;
+  auto ret = opdebug_register.RegisterDebugForModel(model_handle, op_debug_mode, data_dumper);
+  opdebug_register.UnregisterDebugForModel(model_handle);  // unregister before asserting so cleanup always runs
+  EXPECT_EQ(ret, ge::SUCCESS);
+}
+
+TEST_F(UTEST_opdebug_register, register_debug_for_stream_success) {
+  OpdebugRegister opdebug_register;
+  rtStream_t stream = reinterpret_cast<rtStream_t>(0x111);  // arbitrary non-null dummy handle
+  uint32_t op_debug_mode = 1;
+  DataDumper data_dumper;
+  auto ret = opdebug_register.RegisterDebugForStream(stream, op_debug_mode, data_dumper);
+  opdebug_register.UnregisterDebugForStream(stream);  // unregister before asserting so cleanup always runs
+  EXPECT_EQ(ret, ge::SUCCESS);
+}
+
+
+}  // namespace ge